Skip to content

Commit

Permalink
read high-rate winds interpolated to 1D times
Browse files Browse the repository at this point in the history
read_winds.py provides code to read high-rate winds (or any high-rate
variable) with sub-second time dimensions and interpolate them to 1D time
series.  Intended to work for variables with only a single time dimension, but
not tested yet.
  • Loading branch information
garyjg committed Jan 5, 2025
1 parent 4852ec4 commit 31dfac5
Show file tree
Hide file tree
Showing 4 changed files with 155 additions and 1 deletion.
104 changes: 104 additions & 0 deletions read_winds.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
"""
Read wind vectors from ISFS netcdf files.
"""

import logging
import numpy as np
import xarray as xr


logger = logging.getLogger(__name__)


def rdatetime(when: np.datetime64, period: np.timedelta64) -> np.datetime64:
"Round when to the nearest multiple of period."
when_ns = when.astype('datetime64[ns]')
period_ns = period.astype('timedelta64[ns]').astype(int)
mod = when_ns.astype(int) % period_ns
# compare with zero since period_ns // 2 is zero when period_ns is 1
if mod < period_ns // 2 or mod == 0:
when_ns -= mod
else:
when_ns += period_ns - mod
return np.datetime64(when_ns, 'ns').astype(when.dtype)


class ISFSDataset:

def __init__(self):
self.dataset = None
self.timev = None
self.timed = None

def open(self, filename):
self.dataset = xr.open_dataset(filename)
self.timev = self.dataset['time']
self.timed = self.timev.dims[0]
logger.debug(f"Opened dataset: {filename}, %s...%s",
self.timev[0], self.timev[-1])
return self

def resample_dim_name(self, dsv: xr.DataArray):
"Derive the resampled time dimension name."
dims = dsv.dims
if len(dims) == 1:
return dims[0]
nsample = self.dataset.sizes[dims[1]]
dname = f"time{nsample}"
return dname

def interpolate_times(self, dsv: xr.DataArray) -> xr.DataArray:
"""
Make sure the dataset has a time coordinate for these dimemsions, then
return it.
"""
# find the unique name for the time coordinate to see if it exists
dname = self.resample_dim_name(dsv)
if dname in self.dataset.coords:
return self.dataset.coords[dname]
dims = dsv.dims
if len(dims) == 1:
return self.timev
ntime = self.dataset.sizes[dims[0]]
nsample = self.dataset.sizes[dims[1]]
period = np.timedelta64(1000000000 // nsample, 'ns')
logger.debug(f"calculating 1D time coordinate for {dims}: "
f"{nsample} samples, period {period} ...")
tra = np.ndarray((ntime, nsample), dtype='datetime64[ns]')
for i in range(ntime):
seconds = self.timev.values[i] - np.timedelta64(500, 'ms')
for j in range(nsample):
tra[i, j] = seconds + (j * period)
sampled = xr.DataArray(name=dname, data=tra, dims=dims)
self.dataset.coords[dname] = sampled
logger.debug(f"Interpolated time coordinate for {dims}: %s", sampled)
return sampled

def get_variable(self, variable):
"""
Return the named variable from the dataset as a 1D time series. For
variables with rates higher that 1 Hz, this means flattening the time
dimensions and interpolating the sub-second timestamps.
"""
dsv: xr.DataArray
dsv = self.dataset[variable]
self.interpolate_times(dsv)
return dsv

def reshape_variable(self, dsv: xr.DataArray):
"""
Given a time series with a sub-sample dimension, reshape it to a 1D
time series. If it is already 1D, return it unchanged.
"""
if len(dsv.dims) == 1:
return dsv
data = dsv.values.flatten()
dname = self.resample_dim_name(dsv)
dtimes = self.interpolate_times(dsv)
times = dtimes.values.flatten()
reshaped = xr.DataArray(name=dsv.name, data=data, coords={dname: times})
logger.debug("reshaped %s: %s", dsv.name, reshaped)
return reshaped

def close(self):
self.dataset.close()
Binary file added test_data/u_2m_t0_20230804_160000.nc
Binary file not shown.
2 changes: 1 addition & 1 deletion test_dump_hotfilm.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def test_parse_line():
hf = ReadHotfilm()
data = hf.parse_line(_scan)
assert data is not None
print(data)
logger.debug(data)
y = data['ch1']
x = data.index
y2 = data[data.columns[0]]
Expand Down
50 changes: 50 additions & 0 deletions test_read_winds.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@

from pytest import approx
import logging
import numpy as np
from read_winds import ISFSDataset, rdatetime


logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.DEBUG)


def test_rdatetime():
ns = np.timedelta64(1, 'ns')
us = np.timedelta64(1, 'us')
ms = np.timedelta64(1, 'ms')
s = np.timedelta64(1, 's')
testdata = [
('2023-08-04T16:00:00.016666667', ms, '2023-08-04T16:00:00.017'),
('2023-08-04T16:00:00.016666667', us, '2023-08-04T16:00:00.016667'),
('2023-08-04T16:00:00.016666667', ns, '2023-08-04T16:00:00.016666667'),
('2023-08-04T16:00:00.000000500', us, '2023-08-04T16:00:00.000001000'),
('2023-08-04T16:00:00.000000500', ms, '2023-08-04T16:00:00.000'),
('2023-08-04T16:00:00.501', s, '2023-08-04T16:00:01'),
('2023-08-04T16:00:00.499', s, '2023-08-04T16:00:00'),
]

for when, period, expected in testdata:
when = np.datetime64(when)
expected = np.datetime64(expected)
assert rdatetime(when, period) == expected


def test_read_winds():
ids = ISFSDataset().open("test_data/u_2m_t0_20230804_160000.nc")
ds = ids.dataset
u = ids.get_variable("u_2m_t0")
first = ds.time[0]
last = ds.time[-1]
u = ids.reshape_variable(u)
assert len(u.values) == 3600 * 60
assert u.values[0] == approx(0.06215948)
assert u.values[-1] == approx(-0.145788)
dtime = u.dims[0]
# the first half-second timestamp at 500 ms does not match exactly the
# 30th timestamp at 16666666 ns, but it should round to exactly 500 ms.
us = np.timedelta64(1, 'us')
assert rdatetime(u.coords[dtime][30].values, us) == first
xlast = last.values + 29 * np.timedelta64(16666666, 'ns')
assert rdatetime(u.coords[dtime][-1].values, us) == rdatetime(xlast, us)
ds.close()

0 comments on commit 31dfac5

Please sign in to comment.