-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
read high-rate winds interpolated to 1D times
read_winds.py provides code to read high-rate winds (or any high-rate variable) with sub-second time dimensions and interpolate them to 1D time series. Intended to work for variables with only a single time dimension, but not tested yet.
- Loading branch information
Showing
4 changed files
with
155 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
""" | ||
Read wind vectors from ISFS netcdf files. | ||
""" | ||
|
||
import logging | ||
import numpy as np | ||
import xarray as xr | ||
|
||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
def rdatetime(when: np.datetime64, period: np.timedelta64) -> np.datetime64: | ||
"Round when to the nearest multiple of period." | ||
when_ns = when.astype('datetime64[ns]') | ||
period_ns = period.astype('timedelta64[ns]').astype(int) | ||
mod = when_ns.astype(int) % period_ns | ||
# compare with zero since period_ns // 2 is zero when period_ns is 1 | ||
if mod < period_ns // 2 or mod == 0: | ||
when_ns -= mod | ||
else: | ||
when_ns += period_ns - mod | ||
return np.datetime64(when_ns, 'ns').astype(when.dtype) | ||
|
||
|
||
class ISFSDataset: | ||
|
||
def __init__(self): | ||
self.dataset = None | ||
self.timev = None | ||
self.timed = None | ||
|
||
def open(self, filename): | ||
self.dataset = xr.open_dataset(filename) | ||
self.timev = self.dataset['time'] | ||
self.timed = self.timev.dims[0] | ||
logger.debug(f"Opened dataset: {filename}, %s...%s", | ||
self.timev[0], self.timev[-1]) | ||
return self | ||
|
||
def resample_dim_name(self, dsv: xr.DataArray): | ||
"Derive the resampled time dimension name." | ||
dims = dsv.dims | ||
if len(dims) == 1: | ||
return dims[0] | ||
nsample = self.dataset.sizes[dims[1]] | ||
dname = f"time{nsample}" | ||
return dname | ||
|
||
def interpolate_times(self, dsv: xr.DataArray) -> xr.DataArray: | ||
""" | ||
Make sure the dataset has a time coordinate for these dimemsions, then | ||
return it. | ||
""" | ||
# find the unique name for the time coordinate to see if it exists | ||
dname = self.resample_dim_name(dsv) | ||
if dname in self.dataset.coords: | ||
return self.dataset.coords[dname] | ||
dims = dsv.dims | ||
if len(dims) == 1: | ||
return self.timev | ||
ntime = self.dataset.sizes[dims[0]] | ||
nsample = self.dataset.sizes[dims[1]] | ||
period = np.timedelta64(1000000000 // nsample, 'ns') | ||
logger.debug(f"calculating 1D time coordinate for {dims}: " | ||
f"{nsample} samples, period {period} ...") | ||
tra = np.ndarray((ntime, nsample), dtype='datetime64[ns]') | ||
for i in range(ntime): | ||
seconds = self.timev.values[i] - np.timedelta64(500, 'ms') | ||
for j in range(nsample): | ||
tra[i, j] = seconds + (j * period) | ||
sampled = xr.DataArray(name=dname, data=tra, dims=dims) | ||
self.dataset.coords[dname] = sampled | ||
logger.debug(f"Interpolated time coordinate for {dims}: %s", sampled) | ||
return sampled | ||
|
||
def get_variable(self, variable): | ||
""" | ||
Return the named variable from the dataset as a 1D time series. For | ||
variables with rates higher that 1 Hz, this means flattening the time | ||
dimensions and interpolating the sub-second timestamps. | ||
""" | ||
dsv: xr.DataArray | ||
dsv = self.dataset[variable] | ||
self.interpolate_times(dsv) | ||
return dsv | ||
|
||
def reshape_variable(self, dsv: xr.DataArray): | ||
""" | ||
Given a time series with a sub-sample dimension, reshape it to a 1D | ||
time series. If it is already 1D, return it unchanged. | ||
""" | ||
if len(dsv.dims) == 1: | ||
return dsv | ||
data = dsv.values.flatten() | ||
dname = self.resample_dim_name(dsv) | ||
dtimes = self.interpolate_times(dsv) | ||
times = dtimes.values.flatten() | ||
reshaped = xr.DataArray(name=dsv.name, data=data, coords={dname: times}) | ||
logger.debug("reshaped %s: %s", dsv.name, reshaped) | ||
return reshaped | ||
|
||
def close(self): | ||
self.dataset.close() |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
|
||
from pytest import approx | ||
import logging | ||
import numpy as np | ||
from read_winds import ISFSDataset, rdatetime | ||
|
||
|
||
logger = logging.getLogger(__name__) | ||
logging.basicConfig(level=logging.DEBUG) | ||
|
||
|
||
def test_rdatetime(): | ||
ns = np.timedelta64(1, 'ns') | ||
us = np.timedelta64(1, 'us') | ||
ms = np.timedelta64(1, 'ms') | ||
s = np.timedelta64(1, 's') | ||
testdata = [ | ||
('2023-08-04T16:00:00.016666667', ms, '2023-08-04T16:00:00.017'), | ||
('2023-08-04T16:00:00.016666667', us, '2023-08-04T16:00:00.016667'), | ||
('2023-08-04T16:00:00.016666667', ns, '2023-08-04T16:00:00.016666667'), | ||
('2023-08-04T16:00:00.000000500', us, '2023-08-04T16:00:00.000001000'), | ||
('2023-08-04T16:00:00.000000500', ms, '2023-08-04T16:00:00.000'), | ||
('2023-08-04T16:00:00.501', s, '2023-08-04T16:00:01'), | ||
('2023-08-04T16:00:00.499', s, '2023-08-04T16:00:00'), | ||
] | ||
|
||
for when, period, expected in testdata: | ||
when = np.datetime64(when) | ||
expected = np.datetime64(expected) | ||
assert rdatetime(when, period) == expected | ||
|
||
|
||
def test_read_winds(): | ||
ids = ISFSDataset().open("test_data/u_2m_t0_20230804_160000.nc") | ||
ds = ids.dataset | ||
u = ids.get_variable("u_2m_t0") | ||
first = ds.time[0] | ||
last = ds.time[-1] | ||
u = ids.reshape_variable(u) | ||
assert len(u.values) == 3600 * 60 | ||
assert u.values[0] == approx(0.06215948) | ||
assert u.values[-1] == approx(-0.145788) | ||
dtime = u.dims[0] | ||
# the first half-second timestamp at 500 ms does not match exactly the | ||
# 30th timestamp at 16666666 ns, but it should round to exactly 500 ms. | ||
us = np.timedelta64(1, 'us') | ||
assert rdatetime(u.coords[dtime][30].values, us) == first | ||
xlast = last.values + 29 * np.timedelta64(16666666, 'ns') | ||
assert rdatetime(u.coords[dtime][-1].values, us) == rdatetime(xlast, us) | ||
ds.close() |