read high-rate winds interpolated to 1D times

read_winds.py provides code to read high-rate winds (or any high-rate variable) with sub-second time dimensions and interpolate them to 1D time series. Intended to work for variables with only a single time dimension, but not tested yet.
NCAR · Jan 5, 2025 · 31dfac5 · 31dfac5
1 parent 4852ec4
commit 31dfac5
Show file tree

Hide file tree

Showing 4 changed files with 155 additions and 1 deletion.
diff --git a/read_winds.py b/read_winds.py
@@ -0,0 +1,104 @@
+"""
+Read wind vectors from ISFS netcdf files.
+"""
+
+import logging
+import numpy as np
+import xarray as xr
+
+
+logger = logging.getLogger(__name__)
+
+
+def rdatetime(when: np.datetime64, period: np.timedelta64) -> np.datetime64:
+    "Round when to the nearest multiple of period."
+    when_ns = when.astype('datetime64[ns]')
+    period_ns = period.astype('timedelta64[ns]').astype(int)
+    mod = when_ns.astype(int) % period_ns
+    # compare with zero since period_ns // 2 is zero when period_ns is 1
+    if mod < period_ns // 2 or mod == 0:
+        when_ns -= mod
+    else:
+        when_ns += period_ns - mod
+    return np.datetime64(when_ns, 'ns').astype(when.dtype)
+
+
+class ISFSDataset:
+
+    def __init__(self):
+        self.dataset = None
+        self.timev = None
+        self.timed = None
+
+    def open(self, filename):
+        self.dataset = xr.open_dataset(filename)
+        self.timev = self.dataset['time']
+        self.timed = self.timev.dims[0]
+        logger.debug(f"Opened dataset: {filename}, %s...%s",
+                     self.timev[0], self.timev[-1])
+        return self
+
+    def resample_dim_name(self, dsv: xr.DataArray):
+        "Derive the resampled time dimension name."
+        dims = dsv.dims
+        if len(dims) == 1:
+            return dims[0]
+        nsample = self.dataset.sizes[dims[1]]
+        dname = f"time{nsample}"
+        return dname
+
+    def interpolate_times(self, dsv: xr.DataArray) -> xr.DataArray:
+        """
+        Make sure the dataset has a time coordinate for these dimemsions, then
+        return it.
+        """
+        # find the unique name for the time coordinate to see if it exists
+        dname = self.resample_dim_name(dsv)
+        if dname in self.dataset.coords:
+            return self.dataset.coords[dname]
+        dims = dsv.dims
+        if len(dims) == 1:
+            return self.timev
+        ntime = self.dataset.sizes[dims[0]]
+        nsample = self.dataset.sizes[dims[1]]
+        period = np.timedelta64(1000000000 // nsample, 'ns')
+        logger.debug(f"calculating 1D time coordinate for {dims}: "
+                     f"{nsample} samples, period {period} ...")
+        tra = np.ndarray((ntime, nsample), dtype='datetime64[ns]')
+        for i in range(ntime):
+            seconds = self.timev.values[i] - np.timedelta64(500, 'ms')
+            for j in range(nsample):
+                tra[i, j] = seconds + (j * period)
+        sampled = xr.DataArray(name=dname, data=tra, dims=dims)
+        self.dataset.coords[dname] = sampled
+        logger.debug(f"Interpolated time coordinate for {dims}: %s", sampled)
+        return sampled
+
+    def get_variable(self, variable):
+        """
+        Return the named variable from the dataset as a 1D time series.  For
+        variables with rates higher that 1 Hz, this means flattening the time
+        dimensions and interpolating the sub-second timestamps.
+        """
+        dsv: xr.DataArray
+        dsv = self.dataset[variable]
+        self.interpolate_times(dsv)
+        return dsv
+
+    def reshape_variable(self, dsv: xr.DataArray):
+        """
+        Given a time series with a sub-sample dimension, reshape it to a 1D
+        time series.  If it is already 1D, return it unchanged.
+        """
+        if len(dsv.dims) == 1:
+            return dsv
+        data = dsv.values.flatten()
+        dname = self.resample_dim_name(dsv)
+        dtimes = self.interpolate_times(dsv)
+        times = dtimes.values.flatten()
+        reshaped = xr.DataArray(name=dsv.name, data=data, coords={dname: times})
+        logger.debug("reshaped %s: %s", dsv.name, reshaped)
+        return reshaped
+
+    def close(self):
+        self.dataset.close()
diff --git a/test_data/u_2m_t0_20230804_160000.nc b/test_data/u_2m_t0_20230804_160000.nc
diff --git a/test_dump_hotfilm.py b/test_dump_hotfilm.py
@@ -43,7 +43,7 @@ def test_parse_line():
     hf = ReadHotfilm()
     data = hf.parse_line(_scan)
     assert data is not None
-    print(data)
+    logger.debug(data)
     y = data['ch1']
     x = data.index
     y2 = data[data.columns[0]]

diff --git a/test_read_winds.py b/test_read_winds.py
@@ -0,0 +1,50 @@
+
+from pytest import approx
+import logging
+import numpy as np
+from read_winds import ISFSDataset, rdatetime
+
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.DEBUG)
+
+
+def test_rdatetime():
+    ns = np.timedelta64(1, 'ns')
+    us = np.timedelta64(1, 'us')
+    ms = np.timedelta64(1, 'ms')
+    s = np.timedelta64(1, 's')
+    testdata = [
+        ('2023-08-04T16:00:00.016666667', ms, '2023-08-04T16:00:00.017'),
+        ('2023-08-04T16:00:00.016666667', us, '2023-08-04T16:00:00.016667'),
+        ('2023-08-04T16:00:00.016666667', ns, '2023-08-04T16:00:00.016666667'),
+        ('2023-08-04T16:00:00.000000500', us, '2023-08-04T16:00:00.000001000'),
+        ('2023-08-04T16:00:00.000000500', ms, '2023-08-04T16:00:00.000'),
+        ('2023-08-04T16:00:00.501', s, '2023-08-04T16:00:01'),
+        ('2023-08-04T16:00:00.499', s, '2023-08-04T16:00:00'),
+    ]
+
+    for when, period, expected in testdata:
+        when = np.datetime64(when)
+        expected = np.datetime64(expected)
+        assert rdatetime(when, period) == expected
+
+
+def test_read_winds():
+    ids = ISFSDataset().open("test_data/u_2m_t0_20230804_160000.nc")
+    ds = ids.dataset
+    u = ids.get_variable("u_2m_t0")
+    first = ds.time[0]
+    last = ds.time[-1]
+    u = ids.reshape_variable(u)
+    assert len(u.values) == 3600 * 60
+    assert u.values[0] == approx(0.06215948)
+    assert u.values[-1] == approx(-0.145788)
+    dtime = u.dims[0]
+    # the first half-second timestamp at 500 ms does not match exactly the
+    # 30th timestamp at 16666666 ns, but it should round to exactly 500 ms.
+    us = np.timedelta64(1, 'us')
+    assert rdatetime(u.coords[dtime][30].values, us) == first
+    xlast = last.values + 29 * np.timedelta64(16666666, 'ns')
+    assert rdatetime(u.coords[dtime][-1].values, us) == rdatetime(xlast, us)
+    ds.close()