From 726aaf4857dfb9b7eb1e5774d551f7097bc70b2e Mon Sep 17 00:00:00 2001
From: Scott Staniewicz
Date: Fri, 18 Aug 2023 16:19:39 -0700
Subject: [PATCH 01/15] Add function to create CF-compliant arrays/metadata for HDF5 stacks

---
 src/mintpy/prep_utils.py | 209 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 209 insertions(+)
 create mode 100644 src/mintpy/prep_utils.py

diff --git a/src/mintpy/prep_utils.py b/src/mintpy/prep_utils.py
new file mode 100644
index 000000000..5b659803a
--- /dev/null
+++ b/src/mintpy/prep_utils.py
@@ -0,0 +1,209 @@
+"""Utilities to make HDF5 files CF-compliant and readable by GDAL."""
+from __future__ import annotations
+
+import datetime
+
+import h5py
+import numpy as np
+import pyproj
+
+from mintpy.utils import readfile
+
+__all__ = ["write_coordinate_system"]
+
+
+def write_coordinate_system(
+    filename: str,
+    dset_name: str,
+    xy_dim_names: tuple[str, str] = ("x", "y"),
+    grid_mapping_dset: str = "spatial_ref",
+) -> None:
+    """
+    Write the coordinate system CF metadata to an existing HDF5 file.
+
+    Parameters
+    ----------
+    filename : str
+        File path.
+    dset_name : str
+        Dataset name within the HDF5 file.
+    xy_dim_names : tuple[str, str], optional
+        x and y dimension names, by default ("x", "y").
+    grid_mapping_dset : str, optional
+        Dataset name for the grid mapping HDF5 attribute.
+        By default "spatial_ref" (matching `rioxarray`).
+    """
+    atr = readfile.read_attribute(filename)
+    epsg = int(atr.get("EPSG", 4326))
+
+    with h5py.File(filename, "a") as hf:
+        crs = pyproj.CRS.from_epsg(epsg)
+        dset = hf[dset_name]
+
+        # Setup the dataset holding the SRS information
+        srs_dset = hf.require_dataset(grid_mapping_dset, shape=(), dtype=int)
+        srs_dset.attrs.update(crs.to_cf())
+        dset.attrs["grid_mapping"] = grid_mapping_dset
+
+        _setup_time_dimension(hf, dset)
+        _setup_xy_dimensions(hf, dset, atr, crs, xy_dim_names)
+
+
+def _get_xy_arrays(atr: dict) -> tuple[np.ndarray, np.ndarray]:
+    """
+    Generate x and y arrays from attribute dictionary.
+
+    Parameters
+    ----------
+    atr : dict
+        MintPy attribute dictionary containing ROIPAC raster attributes.
+
+    Returns
+    -------
+    tuple[np.ndarray, np.ndarray]
+        x and y arrays.
+    """
+    x0 = float(atr["X_FIRST"])
+    y0 = float(atr["Y_FIRST"])
+    x_step = float(atr["X_STEP"])
+    y_step = float(atr["Y_STEP"])
+    rows = int(atr["LENGTH"])
+    cols = int(atr["WIDTH"])
+
+    x_arr = x0 + x_step * np.arange(cols)
+    y_arr = y0 + y_step * np.arange(rows)
+
+    # Shift by half pixel to get the centers
+    x_arr += x_step / 2
+    y_arr += y_step / 2
+    return x_arr, y_arr
+
+
+def _get_coordinate_metadata(crs: pyproj.CRS) -> tuple[dict, dict]:
+    """Get HDF5 coordinate metadata based on CRS type.
+
+    Parameters
+    ----------
+    crs : pyproj.CRS
+        Coordinate Reference System.
+
+    Returns
+    -------
+    tuple[dict, dict]
+        Dictionaries of metadata for x and y, respectively.
+ """ + x_coord_attrs = {"axis": "X"} + y_coord_attrs = {"axis": "Y"} + + if crs.is_projected: + units = "meters" + # X metadata + x_coord_attrs.update( + { + "long_name": "x coordinate of projection", + "standard_name": "projection_x_coordinate", + "units": units, + } + ) + # Y metadata + y_coord_attrs.update( + { + "long_name": "y coordinate of projection", + "standard_name": "projection_y_coordinate", + "units": units, + } + ) + elif crs.is_geographic: + # X metadata + x_coord_attrs.update( + { + "long_name": "longitude", + "standard_name": "longitude", + "units": "degrees_east", + } + ) + # Y metadata + y_coord_attrs.update( + { + "long_name": "latitude", + "standard_name": "latitude", + "units": "degrees_north", + } + ) + return x_coord_attrs, y_coord_attrs + + +def _setup_xy_dimensions( + hf: h5py.File, + dset: h5py.Dataset, + atr, + crs: pyproj.CRS, + xy_dim_names: tuple[str, str] = ("x", "y"), +): + """ + Setup time dimension in the HDF5 file. + + Parameters + ---------- + hf : h5py.File + HDF5 file object. + dset : h5py.Dataset + Dataset within the HDF5 file. + atr : dict + MintPy attribute dictionary containing ROIPAC raster attributes. + xy_dim_names : tuple[str, str], optional + x and y dimension names, by default ("x", "y"). + + Returns + ------- + Optional[list[datetime.datetime]] + list of dates if they exist, otherwise None. + """ + x_dim_name, y_dim_name = xy_dim_names + # add metadata to x, y coordinates + x_arr, y_arr = _get_xy_arrays(atr) + x_dim_dset = hf.create_dataset(x_dim_name, data=x_arr) + x_dim_dset.make_scale(x_dim_name) + y_dim_dset = hf.create_dataset(y_dim_name, data=y_arr) + y_dim_dset.make_scale(y_dim_name) + + x_coord_attrs, y_coord_attrs = _get_coordinate_metadata(crs) + + y_dim_dset.attrs.update(y_coord_attrs) + x_dim_dset.attrs.update(x_coord_attrs) + + ndim = dset.ndim + dset.dims[ndim - 1].attach_scale(x_dim_dset) + dset.dims[ndim - 2].attach_scale(y_dim_dset) + dset.dims[ndim - 1].label = x_dim_name + dset.dims[ndim - 2].label = y_dim_name + + +def _setup_time_dimension(hf: h5py.File, dset: h5py.Dataset): + """Setup time dimension in the HDF5 file. + + Parameters + ---------- + hf : h5py.File + HDF5 file object. + dset : h5py.Dataset + Dataset within the HDF5 file. + """ + if "date" not in hf: + # If we want to do something other than time as a 3rd dimension + # (e.g. for ifg date pairs) we'll need to figure out what other valid + # dims there are. 
From ffc60abc4504b576572adbd9284521d2755574a7 Mon Sep 17 00:00:00 2001
From: Scott Staniewicz
Date: Fri, 18 Aug 2023 16:26:13 -0700
Subject: [PATCH 02/15] move `time` to a global constant for now

---
 src/mintpy/prep_utils.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/mintpy/prep_utils.py b/src/mintpy/prep_utils.py
index 5b659803a..9beb5b55d 100644
--- a/src/mintpy/prep_utils.py
+++ b/src/mintpy/prep_utils.py
 
 __all__ = ["write_coordinate_system"]
 
+TIME_DSET_NAME = "time"
+
 
 def write_coordinate_system(
     filename: str,
 
         datetime.datetime.strptime(ds, "%Y%m%d") for ds in hf["date"][()].astype(str)
     ]
     days_since = [(d - date_arr[0]).days for d in date_arr]
-    dt_dim = hf.create_dataset("time", data=days_since)
+    dt_dim = hf.create_dataset(TIME_DSET_NAME, data=days_since)
     dt_dim.make_scale()
     cf_attrs = dict(
         units=f"days since {str(date_arr[0])}", calendar="proleptic_gregorian"
     )
     dt_dim.attrs.update(cf_attrs)
     dset.dims[0].attach_scale(dt_dim)
-    dset.dims[0].label = "time"
+    dset.dims[0].label = TIME_DSET_NAME
From 9a6a21677f163f665c393089dfdcfbf19df4b744 Mon Sep 17 00:00:00 2001
From: Scott Staniewicz
Date: Fri, 18 Aug 2023 18:47:02 -0700
Subject: [PATCH 03/15] keep coordinate functions in separate module, run by default in `layout_hdf5`

---
 src/mintpy/{ => utils}/prep_utils.py | 73 ++++++++++++++++++++--------
 src/mintpy/utils/writefile.py        |  5 +-
 2 files changed, 56 insertions(+), 22 deletions(-)
 rename src/mintpy/{ => utils}/prep_utils.py (75%)

diff --git a/src/mintpy/prep_utils.py b/src/mintpy/utils/prep_utils.py
similarity index 75%
rename from src/mintpy/prep_utils.py
rename to src/mintpy/utils/prep_utils.py
index 9beb5b55d..83e034930 100644
--- a/src/mintpy/prep_utils.py
+++ b/src/mintpy/utils/prep_utils.py
 
 def write_coordinate_system(
     filename: str,
-    dset_name: str,
+    dset_names: list[str],
     xy_dim_names: tuple[str, str] = ("x", "y"),
     grid_mapping_dset: str = "spatial_ref",
 ) -> None:
-    """
-    Write the coordinate system CF metadata to an existing HDF5 file.
+    """Write the coordinate system metadata to datasets within `filename`.
 
     Parameters
     ----------
     filename : str
         File path.
-    dset_name : str
-        Dataset name within the HDF5 file.
+    dset_names : list[str]
+        List of dataset names within the HDF5 file.
     xy_dim_names : tuple[str, str], optional
         x and y dimension names, by default ("x", "y").
     grid_mapping_dset : str, optional
         Dataset name for the grid mapping HDF5 attribute.
         By default "spatial_ref" (matching `rioxarray`).
     """
     atr = readfile.read_attribute(filename)
     epsg = int(atr.get("EPSG", 4326))
+    crs = pyproj.CRS.from_epsg(epsg)
 
     with h5py.File(filename, "a") as hf:
-        crs = pyproj.CRS.from_epsg(epsg)
-        dset = hf[dset_name]
-
-        # Setup the dataset holding the SRS information
+        _setup_xy_dimensions(hf, atr, crs, xy_dim_names)
         srs_dset = hf.require_dataset(grid_mapping_dset, shape=(), dtype=int)
         srs_dset.attrs.update(crs.to_cf())
-        dset.attrs["grid_mapping"] = grid_mapping_dset
 
-        _setup_time_dimension(hf, dset)
-        _setup_xy_dimensions(hf, dset, atr, crs, xy_dim_names)
+        for dset_name in dset_names:
+            dset = hf[dset_name]
+            # Point this dataset at the grid mapping (SRS) dataset
+            dset.attrs["grid_mapping"] = grid_mapping_dset
+            _attach_xy_dimensions(hf, dset, xy_dim_names)
+            if dset.ndim == 3:
+                _setup_time_dimension(hf, dset)
 
 
 def _get_xy_arrays(atr: dict) -> tuple[np.ndarray, np.ndarray]:
 
 
 def _setup_xy_dimensions(
     hf: h5py.File,
-    dset: h5py.Dataset,
-    atr,
+    atr: dict,
     crs: pyproj.CRS,
     xy_dim_names: tuple[str, str] = ("x", "y"),
 ):
-    """
-    Setup time dimension in the HDF5 file.
+    """Create X/Y spatial dimension in the HDF5 file.
+
+    Sets the datasets as HDF5 dimension scales.
 
     Parameters
     ----------
     hf : h5py.File
         HDF5 file object.
-    dset : h5py.Dataset
-        Dataset within the HDF5 file.
     atr : dict
         MintPy attribute dictionary containing ROIPAC raster attributes.
+    crs : pyproj.CRS
+        Coordinate Reference System.
     xy_dim_names : tuple[str, str], optional
         x and y dimension names, by default ("x", "y").
-
-    Returns
-    -------
-    Optional[list[datetime.datetime]]
-        list of dates if they exist, otherwise None.
     """
     x_dim_name, y_dim_name = xy_dim_names
     # add metadata to x, y coordinates
     x_arr, y_arr = _get_xy_arrays(atr)
-    x_dim_dset = hf.create_dataset(x_dim_name, data=x_arr)
+    x_dim_dset = hf.require_dataset(
+        x_dim_name, shape=x_arr.shape, dtype=x_arr.dtype, data=x_arr
+    )
     x_dim_dset.make_scale(x_dim_name)
-    y_dim_dset = hf.create_dataset(y_dim_name, data=y_arr)
+    y_dim_dset = hf.require_dataset(
+        y_dim_name, shape=y_arr.shape, dtype=y_arr.dtype, data=y_arr
+    )
     y_dim_dset.make_scale(y_dim_name)
 
     x_coord_attrs, y_coord_attrs = _get_coordinate_metadata(crs)
 
     y_dim_dset.attrs.update(y_coord_attrs)
     x_dim_dset.attrs.update(x_coord_attrs)
 
+
+def _attach_xy_dimensions(
+    hf: h5py.File,
+    dset: h5py.Dataset,
+    xy_dim_names: tuple[str, str] = ("x", "y"),
+):
+    """Attach the x/y dimension scales to `dset` and label its dimensions.
+
+    Parameters
+    ----------
+    hf : h5py.File
+        HDF5 file object.
+    dset : h5py.Dataset
+        Dataset within the HDF5 file.
+    xy_dim_names : tuple[str, str], optional
+        x and y dimension names, by default ("x", "y").
+    """
+    x_dim_name, y_dim_name = xy_dim_names
     ndim = dset.ndim
-    dset.dims[ndim - 1].attach_scale(x_dim_dset)
-    dset.dims[ndim - 2].attach_scale(y_dim_dset)
+    dset.dims[ndim - 1].attach_scale(hf[x_dim_name])
+    dset.dims[ndim - 2].attach_scale(hf[y_dim_name])
     dset.dims[ndim - 1].label = x_dim_name
     dset.dims[ndim - 2].label = y_dim_name
 
 
 def _setup_time_dimension(hf: h5py.File, dset: h5py.Dataset):
         datetime.datetime.strptime(ds, "%Y%m%d") for ds in hf["date"][()].astype(str)
     ]
     days_since = [(d - date_arr[0]).days for d in date_arr]
-    dt_dim = hf.create_dataset(TIME_DSET_NAME, data=days_since)
+    dt_dim = hf.require_dataset(TIME_DSET_NAME, data=days_since)
     dt_dim.make_scale()
     cf_attrs = dict(
         units=f"days since {str(date_arr[0])}", calendar="proleptic_gregorian"
     )

diff --git a/src/mintpy/utils/writefile.py b/src/mintpy/utils/writefile.py
index 3862d3c7c..aa8b71cf8 100644
--- a/src/mintpy/utils/writefile.py
+++ b/src/mintpy/utils/writefile.py
 
 import h5py
 import numpy as np
 
-from mintpy.utils import readfile
+from mintpy.utils import prep_utils, readfile
 
 
 def write(datasetDict, out_file, metadata=None, ref_file=None, compression=None, ds_unit_dict=None, print_msg=True):
 
         f[key].attrs['UNIT'] = value
         vprint(f'add /{key:<{max_digit}} attribute: UNIT = {value}')
 
+    vprint(f'Adding coordinate metadata to all datasets in {fname}')
+    prep_utils.write_coordinate_system(fname, list(ds_unit_dict.keys()))
+
     vprint(f'close HDF5 file: {fname}')
 
     return fname
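With the call wired into `layout_hdf5`, any geocoded stack written by MintPy should become readable through GDAL's netCDF driver. A quick check, sketched with placeholder names (this is the same pattern the unit test added later in patch 09 uses):

    # Sketch: open one subdataset of a written stack via the netCDF driver.
    from osgeo import gdal

    ds = gdal.Open("NETCDF:timeseries.h5:timeseries")  # hypothetical file
    print(ds.GetSpatialRef().GetAuthorityCode(None))   # EPSG code, e.g. "32611"
    print(ds.GetGeoTransform())                        # (x0, dx, 0, y0, 0, dy)
    ds = None  # close the GDAL dataset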
+ """ + x_dim_name, y_dim_name = xy_dim_names ndim = dset.ndim - dset.dims[ndim - 1].attach_scale(x_dim_dset) - dset.dims[ndim - 2].attach_scale(y_dim_dset) + dset.dims[ndim - 1].attach_scale(hf[x_dim_name]) + dset.dims[ndim - 2].attach_scale(hf[y_dim_name]) dset.dims[ndim - 1].label = x_dim_name dset.dims[ndim - 2].label = y_dim_name @@ -201,7 +232,7 @@ def _setup_time_dimension(hf: h5py.File, dset: h5py.Dataset): datetime.datetime.strptime(ds, "%Y%m%d") for ds in hf["date"][()].astype(str) ] days_since = [(d - date_arr[0]).days for d in date_arr] - dt_dim = hf.create_dataset(TIME_DSET_NAME, data=days_since) + dt_dim = hf.require_dataset(TIME_DSET_NAME, data=days_since) dt_dim.make_scale() cf_attrs = dict( units=f"days since {str(date_arr[0])}", calendar="proleptic_gregorian" diff --git a/src/mintpy/utils/writefile.py b/src/mintpy/utils/writefile.py index 3862d3c7c..aa8b71cf8 100644 --- a/src/mintpy/utils/writefile.py +++ b/src/mintpy/utils/writefile.py @@ -14,7 +14,7 @@ import h5py import numpy as np -from mintpy.utils import readfile +from mintpy.utils import prep_utils, readfile def write(datasetDict, out_file, metadata=None, ref_file=None, compression=None, ds_unit_dict=None, print_msg=True): @@ -382,6 +382,9 @@ def layout_hdf5(fname, ds_name_dict=None, metadata=None, ds_unit_dict=None, ref_ f[key].attrs['UNIT'] = value vprint(f'add /{key:<{max_digit}} attribute: UNIT = {value}') + vprint(f'Adding coordinate metadata to all datasets in {fname}') + prep_utils.write_coordinate_system(fname, list(ds_unit_dict.keys())) + vprint(f'close HDF5 file: {fname}') return fname From a2e8b4e75394e1a1df996552bccd4d686ebd9207 Mon Sep 17 00:00:00 2001 From: Scott Staniewicz Date: Fri, 18 Aug 2023 19:40:02 -0700 Subject: [PATCH 04/15] make `time` a np.array to pass to `require_dataset` --- src/mintpy/utils/prep_utils.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mintpy/utils/prep_utils.py b/src/mintpy/utils/prep_utils.py index 83e034930..7b8f21387 100644 --- a/src/mintpy/utils/prep_utils.py +++ b/src/mintpy/utils/prep_utils.py @@ -231,8 +231,10 @@ def _setup_time_dimension(hf: h5py.File, dset: h5py.Dataset): date_arr = [ datetime.datetime.strptime(ds, "%Y%m%d") for ds in hf["date"][()].astype(str) ] - days_since = [(d - date_arr[0]).days for d in date_arr] - dt_dim = hf.require_dataset(TIME_DSET_NAME, data=days_since) + days_since = np.array([(d - date_arr[0]).days for d in date_arr]) + dt_dim = hf.require_dataset( + TIME_DSET_NAME, data=days_since, shape=days_since.shape, dtype=days_since.dtype + ) dt_dim.make_scale() cf_attrs = dict( units=f"days since {str(date_arr[0])}", calendar="proleptic_gregorian" From 0e10bb7ebfe295c79dc59be67e007364eba97404 Mon Sep 17 00:00:00 2001 From: Scott Staniewicz Date: Fri, 18 Aug 2023 19:50:36 -0700 Subject: [PATCH 05/15] use `ds_name_dict` to write coordinates, not unit dict --- src/mintpy/utils/writefile.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mintpy/utils/writefile.py b/src/mintpy/utils/writefile.py index aa8b71cf8..a4f9f0a96 100644 --- a/src/mintpy/utils/writefile.py +++ b/src/mintpy/utils/writefile.py @@ -382,8 +382,9 @@ def layout_hdf5(fname, ds_name_dict=None, metadata=None, ds_unit_dict=None, ref_ f[key].attrs['UNIT'] = value vprint(f'add /{key:<{max_digit}} attribute: UNIT = {value}') - vprint(f'Adding coordinate metadata to all datasets in {fname}') - prep_utils.write_coordinate_system(fname, list(ds_unit_dict.keys())) + if ds_name_dict: + vprint(f'Adding coordinate metadata to all 
From 0e10bb7ebfe295c79dc59be67e007364eba97404 Mon Sep 17 00:00:00 2001
From: Scott Staniewicz
Date: Fri, 18 Aug 2023 19:50:36 -0700
Subject: [PATCH 05/15] use `ds_name_dict` to write coordinates, not unit dict

---
 src/mintpy/utils/writefile.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/mintpy/utils/writefile.py b/src/mintpy/utils/writefile.py
index aa8b71cf8..a4f9f0a96 100644
--- a/src/mintpy/utils/writefile.py
+++ b/src/mintpy/utils/writefile.py
 
         f[key].attrs['UNIT'] = value
         vprint(f'add /{key:<{max_digit}} attribute: UNIT = {value}')
 
-    vprint(f'Adding coordinate metadata to all datasets in {fname}')
-    prep_utils.write_coordinate_system(fname, list(ds_unit_dict.keys()))
+    if ds_name_dict:
+        vprint(f'Adding coordinate metadata to all datasets in {fname}')
+        prep_utils.write_coordinate_system(fname, list(ds_name_dict.keys()))
 
     vprint(f'close HDF5 file: {fname}')

From 3ef6e582443a943f42b7c2de69740fc8dcce50e0 Mon Sep 17 00:00:00 2001
From: Scott Staniewicz
Date: Fri, 18 Aug 2023 20:16:54 -0700
Subject: [PATCH 06/15] add a check for geo vs radar coords

---
 src/mintpy/utils/prep_utils.py | 6 ++++++
 src/mintpy/utils/writefile.py  | 6 +++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/mintpy/utils/prep_utils.py b/src/mintpy/utils/prep_utils.py
index 7b8f21387..d5db3f55a 100644
--- a/src/mintpy/utils/prep_utils.py
+++ b/src/mintpy/utils/prep_utils.py
 
     atr = readfile.read_attribute(filename)
+    if "X_FIRST" not in atr:
+        raise ValueError(f"{filename} is not geocoded.")
+    # TODO: It should be possible to write in the 2D lat/lon arrays
+    # for radar coordinates.
+    # But it's unclear we'd get the same quick benefit as with geocoded files.
+
     epsg = int(atr.get("EPSG", 4326))
     crs = pyproj.CRS.from_epsg(epsg)
 
diff --git a/src/mintpy/utils/writefile.py b/src/mintpy/utils/writefile.py
index a4f9f0a96..e5aaf4abc 100644
--- a/src/mintpy/utils/writefile.py
+++ b/src/mintpy/utils/writefile.py
 
     if ds_name_dict:
         vprint(f'Adding coordinate metadata to all datasets in {fname}')
-        prep_utils.write_coordinate_system(fname, list(ds_name_dict.keys()))
+        try:
+            prep_utils.write_coordinate_system(fname, list(ds_name_dict.keys()))
+        except ValueError:
+            # Not a geocoded file
+            pass
 
     vprint(f'close HDF5 file: {fname}')

From bec83b805bbd9ab3e07ced51726325f39c14739f Mon Sep 17 00:00:00 2001
From: Scott Staniewicz
Date: Sat, 19 Aug 2023 10:08:37 -0700
Subject: [PATCH 07/15] account for 0-D `spatial_ref` dataset holding GDAL metadata

---
 src/mintpy/objects/stack.py    | 4 ++--
 src/mintpy/save_hdfeos5.py     | 2 ++
 src/mintpy/utils/prep_utils.py | 3 +++
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/mintpy/objects/stack.py b/src/mintpy/objects/stack.py
index 655bf6813..11d1fc13a 100644
--- a/src/mintpy/objects/stack.py
+++ b/src/mintpy/objects/stack.py
 
             if print_msg:
                 print(f'reading {familyName:<15} data from file: {self.file} ...')
 
-            if len(ds.shape) == 1:
-                data = ds[:]
+            if len(ds.shape) <= 1:
+                data = ds[()]
 
             elif len(ds.shape) == 2:
                 data = ds[box[1]:box[3], box[0]:box[2]]
 
             else:

diff --git a/src/mintpy/save_hdfeos5.py b/src/mintpy/save_hdfeos5.py
index 8c05b88c2..1c95a5007 100644
--- a/src/mintpy/save_hdfeos5.py
+++ b/src/mintpy/save_hdfeos5.py
 
             else:
                 raise ValueError(f'Un-recognized dataset name: {dsName}!')
 
+        if data.ndim < 1:  # Skip the grid mapping scalar dataset
+            continue
         # write
         dset = create_hdf5_dataset(group, dsName, data)

diff --git a/src/mintpy/utils/prep_utils.py b/src/mintpy/utils/prep_utils.py
index d5db3f55a..cda18ccb1 100644
--- a/src/mintpy/utils/prep_utils.py
+++ b/src/mintpy/utils/prep_utils.py
 
         for dset_name in dset_names:
             dset = hf[dset_name]
+            if dset.ndim < 2:
+                print(f"Skipping {dset.ndim}-dimensional dset {dset_name}")
+                continue
             # Point this dataset at the grid mapping (SRS) dataset
             dset.attrs["grid_mapping"] = grid_mapping_dset
             _attach_xy_dimensions(hf, dset, xy_dim_names)
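The `stack.py` change in this patch is needed because the new `spatial_ref` dataset is zero-dimensional: h5py rejects slice indexing on scalar dataspaces, while the empty-tuple index reads a dataset of any rank. A short illustration (throwaway file name, not part of the patch):

    # Scalar (0-D) HDF5 datasets cannot be sliced with [:]; [()] works for all.
    import h5py

    with h5py.File("scratch.h5", "w") as hf:
        scalar = hf.create_dataset("spatial_ref", shape=(), dtype=int)
        vector = hf.create_dataset("x", data=[1, 2, 3])
        print(vector[:])   # fine: array([1, 2, 3])
        print(scalar[()])  # fine: 0
        # scalar[:] raises ValueError: Illegal slicing argument for scalar dataspace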
From b8b50b4586e29b964e0fa93647d3f1614947823 Mon Sep 17 00:00:00 2001
From: Scott Staniewicz
Date: Sat, 19 Aug 2023 10:49:11 -0700
Subject: [PATCH 08/15] use custom exception in prep_utils

We want to distinguish from other ValueErrors
---
 src/mintpy/utils/prep_utils.py | 6 +++++-
 src/mintpy/utils/writefile.py  | 3 ++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/mintpy/utils/prep_utils.py b/src/mintpy/utils/prep_utils.py
index cda18ccb1..1b0e93e5d 100644
--- a/src/mintpy/utils/prep_utils.py
+++ b/src/mintpy/utils/prep_utils.py
 
 TIME_DSET_NAME = "time"
 
+class CoordinateError(ValueError):
+    pass
+
 def write_coordinate_system(
     filename: str,
     dset_names: list[str],
 
     atr = readfile.read_attribute(filename)
     if "X_FIRST" not in atr:
-        raise ValueError(f"{filename} is not geocoded.")
+        raise CoordinateError(f"{filename} is not geocoded.")
     # TODO: It should be possible to write in the 2D lat/lon arrays
     # for radar coordinates.
     # But it's unclear we'd get the same quick benefit as with geocoded files.
 
     if "date" not in hf:
+        print(f"'date' dataset not in {hf}, skipping 'time' dimension")
         # If we want to do something other than time as a 3rd dimension
         # (e.g. for ifg date pairs) we'll need to figure out what other valid
         # dims there are. otherwise, you can just do `phony_dims="sort"` in xarray
         return None

diff --git a/src/mintpy/utils/writefile.py b/src/mintpy/utils/writefile.py
index e5aaf4abc..553ab7343 100644
--- a/src/mintpy/utils/writefile.py
+++ b/src/mintpy/utils/writefile.py
 
         vprint(f'Adding coordinate metadata to all datasets in {fname}')
         try:
             prep_utils.write_coordinate_system(fname, list(ds_name_dict.keys()))
-        except ValueError:
+        except prep_utils.CoordinateError:
+            vprint('Skipping, not geocoded.')
             # Not a geocoded file
             pass
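Because `CoordinateError` subclasses `ValueError`, any existing caller that catches `ValueError` keeps working, while new code can target only the not-geocoded case. The intended calling pattern, sketched with a placeholder file and dataset name:

    # Only the "not geocoded" failure is silenced; other errors still propagate.
    from mintpy.utils import prep_utils

    try:
        prep_utils.write_coordinate_system("inputs/ifgramStack.h5", ["unwrapPhase"])
    except prep_utils.CoordinateError:
        print("radar-coded file; skipping coordinate metadata")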
From f84efc932a097d0d8dced0672fa031d30b4e3964 Mon Sep 17 00:00:00 2001
From: Scott Staniewicz
Date: Sat, 19 Aug 2023 10:49:59 -0700
Subject: [PATCH 09/15] add a pytest unit test for 2d / 3d coordinate writing

---
 tests/test_prep_utils.py | 124 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 124 insertions(+)
 create mode 100644 tests/test_prep_utils.py

diff --git a/tests/test_prep_utils.py b/tests/test_prep_utils.py
new file mode 100644
index 000000000..d6ff16175
--- /dev/null
+++ b/tests/test_prep_utils.py
@@ -0,0 +1,124 @@
+from datetime import date, timedelta
+
+import h5py
+import numpy as np
+import pytest
+from osgeo import gdal
+
+from mintpy.utils import prep_utils, writefile
+
+gdal.UseExceptions()
+
+
+@pytest.fixture
+def metadata():
+    return {
+        "EPSG": "32611",
+        "X_FIRST": "480540.0",
+        "X_STEP": "30.0",
+        "X_UNIT": "meters",
+        "Y_FIRST": "3902670.0",
+        "Y_STEP": "-30.0",
+        "Y_UNIT": "meters",
+        "LENGTH": "100",
+        "WIDTH": "100",
+        "REF_X": "50",
+        "REF_Y": "50",
+        # Sample S1 metadata
+        "ALOOKS": "1",
+        "AZIMUTH_PIXEL_SIZE": "14.1",
+        "CENTER_LINE_UTC": "49436.654675",
+        "EARTH_RADIUS": "6371000.0",
+        "HEIGHT": "750000.0",
+        "ORBIT_DIRECTION": "Descending",
+        "PLATFORM": "S1A",
+        "RANGE_PIXEL_SIZE": "2.329562114715323",
+        "RLOOKS": "1",
+        "STARTING_RANGE": "845960.7488998839",
+        "UNIT": "m",
+        "WAVELENGTH": "0.05546576",
+        "DATA_TYPE": "float32",
+        "PROCESSOR": "mintpy",
+        "NO_DATA_VALUE": "none",
+    }
+
+
+@pytest.fixture
+def data2d(metadata):
+    rows = int(metadata["LENGTH"])
+    cols = int(metadata["WIDTH"])
+    return np.random.randn(rows, cols).astype("float32")
+
+
+@pytest.fixture
+def dates():
+    num_dates = 10
+    date_list = [date(2020, 1, 1) + i * timedelta(days=12) for i in range(num_dates)]
+    date_strs = [d.strftime("%Y%m%d") for d in date_list]
+    return np.array(date_strs, dtype=np.string_)
+
+
+@pytest.fixture
+def data3d(dates, metadata):
+    rows = int(metadata["LENGTH"])
+    cols = int(metadata["WIDTH"])
+    return np.random.randn(len(dates), rows, cols).astype("float32")
+
+
+def _check_gdal_metadata(gdal_str, meta):
+    ds = gdal.Open(gdal_str)
+    assert ds is not None
+    assert int(ds.GetSpatialRef().GetAuthorityCode(None)) == int(meta["EPSG"])
+
+    gt = ds.GetGeoTransform()
+    # GeoTransform is (X_FIRST, X_STEP, 0.0, Y_FIRST, 0.0, Y_STEP)
+    assert gt[0] == float(meta["X_FIRST"])
+    assert gt[3] == float(meta["Y_FIRST"])
+    assert gt[1] == float(meta["X_STEP"])
+    assert gt[5] == float(meta["Y_STEP"])
+    ds = None
+
+
+def test_2d(metadata, data2d, tmp_path):
+    rows, cols = data2d.shape
+    ds_name_dict = {
+        "velocity": [np.float32, (rows, cols), data2d],
+    }
+
+    # initiate HDF5 file
+    metadata["FILE_TYPE"] = "velocity"
+    # meta["REF_DATE"] = ref_date  # might not be the first date!
+    outfile = tmp_path / "velocity.h5"
+    writefile.layout_hdf5(str(outfile), ds_name_dict, metadata=metadata)
+
+    # Check the metadata was written
+    with h5py.File(outfile) as hf:
+        assert "x" in hf
+        assert "y" in hf
+        assert prep_utils.TIME_DSET_NAME not in hf
+        assert "spatial_ref" in hf
+
+    _check_gdal_metadata(f"NETCDF:{outfile}:velocity", metadata)
+
+
+def test_3d(metadata, dates, data3d, tmp_path):
+    num_dates, rows, cols = data3d.shape
+    ds_name_dict = {
+        "date": [dates.dtype, (num_dates,), dates],
+        "timeseries": [np.float32, (num_dates, rows, cols), data3d],
+    }
+
+    # initiate HDF5 file
+    metadata["FILE_TYPE"] = "timeseries"
+    # meta["REF_DATE"] = ref_date  # might not be the first date!
+    outfile = tmp_path / "timeseries.h5"
+    writefile.layout_hdf5(str(outfile), ds_name_dict, metadata=metadata)
+
+    # Check the metadata was written
+    with h5py.File(outfile) as hf:
+        assert "x" in hf
+        assert "y" in hf
+        assert prep_utils.TIME_DSET_NAME in hf
+        assert "spatial_ref" in hf
+
+    _check_gdal_metadata(f"NETCDF:{outfile}:timeseries", metadata)
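The new tests cover both the 2-D (velocity) and 3-D (timeseries) paths through `layout_hdf5`. Assuming pytest and the GDAL Python bindings are installed, they can be run on their own with:

    python -m pytest tests/test_prep_utils.py -v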
From eb57687b82ddf0515422ca2b4bc23b2476913399 Mon Sep 17 00:00:00 2001
From: Scott Staniewicz
Date: Sat, 19 Aug 2023 11:05:59 -0700
Subject: [PATCH 10/15] add another check for 2D date dataset

we only want to convert 1D date to time
---
 src/mintpy/utils/prep_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mintpy/utils/prep_utils.py b/src/mintpy/utils/prep_utils.py
index 1b0e93e5d..cc4cb7fa4 100644
--- a/src/mintpy/utils/prep_utils.py
+++ b/src/mintpy/utils/prep_utils.py
 
-    if "date" not in hf:
+    if "date" not in hf or hf["dset"].ndim == 2:
         print(f"'date' dataset not in {hf}, skipping 'time' dimension")
         # If we want to do something other than time as a 3rd dimension
         # (e.g. for ifg date pairs) we'll need to figure out what other valid

From 5197cf2b8e5651d8203f2857cdfe175ff6d0eb1d Mon Sep 17 00:00:00 2001
From: Scott Staniewicz
Date: Sat, 19 Aug 2023 11:10:16 -0700
Subject: [PATCH 11/15] print exception from `prep_aria`

---
 src/mintpy/load_data.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mintpy/load_data.py b/src/mintpy/load_data.py
index 972e01228..a1d478e10 100644
--- a/src/mintpy/load_data.py
+++ b/src/mintpy/load_data.py
 
         ut.print_command_line(script_name, iargs)
         try:
             prep_module.main(iargs)
-        except:
-            warnings.warn('prep_aria.py failed. Assuming its result exists and continue...')
+        except Exception as e:
+            warnings.warn(f'prep_aria.py failed: {e}\n Assuming its result exists and continuing...')
 
     elif processor == 'gmtsar':
         # use the custom template file if exists & input
From 5dcf27b530d9f6bfd401bd7d26ca768b8091a0b2 Mon Sep 17 00:00:00 2001
From: Scott Staniewicz
Date: Sat, 19 Aug 2023 11:19:10 -0700
Subject: [PATCH 12/15] fix 0-D write file errors from reference in `layout_hdf5`

---
 src/mintpy/utils/prep_utils.py |  2 +-
 src/mintpy/utils/writefile.py  | 25 ++++++++++++++-----------
 tests/requirements.txt         |  1 +
 3 files changed, 16 insertions(+), 12 deletions(-)
 create mode 100644 tests/requirements.txt

diff --git a/src/mintpy/utils/prep_utils.py b/src/mintpy/utils/prep_utils.py
index cc4cb7fa4..01afc9107 100644
--- a/src/mintpy/utils/prep_utils.py
+++ b/src/mintpy/utils/prep_utils.py
 
-    if "date" not in hf or hf["dset"].ndim == 2:
+    if "date" not in hf or hf["date"].ndim == 2:
         print(f"'date' dataset not in {hf}, skipping 'time' dimension")
         # If we want to do something other than time as a 3rd dimension
         # (e.g. for ifg date pairs) we'll need to figure out what other valid

diff --git a/src/mintpy/utils/writefile.py b/src/mintpy/utils/writefile.py
index 553ab7343..7593e48ec 100644
--- a/src/mintpy/utils/writefile.py
+++ b/src/mintpy/utils/writefile.py
 
         ds = fr[key]
         if isinstance(ds, h5py.Dataset):
 
-            # auxliary dataset
+            # auxiliary dataset
             if ds.shape[-2:] != shape2d_orig:
-                ds_name_dict[key] = [ds.dtype, ds.shape, ds[:]]
+                ds_name_dict[key] = [ds.dtype, ds.shape, ds[()]]
 
             # dataset
             else:
 
                                t=str(data_type),
                                s=str(data_shape),
                                c=ds_comp))
-        ds = f.create_dataset(key,
-                              shape=data_shape,
-                              maxshape=max_shape,
-                              dtype=data_type,
-                              chunks=True,
-                              compression=ds_comp)
-
-        # write auxliary data
+        if len(data_shape) > 0:
+            kwargs = dict(
+                maxshape=max_shape,
+                chunks=True,
+                compression=ds_comp
+            )
+        else:
+            kwargs = {}
+        ds = f.create_dataset(key, shape=data_shape, dtype=data_type, **kwargs)
+
+        # write auxiliary data
         if len(ds_name_dict[key]) > 2 and ds_name_dict[key][2] is not None:
-            ds[:] = np.array(ds_name_dict[key][2])
+            ds[()] = np.array(ds_name_dict[key][2])

diff --git a/tests/requirements.txt b/tests/requirements.txt
new file mode 100644
index 000000000..e079f8a60
--- /dev/null
+++ b/tests/requirements.txt
@@ -0,0 +1 @@
+pytest

From 49927c69c531f2867ff408b66a0f6d6f170648cd Mon Sep 17 00:00:00 2001
From: Scott Staniewicz
Date: Sat, 19 Aug 2023 11:20:37 -0700
Subject: [PATCH 13/15] reformat and clarify why not chunking

---
 src/mintpy/utils/writefile.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/src/mintpy/utils/writefile.py b/src/mintpy/utils/writefile.py
index 7593e48ec..67586faae 100644
--- a/src/mintpy/utils/writefile.py
+++ b/src/mintpy/utils/writefile.py
 
                                t=str(data_type),
                                s=str(data_shape),
                                c=ds_comp))
-        if len(data_shape) > 0:
-            kwargs = dict(
-                maxshape=max_shape,
-                chunks=True,
-                compression=ds_comp
-            )
-        else:
+        if len(data_shape) < 1:
+            # scalar datasets can't be chunked
            kwargs = {}
+        else:
+            kwargs = dict(maxshape=max_shape, chunks=True, compression=ds_comp)
         ds = f.create_dataset(key, shape=data_shape, dtype=data_type, **kwargs)
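The comment added here reflects an HDF5 restriction: chunked layout (and therefore compression) is only defined for non-scalar dataspaces, so the 0-D `spatial_ref` dataset must be created without those options. An illustration (throwaway file name, not part of the patch):

    # HDF5 rejects chunk/filter options on scalar dataspaces.
    import h5py

    with h5py.File("scratch.h5", "w") as hf:
        hf.create_dataset("ok", shape=(100, 100), chunks=True)
        # hf.create_dataset("bad", shape=(), chunks=True) raises:
        # TypeError: Scalar datasets don't support chunk/filter options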
From 8ca67c334693fc8cec2153dec3195335cf42be4b Mon Sep 17 00:00:00 2001
From: Scott Staniewicz
Date: Sat, 19 Aug 2023 11:33:17 -0700
Subject: [PATCH 14/15] remove `pass` to appease codacy

---
 src/mintpy/utils/writefile.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/mintpy/utils/writefile.py b/src/mintpy/utils/writefile.py
index 67586faae..a34a3c660 100644
--- a/src/mintpy/utils/writefile.py
+++ b/src/mintpy/utils/writefile.py
 
             prep_utils.write_coordinate_system(fname, list(ds_name_dict.keys()))
         except prep_utils.CoordinateError:
             vprint('Skipping, not geocoded.')
-            # Not a geocoded file
-            pass
 
     vprint(f'close HDF5 file: {fname}')

From 90b8f4b39ad36d04c68d996068dd3ba1106d827a Mon Sep 17 00:00:00 2001
From: Zhang Yunjun
Date: Mon, 21 Aug 2023 16:02:06 +0800
Subject: [PATCH 15/15] add `except Exception as e` to all instances

---
 src/mintpy/load_data.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/mintpy/load_data.py b/src/mintpy/load_data.py
index a1d478e10..fd41b67ab 100644
--- a/src/mintpy/load_data.py
+++ b/src/mintpy/load_data.py
 
         ut.print_command_line(script_name, iargs)
         try:
             prep_module.main(iargs)
-        except:
-            warnings.warn('prep_isce.py failed. Assuming its result exists and continue...')
+        except Exception as e:
+            warnings.warn(f'prep_isce.py failed: {e}.\nAssuming its result exists and continuing...')
 
         # [optional] for topsStack: SAFE_files.txt --> S1A/B_date.txt
         if os.path.isfile(meta_file) and isce_utils.get_processor(meta_file) == 'topsStack':
 
         try:
             prep_module.main(iargs)
         except Exception as e:
-            warnings.warn(f'prep_aria.py failed: {e}\n Assuming its result exists and continuing...')
+            warnings.warn(f'prep_aria.py failed: {e}.\n Assuming its result exists and continuing...')
 
     elif processor == 'gmtsar':
         # use the custom template file if exists & input
 
         ut.print_command_line(script_name, iargs)
         try:
             prep_module.main(iargs)
-        except:
-            warnings.warn('prep_gmtsar.py failed. Assuming its result exists and continue...')
+        except Exception as e:
+            warnings.warn(f'prep_gmtsar.py failed: {e}.\nAssuming its result exists and continuing...')
 
     return
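Taken together, the series lets a geocoded stack written through `layout_hdf5` come out georeferenced and self-describing. An end-to-end sketch mirroring `tests/test_prep_utils.py` (the metadata dict is pared down from the test fixture, and real MintPy runs may require more attributes; reading back assumes `xarray` with the `h5netcdf` backend):

    import numpy as np
    import xarray as xr

    from mintpy.utils import writefile

    meta = {
        "FILE_TYPE": "timeseries", "EPSG": "32611", "DATA_TYPE": "float32",
        "X_FIRST": "480540.0", "X_STEP": "30.0",
        "Y_FIRST": "3902670.0", "Y_STEP": "-30.0",
        "LENGTH": "100", "WIDTH": "100",
    }
    dates = np.array([b"20200101", b"20200113", b"20200125"])
    data = np.zeros((len(dates), 100, 100), dtype=np.float32)
    ds_name_dict = {
        "date": [dates.dtype, dates.shape, dates],
        "timeseries": [np.float32, data.shape, data],
    }
    writefile.layout_hdf5("timeseries.h5", ds_name_dict, metadata=meta)

    ds = xr.open_dataset("timeseries.h5", engine="h5netcdf")
    print(ds["timeseries"].dims)  # expected: ('time', 'y', 'x')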