Add function to create CF-compliant arrays/metadata for HDF5 stacks #1073

Open · wants to merge 16 commits into base: main
12 changes: 6 additions & 6 deletions src/mintpy/load_data.py
@@ -676,8 +676,8 @@ def prepare_metadata(iDict):
        ut.print_command_line(script_name, iargs)
        try:
            prep_module.main(iargs)
-        except:
-            warnings.warn('prep_isce.py failed. Assuming its result exists and continue...')
+        except Exception as e:
+            warnings.warn(f'prep_isce.py failed: {e}\nAssuming its result exists and continuing...')

        # [optional] for topsStack: SAFE_files.txt --> S1A/B_date.txt
        if os.path.isfile(meta_file) and isce_utils.get_processor(meta_file) == 'topsStack':
@@ -731,8 +731,8 @@ def prepare_metadata(iDict):
        ut.print_command_line(script_name, iargs)
        try:
            prep_module.main(iargs)
-        except:
-            warnings.warn('prep_aria.py failed. Assuming its result exists and continue...')
+        except Exception as e:
+            warnings.warn(f'prep_aria.py failed: {e}\nAssuming its result exists and continuing...')

    elif processor == 'gmtsar':
        # use the custom template file if exists & input
@@ -746,8 +746,8 @@ def prepare_metadata(iDict):
        ut.print_command_line(script_name, iargs)
        try:
            prep_module.main(iargs)
-        except:
-            warnings.warn('prep_gmtsar.py failed. Assuming its result exists and continue...')
+        except Exception as e:
+            warnings.warn(f'prep_gmtsar.py failed: {e}\nAssuming its result exists and continuing...')

    return

4 changes: 2 additions & 2 deletions src/mintpy/objects/stack.py
@@ -658,8 +658,8 @@ def read(self, datasetName=GEOMETRY_DSET_NAMES[0], box=None, print_msg=True):
        if print_msg:
            print(f'reading {familyName:<15} data from file: {self.file} ...')

-        if len(ds.shape) == 1:
-            data = ds[:]
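+        # Note: ds[()] reads the full dataset and, unlike ds[:], also works
+        # for 0-dimensional (scalar) datasets such as the grid-mapping dataset.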
+        if len(ds.shape) <= 1:
+            data = ds[()]
        elif len(ds.shape) == 2:
            data = ds[box[1]:box[3], box[0]:box[2]]
        else:
2 changes: 2 additions & 0 deletions src/mintpy/save_hdfeos5.py
@@ -406,6 +406,8 @@ def write_hdf5_file(metadata, out_file, ts_file, tcoh_file, scoh_file, mask_file
            else:
                raise ValueError(f'Un-recognized dataset name: {dsName}!')

+            if data.ndim < 1:  # Skip the grid mapping scalar dataset
+                continue
            # write
            dset = create_hdf5_dataset(group, dsName, data)

257 changes: 257 additions & 0 deletions src/mintpy/utils/prep_utils.py
@@ -0,0 +1,257 @@
"""Utilities to make HDF5 files CF-compliant and readable by GDAL."""
from __future__ import annotations

import datetime

import h5py
import numpy as np
import pyproj

from mintpy.utils import readfile

__all__ = ["write_coordinate_system"]

TIME_DSET_NAME = "time"


class CoordinateError(ValueError):
    pass


def write_coordinate_system(
    filename: str,
    dset_names: list[str],
    xy_dim_names: tuple[str, str] = ("x", "y"),
    grid_mapping_dset: str = "spatial_ref",
) -> None:
"""Write the coordinate system metadata to datasets within `filename`.

Parameters
----------
filename : str
File path.
dset_names : list[str]
list of dataset names within the HDF5 file.
xy_dim_names : tuple[str, str], optional
x and y dimension names, by default ("x", "y").
grid_mapping_dset : str, optional
Dataset name for grid mapping HDF5 attribute
By default "spatial_ref" (matching `rioxarray`).
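
    Examples
    --------
    A minimal usage sketch (file and dataset names are illustrative):

    >>> write_coordinate_system("geo_timeseries.h5", ["timeseries"])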
"""
    atr = readfile.read_attribute(filename)
    if "X_FIRST" not in atr:
        raise CoordinateError(f"{filename} is not geocoded.")
    # TODO: It should be possible to write in the 2D lat/lon arrays for
    # radar coordinates, but it is unclear what immediate benefit that
    # brings compared with geocoded files.

    epsg = int(atr.get("EPSG", 4326))
    crs = pyproj.CRS.from_epsg(epsg)

    with h5py.File(filename, "a") as hf:
        _setup_xy_dimensions(hf, atr, crs, xy_dim_names)
        # Set up the scalar dataset holding the SRS information
        srs_dset = hf.require_dataset(grid_mapping_dset, shape=(), dtype=int)
        srs_dset.attrs.update(crs.to_cf())

        for dset_name in dset_names:
            dset = hf[dset_name]
            if dset.ndim < 2:
                print(f"Skipping {dset.ndim}-dimensional dset {dset_name}")
                continue
            dset.attrs["grid_mapping"] = grid_mapping_dset
            _attach_xy_dimensions(hf, dset, xy_dim_names)
            if dset.ndim == 3:
                _setup_time_dimension(hf, dset)


def _get_xy_arrays(atr: dict) -> tuple[np.ndarray, np.ndarray]:
"""
Generate x and y arrays from attribute dictionary.

Parameters
----------
atr : dict
MintPy attribute dictionary containing ROIPAC raster attributes.

Returns
-------
tuple[np.ndarray, np.ndarray]
x and y arrays.
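
    Examples
    --------
    Illustrative arithmetic (made-up values): with ``X_FIRST=10.0``,
    ``X_STEP=0.5`` and ``WIDTH=3``, the x pixel centers are
    ``[10.25, 10.75, 11.25]``.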
"""
x0 = float(atr["X_FIRST"])
y0 = float(atr["Y_FIRST"])
x_step = float(atr["X_STEP"])
y_step = float(atr["Y_STEP"])
rows = int(atr["LENGTH"])
cols = int(atr["WIDTH"])

x_arr = x0 + x_step * np.arange(cols)
y_arr = y0 + y_step * np.arange(rows)

# Shift by half pixel to get the centers
x_arr += x_step / 2
y_arr += y_step / 2
return x_arr, y_arr


def _get_coordinate_metadata(crs: pyproj.CRS) -> tuple[dict, dict]:
"""Get HDF5 coordinate metadata based on CRS type.

Parameters
----------
crs : pyproj.CRS
Coordinate Reference System.

Returns
-------
tuple[dict, dict]
Dictionary of metadata for x and y respectively.
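
    Examples
    --------
    For a projected CRS, e.g. ``pyproj.CRS.from_epsg(32611)`` (UTM zone 11N),
    the returned x dict is ``{"axis": "X", "long_name": "x coordinate of
    projection", "standard_name": "projection_x_coordinate", "units": "meters"}``.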
"""
x_coord_attrs = {"axis": "X"}
y_coord_attrs = {"axis": "Y"}

if crs.is_projected:
units = "meters"
# X metadata
x_coord_attrs.update(
{
"long_name": "x coordinate of projection",
"standard_name": "projection_x_coordinate",
"units": units,
}
)
# Y metadata
y_coord_attrs.update(
{
"long_name": "y coordinate of projection",
"standard_name": "projection_y_coordinate",
"units": units,
}
)
elif crs.is_geographic:
# X metadata
x_coord_attrs.update(
{
"long_name": "longitude",
"standard_name": "longitude",
"units": "degrees_east",
}
)
# Y metadata
y_coord_attrs.update(
{
"long_name": "latitude",
"standard_name": "latitude",
"units": "degrees_north",
}
)
return x_coord_attrs, y_coord_attrs


def _setup_xy_dimensions(
    hf: h5py.File,
    atr: dict,
    crs: pyproj.CRS,
    xy_dim_names: tuple[str, str] = ("x", "y"),
):
"""Create X/Y spatial dimension in the HDF5 file.

Sets the datasets as HDF5 dimension scales.

Parameters
----------
hf : h5py.File
HDF5 file object.
dset : h5py.Dataset
Dataset within the HDF5 file.
atr : dict
MintPy attribute dictionary containing ROIPAC raster attributes.
crs : pyproj.CRS
Coordinate Reference System.
xy_dim_names : tuple[str, str], optional
x and y dimension names, by default ("x", "y").

Returns
-------
Optional[list[datetime.datetime]]
list of dates if they exist, otherwise None.
"""
    x_dim_name, y_dim_name = xy_dim_names
    # add metadata to x, y coordinates
    x_arr, y_arr = _get_xy_arrays(atr)
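    # `make_scale` below registers each 1D array as an HDF5 dimension scale,
    # which is how netCDF-style readers (xarray, GDAL) find coordinate variables.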
    x_dim_dset = hf.require_dataset(
        x_dim_name, shape=x_arr.shape, dtype=x_arr.dtype, data=x_arr
    )
    x_dim_dset.make_scale(x_dim_name)
    y_dim_dset = hf.require_dataset(
        y_dim_name, shape=y_arr.shape, dtype=y_arr.dtype, data=y_arr
    )
    y_dim_dset.make_scale(y_dim_name)

    x_coord_attrs, y_coord_attrs = _get_coordinate_metadata(crs)

    y_dim_dset.attrs.update(y_coord_attrs)
    x_dim_dset.attrs.update(x_coord_attrs)


def _attach_xy_dimensions(
    hf: h5py.File,
    dset: h5py.Dataset,
    xy_dim_names: tuple[str, str] = ("x", "y"),
):
"""
Setup time dimension in the HDF5 file.

Parameters
----------
hf : h5py.File
HDF5 file object.
dset : h5py.Dataset
Dataset within the HDF5 file.
xy_dim_names : tuple[str, str], optional
x and y dimension names, by default ("x", "y").

Returns
-------
Optional[list[datetime.datetime]]
list of dates if they exist, otherwise None.
"""
    x_dim_name, y_dim_name = xy_dim_names
    ndim = dset.ndim
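    # The trailing two axes of a MintPy raster dataset are (row, col) == (y, x),
    # so y attaches to axis ndim-2 and x to axis ndim-1.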
    dset.dims[ndim - 1].attach_scale(hf[x_dim_name])
    dset.dims[ndim - 2].attach_scale(hf[y_dim_name])
    dset.dims[ndim - 1].label = x_dim_name
    dset.dims[ndim - 2].label = y_dim_name


def _setup_time_dimension(hf: h5py.File, dset: h5py.Dataset):
"""Setup time dimension in the HDF5 file.

Parameters
----------
hf : h5py.File
HDF5 file object.
dset : h5py.Dataset
Dataset within the HDF5 file.
"""
if "date" not in hf or hf["date"].ndim == 2:
print(f"'date' dataset not in {hf}, skipping 'time' dimension")
# If we want to do something other than time as a 3rd dimension
# (e.g. for ifg date pairs) we'll need to figure out what other valid
# dims there are. otherwise, you can just do `phony_dims="sort"` in xarray
return None

    date_arr = [
        datetime.datetime.strptime(ds, "%Y%m%d") for ds in hf["date"][()].astype(str)
    ]
    days_since = np.array([(d - date_arr[0]).days for d in date_arr])
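    # e.g. dates of [20190101, 20190113, 20190125] -> days_since of [0, 12, 24]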
    dt_dim = hf.require_dataset(
        TIME_DSET_NAME, data=days_since, shape=days_since.shape, dtype=days_since.dtype
    )
    dt_dim.make_scale()
    cf_attrs = dict(
        units=f"days since {str(date_arr[0])}", calendar="proleptic_gregorian"
    )
    dt_dim.attrs.update(cf_attrs)
    dset.dims[0].attach_scale(dt_dim)
    dset.dims[0].label = TIME_DSET_NAME
31 changes: 19 additions & 12 deletions src/mintpy/utils/writefile.py
@@ -14,7 +14,7 @@
import h5py
import numpy as np

-from mintpy.utils import readfile
+from mintpy.utils import prep_utils, readfile


def write(datasetDict, out_file, metadata=None, ref_file=None, compression=None, ds_unit_dict=None, print_msg=True):
@@ -309,9 +309,9 @@ def layout_hdf5(fname, ds_name_dict=None, metadata=None, ds_unit_dict=None, ref_
            ds = fr[key]
            if isinstance(ds, h5py.Dataset):

-                # auxliary dataset
+                # auxiliary dataset
                if ds.shape[-2:] != shape2d_orig:
-                    ds_name_dict[key] = [ds.dtype, ds.shape, ds[:]]
+                    ds_name_dict[key] = [ds.dtype, ds.shape, ds[()]]

                # dataset
                else:
@@ -360,16 +360,16 @@ def layout_hdf5(fname, ds_name_dict=None, metadata=None, ds_unit_dict=None, ref_
                       t=str(data_type),
                       s=str(data_shape),
                       c=ds_comp))
-            ds = f.create_dataset(key,
-                                  shape=data_shape,
-                                  maxshape=max_shape,
-                                  dtype=data_type,
-                                  chunks=True,
-                                  compression=ds_comp)
+            if len(data_shape) < 1:
+                # scalar datasets can't be chunked
+                kwargs = {}
+            else:
+                kwargs = dict(maxshape=max_shape, chunks=True, compression=ds_comp)
+            ds = f.create_dataset(key, shape=data_shape, dtype=data_type, **kwargs)

-            # write auxliary data
+            # write auxiliary data
            if len(ds_name_dict[key]) > 2 and ds_name_dict[key][2] is not None:
-                ds[:] = np.array(ds_name_dict[key][2])
+                ds[()] = np.array(ds_name_dict[key][2])

        # write attributes in root level
        for key, value in meta.items():
@@ -382,6 +382,13 @@ def layout_hdf5(fname, ds_name_dict=None, metadata=None, ds_unit_dict=None, ref_
            f[key].attrs['UNIT'] = value
            vprint(f'add /{key:<{max_digit}} attribute: UNIT = {value}')

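+        # Best-effort CF annotation: write_coordinate_system() raises
+        # CoordinateError for files without geocoding metadata (no X_FIRST).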
+        if ds_name_dict:
+            vprint(f'Adding coordinate metadata to all datasets in {fname}')
+            try:
+                prep_utils.write_coordinate_system(fname, list(ds_name_dict.keys()))
+            except prep_utils.CoordinateError:
+                vprint('Skipping, not geocoded.')

    vprint(f'close HDF5 file: {fname}')

    return fname
1 change: 1 addition & 0 deletions tests/requirements.txt
@@ -0,0 +1 @@
pytest