Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions weather_mv/loader_pipeline/sinks.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def _get_band_data(i):
ds = xr.merge(band_data_list)
ds.attrs['is_normalized'] = ds_is_normalized_attr

end_time = None
start_time, end_time = None, None
if initialization_time_regex and forecast_time_regex:
try:
start_time = match_datetime(uri, initialization_time_regex)
Expand All @@ -175,18 +175,22 @@ def _get_band_data(i):
raise RuntimeError("Wrong regex passed in --forecast_time_regex.")
ds.attrs['start_time'] = start_time
ds.attrs['end_time'] = end_time
elif 'start_time' in ds.attrs and 'end_time' in ds.attrs:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

3 points

  • Our TIFF files do not preserve attrs, so reading them with xr.open_dataset(file.tiff, engine='rasterio') yields an empty ds.attrs.
  • 'start_time' and 'end_time' in the dataset attrs are in 2021-01-01T00:00:00Z format, so we need to parse them into datetime.datetime objects before the datetime_value_s variable below can be initialized.
  • start_time is not being used, so it can be skipped.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good points, Deep.
Maybe we can pick up #159 (comment) next and restructure _preprocess_tif altogether.

start_time, end_time = ds.attrs['start_time'], ds.attrs['end_time']

# TODO(#159): Explore ways to capture required metadata using xarray.
with rasterio.open(filename) as f:
datetime_value_ms = None
datetime_value_s = None
try:
datetime_value_s = (int(end_time.timestamp()) if end_time is not None
else int(f.tags()[tif_metadata_for_datetime]) / 1000.0)
datetime_value_s = (
int(end_time.timestamp()) if end_time is not None
else int(f.tags()[tif_metadata_for_datetime]) / 1000.0
)
ds = ds.assign_coords({'time': datetime.datetime.utcfromtimestamp(datetime_value_s)})
except KeyError:
raise RuntimeError(f"Invalid datetime metadata of tif: {tif_metadata_for_datetime}.")
except ValueError:
raise RuntimeError(f"Invalid datetime value in tif's metadata: {datetime_value_ms}.")
raise RuntimeError(f"Invalid datetime value in tif's metadata: {datetime_value_s}.")

return ds

Expand Down