Found a potential workaround for the missing divide-attributes

taddyb · taddyb · commit 872bb536ee4f · 2025-02-16T22:02:32.000-06:00
diff --git a/config/training_config.yaml b/config/training_config.yaml
@@ -8,9 +8,9 @@ name: ${version}-ddr_jrb-${forcings}
 
 data_sources:
   conus_hydrofabric: /projects/mhpi/data/hydrofabric/v2.2/conus_nextgen.gpkg
-  local_hydrofabric: /projects/mhpi/data/hydrofabric/v2.2/JRB.gpkg
+  local_hydrofabric: /projects/mhpi/data/hydrofabric/v2.2/jrb_2.gpkg
   network: /projects/mhpi/tbindas/ddr/data/network.zarr
-  transition_matrix: /projects/mhpi/tbindas/ddr/transition_matrix.csv
+  transition_matrix: /projects/mhpi/data/hydrofabric/v2.2/jrb_transition_matrix.csv
   statistics: /projects/mhpi/tbindas/ddr/data/statistics
   streamflow:  /projects/mhpi/data/MERIT/streamflow/zarr/${forcings}
   observations: /projects/mhpi/data/observations/gages_9000.zarr
@@ -47,6 +47,7 @@ params:
     slope: 0.0001
     velocity: 0.01
     depth: 0.01
+    bottom_width: 0.01
   attributes_region:
   - '73'
   parameter_ranges:
@@ -57,9 +58,8 @@ params:
       q_spatial:
       - 0.0
       - 3.0
-      p_spatial:
-      - 0.0
-      - 42.0
+  defaults:
+    p: 21
 
 np_seed: 1
 seed: 0
diff --git a/engine/intersect_merit_and_hydro.py b/engine/intersect_merit_and_hydro.py
@@ -1,8 +1,8 @@
 import geopandas as gpd
 import pandas as pd
 
-path_1 = "/Users/taddbindas/Downloads/drive-download-20250204T042718Z-001/cat_pfaf_73_MERIT_Hydro_v07_Basins_v01_bugfix1.shp"
-path_2 = "/Users/taddbindas/projects/ddr/data/SRB.gpkg"
+path_1 = "/projects/mhpi/data/MERIT/raw/basins/cat_pfaf_73_MERIT_Hydro_v07_Basins_v01_bugfix1.shp"
+path_2 = "/projects/mhpi/data/hydrofabric/v2.2/jrb_2.gpkg"
 
 gdf1 = gpd.read_file(path_1).set_crs(epsg=4326).to_crs(epsg=5070)
 gdf2 = gpd.read_file(path_2, layer="divides").to_crs(epsg=5070)
@@ -22,5 +22,5 @@
                              columns='divide_id',  # replace with your actual column name from gdf1
                              fill_value=0)
 
-weight_matrix.to_csv("/Users/taddbindas/projects/ddr/data/transition_matrix.csv")
-print("Created transition matrix @ /Users/taddbindas/projects/ddr/data/transition_matrix.csv")
+weight_matrix.to_csv("/projects/mhpi/data/hydrofabric/v2.2/jrb_transition_matrix.csv")
+print("Created transition matrix @ /projects/mhpi/data/hydrofabric/v2.2/jrb_transition_matrix.csv")
diff --git a/src/ddr/dataset/train_dataset.py b/src/ddr/dataset/train_dataset.py
@@ -25,7 +25,7 @@ class Hydrofabric:
     length: Union[torch.Tensor, None] = field(default=None)
     slope: Union[torch.Tensor, None] = field(default=None)
     side_slope: Union[torch.Tensor, None] = field(default=None)
-    width: Union[torch.Tensor, None] = field(default=None)
+    top_width: Union[torch.Tensor, None] = field(default=None)
     x: Union[torch.Tensor, None] = field(default=None)
     dates: Union[Dates, None] = field(default=None)
     normalized_spatial_attributes: Union[torch.Tensor, None] = field(default=None)
@@ -54,7 +54,7 @@ def __init__(self, cfg: DictConfig):
         self.gage_ids = np.array([str(_id.zfill(8)) for _id in self.obs_reader.gage_dict["STAID"]])
 
         self.network = gpd.read_file(cfg.data_sources.local_hydrofabric, layer="network")
-        self.divides = gpd.read_file(cfg.data_sources.local_hydrofabric, layer="divides").set_index("id")
+        self.divides = gpd.read_file(cfg.data_sources.local_hydrofabric, layer="divides").set_index("divide_id")
         self.divide_attr = gpd.read_file(cfg.data_sources.local_hydrofabric, layer="divide-attributes").set_index("divide_id")
         self.flowpath_attr = gpd.read_file(cfg.data_sources.local_hydrofabric, layer="flowpath-attributes-ml").set_index("id")
         self.flowpaths = gpd.read_file(cfg.data_sources.local_hydrofabric, layer="flowpaths").set_index("id")
@@ -65,12 +65,16 @@ def __init__(self, cfg: DictConfig):
         self.adjacency_matrix, self.order = read_coo(Path(cfg.data_sources.network), self.gage_ids[0])
         self.network_matrix = torch.tensor(self.adjacency_matrix.todense(), dtype=torch.float32, device=cfg.device)
         
-        ordered_index = [f"wb-{_id}" for _id in self.order]
-        self.divides_sorted = self.divides.reindex(ordered_index)
-        self.divide_attr_sorted = self.divide_attr.reindex(self.divides_sorted["divide_id"])
-        self.flowpaths_sorted = self.flowpaths.reindex(ordered_index)
-        self.flowpath_attr = self.flowpath_attr[~self.flowpath_attr.index.duplicated(keep='first')]
-        self.flowpath_attr_sorted = self.flowpath_attr.reindex(ordered_index)
+        # TODO: get Mike Johnson et al. to fix the hfsubsetR subset bug: https://github.com/owp-spatial/hfsubsetR/issues/9
+        wb_ordered_index = [f"wb-{_id}" for _id in self.order]
+        cat_ordered_index = [f"cat-{_id}" for _id in self.order]
+        self.divides_sorted = self.divides.reindex(cat_ordered_index).dropna(how='all')
+        self.divide_attr_sorted = self.divide_attr.reindex(self.divides_sorted.index)
+        
+        self.flowpaths_sorted = self.flowpaths.reindex(wb_ordered_index).dropna(how='all')
+        self.flowpath_attr = self.flowpath_attr[~self.flowpath_attr.index.duplicated(keep='first')].dropna(how='all')
+        self.flowpath_attr_sorted = self.flowpath_attr.reindex(wb_ordered_index).dropna(how='all')
+        
         # self.idx_mapper = {_id: idx for idx, _id in enumerate(self.divides_sorted.index)}
         # self.catchment_mapper = {_id : idx for idx, _id in enumerate(self.divides_sorted["divide_id"])}
         
@@ -97,11 +101,16 @@ def collate_fn(self, *args, **kwargs) -> Hydrofabric:
         self.dates.calculate_time_period()
         
         spatial_attributes = torch.tensor(
-            np.array([self.divide_attr[attr].values for attr in self.cfg.kan.input_var_names]),
+            np.array([self.divide_attr_sorted[attr].values for attr in self.cfg.kan.input_var_names]),
             device=self.cfg.device,
             dtype=torch.float32
         )
         
+        for r in range(spatial_attributes.shape[0]):
+            row_means = torch.nanmean(spatial_attributes[r])
+            nan_mask = torch.isnan(spatial_attributes[r])
+            spatial_attributes[r, nan_mask] = row_means
+        
         normalized_spatial_attributes = (spatial_attributes - self.means) / self.stds
         normalized_spatial_attributes = normalized_spatial_attributes.T  # transposing for NN inputs
         
@@ -112,14 +121,14 @@ def collate_fn(self, *args, **kwargs) -> Hydrofabric:
         )
 
         # TODO make this a dynamic lookup
-        transition_matrix = pd.read_csv("/projects/mhpi/tbindas/ddr/data/transition_matrix.csv").set_index("COMID")
+        transition_matrix = pd.read_csv(self.cfg.data_sources.transition_matrix).set_index("COMID")
         
         return Hydrofabric(
             spatial_attributes=spatial_attributes,
             length=self.length,
             slope=self.slope,
             side_slope=self.side_slope,
-            width=self.top_width,
+            top_width=self.top_width,
             x=self.x,
             dates=self.dates,
             adjacency_matrix=self.network_matrix,
diff --git a/src/ddr/routing/dmc.py b/src/ddr/routing/dmc.py
@@ -17,36 +17,46 @@
 def _log_base_q(x, q):
     return torch.log(x) / torch.log(torch.tensor(q, dtype=x.dtype))
 
-def _get_velocity(q_t, _n, _p_spatial, width, _q_spatial, _s0, velocity_lb, depth_lb) -> torch.Tensor:
+def _get_trapezoid_velocity(
+    q_t,
+    _n: torch.Tensor,
+    top_width: torch.Tensor,
+    side_slope: torch.Tensor,
+    _s0: torch.Tensor,
+    p_spatial: torch.Tensor,
+    _q_spatial: torch.Tensor,
+    velocity_lb: torch.Tensor,
+    depth_lb: torch.Tensor,
+    _btm_width_lb: torch.Tensor,
+) -> torch.Tensor:
     """Calculate flow velocity using Manning's equation.
-
-    Parameters
-    ----------
-    q_t : torch.Tensor
-        Discharge at time t.
-    _n : torch.Tensor
-        Manning's roughness coefficient.
-    _q_spatial : torch.Tensor
-        Spatial discharge parameter.
-    _s0 : torch.Tensor
-        Channel slope.
-    p_spatial : torch.Tensor
-        Spatial parameter for width calculation.
-
-    Returns
-    -------
-    torch.Tensor
-        Celerity (wave speed) of the flow.
-
-    Notes
-    -----
-    The function first calculates flow depth using Manning's equation, then
-    computes velocity and finally celerity. The celerity is clamped between
-    0.3 and 15 m/s and scaled by 5/3 according to kinematic wave theory.
     """
-    depth = _log_base_q(width/_p_spatial, _q_spatial)
-    v = torch.div(1, _n) * torch.pow(depth, (2 / 3)) * torch.pow(_s0, (1 / 2))
-    c_ = torch.clamp(v, velocity_lb, 15)
+    numerator = q_t * _n * (_q_spatial + 1)
+    denominator = p_spatial * torch.pow(_s0, 0.5)
+    depth = torch.clamp(
+        torch.pow(
+            torch.div(numerator, denominator + 1e-8),
+            torch.div(3.0, 5.0 + 3.0 * _q_spatial),
+        ),
+        min=depth_lb,
+    )
+
+    # For z:1 side slopes (z horizontal : 1 vertical)
+    _bottom_width = top_width - (2 * side_slope * depth)
+    bottom_width = torch.clamp(_bottom_width, min=_btm_width_lb)
+
+    # Area = (top_width + bottom_width)*depth/2
+    area = (top_width + bottom_width) * depth / 2
+
+    # Side length = sqrt(1 + z^2) * depth
+    # Since for every 1 unit vertical, we go z units horizontal
+    wetted_p = bottom_width + 2 * depth * torch.sqrt(1 + side_slope**2)
+
+    # Calculate hydraulic radius
+    R = area / wetted_p
+
+    v = torch.div(1, _n) * torch.pow(R, (2 / 3)) * torch.pow(_s0, (1 / 2))
+    c_ = torch.clamp(v, min=velocity_lb, max=15)
     c = c_ * 5 / 3
     return c
 
@@ -76,19 +86,17 @@ def __init__(
         # Base routing parameters
         self.n = None
         self.q_spatial = None
-        self.p_spatial = None
 
         # Routing state
-        self.length = None
-        self.slope = None
-        self.velocity = None
         self._discharge_t = None
-        self.adjacency_matrix = None
+        self.network = None
 
         self.parameter_bounds = self.cfg.params.parameter_ranges.range
+        self.p_spatial =  torch.tensor(self.cfg.params.defaults.p, device=self.device_num)
         self.velocity_lb = torch.tensor(self.cfg.params.attribute_minimums.velocity, device=self.device_num)
         self.depth_lb = torch.tensor(self.cfg.params.attribute_minimums.depth, device=self.device_num)
         self.discharge_lb = torch.tensor(self.cfg.params.attribute_minimums.discharge, device=self.device_num)
+        self.bottom_width_lb = torch.tensor(self.cfg.params.attribute_minimums.bottom_width, device=self.device_num)
 
     def forward(self, **kwargs) -> dict[str, torch.Tensor]:
         """The forward pass for the dMC model
@@ -106,18 +114,14 @@ def forward(self, **kwargs) -> dict[str, torch.Tensor]:
         # gage_information = hydrofabric.network.gage_information
         # TODO: create a dynamic gauge look up
         gage_indices = torch.tensor([-1])
-        self.adjacency_matrix = hydrofabric.adjacency_matrix
+        self.network = hydrofabric.adjacency_matrix
 
         # Set up base parameters
         self.n = denormalize(value=kwargs["spatial_parameters"]["n"], bounds=self.parameter_bounds["n"])
         self.q_spatial = denormalize(
             value=kwargs["spatial_parameters"]["q_spatial"],
             bounds=self.parameter_bounds["q_spatial"],
         )
-        self.p_spatial = denormalize(
-            value=kwargs["spatial_parameters"]["p_spatial"],
-            bounds=self.parameter_bounds["p_spatial"],
-        )
 
         # Initialize discharge
         self._discharge_t = q_prime[0].to(self.device_num)
@@ -147,8 +151,9 @@ def forward(self, **kwargs) -> dict[str, torch.Tensor]:
             hydrofabric.slope.to(self.device_num).to(torch.float32),
             min=self.cfg.params.attribute_minimums.slope,
         )
-        width = hydrofabric.length.to(self.device_num).to(torch.float32)
-        x_storage = hydrofabric.length.to(self.device_num).to(torch.float32)
+        top_width = hydrofabric.top_width.to(self.device_num).to(torch.float32)
+        side_slope = hydrofabric.side_slope.to(self.device_num).to(torch.float32)
+        x_storage = hydrofabric.x.to(self.device_num).to(torch.float32)
 
         desc = "Running dMC Routing"
         for timestep in tqdm(
@@ -161,22 +166,24 @@ def forward(self, **kwargs) -> dict[str, torch.Tensor]:
         ):
             q_prime_sub = q_prime[timestep - 1].clone()
             q_prime_clamp = torch.clamp(q_prime_sub, min=self.cfg.params.attribute_minimums.discharge)
-            velocity = _get_velocity(
+            velocity = _get_trapezoid_velocity(
                 q_t=self._discharge_t,
                 _n=self.n,
-                _q_spatial=self.q_spatial,
+                top_width=top_width,
+                side_slope=side_slope,
                 _s0=slope,
-                _p_spatial=self.p_spatial,
-                width=width,
+                p_spatial=self.p_spatial,
+                _q_spatial=self.q_spatial,
                 velocity_lb=self.velocity_lb,
                 depth_lb=self.depth_lb,
+                _btm_width_lb=self.bottom_width_lb,
             )
             k = torch.div(length, velocity)
             denom = (2.0 * k * (1.0 - x_storage)) + self.t
             c_2 = (self.t + (2.0 * k * x_storage)) / denom
             c_3 = ((2.0 * k * (1.0 - x_storage)) - self.t) / denom
             c_4 = (2.0 * self.t) / denom
-            i_t = torch.matmul(self.adjacency_matrix, self._discharge_t)
+            i_t = torch.matmul(self.network, self._discharge_t)
             q_l = q_prime_clamp
 
             b_array = (c_2 * i_t) + (c_3 * self._discharge_t) + (c_4 * q_l)
@@ -204,3 +211,44 @@ def forward(self, **kwargs) -> dict[str, torch.Tensor]:
         }
 
         return output_dict
+    
+    def fill_op(self, data_vector: torch.Tensor):
+        """A fill operation function for the sparse matrix
+
+        The equation we want to solve
+        (I - C_1*N) * Q_{t+1} = c_2*(N*Q_t) + c_3*Q_t + c_4*Q'
+        (I - C_1*N) * Q_{t+1} = b(t)
+
+        Parameters
+        ----------
+        data_vector: torch.Tensor
+            The data vector to fill the sparse matrix with
+        """
+        identity_matrix = self._sparse_eye(self.network.shape[0])
+        vec_diag = self._sparse_diag(data_vector)
+        # vec_filled = bnb.matmul(vec_diag, self.network, threshold=6.0)
+        vec_filled = torch.matmul(vec_diag.cpu(), self.network.cpu()).to(self.device_num)
+        A = identity_matrix + vec_filled
+        return A
+
+    def _sparse_eye(self, n):
+        indices = torch.arange(n, dtype=torch.int32, device=self.device_num)
+        values = torch.ones(n, device=self.device_num)
+        identity_coo = torch.sparse_coo_tensor(
+            indices=torch.vstack([indices, indices]),
+            values=values,
+            size=(n, n),
+            device=self.device_num,
+        )
+        return identity_coo.to_sparse_csr()
+
+    def _sparse_diag(self, data):
+        n = len(data)
+        indices = torch.arange(n, dtype=torch.int32, device=self.device_num)
+        diagonal_coo = torch.sparse_coo_tensor(
+            indices=torch.vstack([indices, indices]),
+            values=data,
+            size=(n, n),
+            device=self.device_num,
+        )
+        return diagonal_coo.to_sparse_csr()