Skip to content

Commit 872bb53

Browse files
committed
found a workaround (potentially) for the missing divide-attributes
1 parent a8d1489 commit 872bb53

File tree

4 files changed

+122
-65
lines changed

4 files changed

+122
-65
lines changed

config/training_config.yaml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ name: ${version}-ddr_jrb-${forcings}
88

99
data_sources:
1010
conus_hydrofabric: /projects/mhpi/data/hydrofabric/v2.2/conus_nextgen.gpkg
11-
local_hydrofabric: /projects/mhpi/data/hydrofabric/v2.2/JRB.gpkg
11+
local_hydrofabric: /projects/mhpi/data/hydrofabric/v2.2/jrb_2.gpkg
1212
network: /projects/mhpi/tbindas/ddr/data/network.zarr
13-
transition_matrix: /projects/mhpi/tbindas/ddr/transition_matrix.csv
13+
transition_matrix: /projects/mhpi/data/hydrofabric/v2.2/jrb_transition_matrix.csv
1414
statistics: /projects/mhpi/tbindas/ddr/data/statistics
1515
streamflow: /projects/mhpi/data/MERIT/streamflow/zarr/${forcings}
1616
observations: /projects/mhpi/data/observations/gages_9000.zarr
@@ -47,6 +47,7 @@ params:
4747
slope: 0.0001
4848
velocity: 0.01
4949
depth: 0.01
50+
bottom_width: 0.01
5051
attributes_region:
5152
- '73'
5253
parameter_ranges:
@@ -57,9 +58,8 @@ params:
5758
q_spatial:
5859
- 0.0
5960
- 3.0
60-
p_spatial:
61-
- 0.0
62-
- 42.0
61+
defaults:
62+
p: 21
6363

6464
np_seed: 1
6565
seed: 0

engine/intersect_merit_and_hydro.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
import geopandas as gpd
22
import pandas as pd
33

4-
path_1 = "/Users/taddbindas/Downloads/drive-download-20250204T042718Z-001/cat_pfaf_73_MERIT_Hydro_v07_Basins_v01_bugfix1.shp"
5-
path_2 = "/Users/taddbindas/projects/ddr/data/SRB.gpkg"
4+
path_1 = "/projects/mhpi/data/MERIT/raw/basins/cat_pfaf_73_MERIT_Hydro_v07_Basins_v01_bugfix1.shp"
5+
path_2 = "/projects/mhpi/data/hydrofabric/v2.2/jrb_2.gpkg"
66

77
gdf1 = gpd.read_file(path_1).set_crs(epsg=4326).to_crs(epsg=5070)
88
gdf2 = gpd.read_file(path_2, layer="divides").to_crs(epsg=5070)
@@ -22,5 +22,5 @@
2222
columns='divide_id', # replace with your actual column name from gdf1
2323
fill_value=0)
2424

25-
weight_matrix.to_csv("/Users/taddbindas/projects/ddr/data/transition_matrix.csv")
26-
print("Created transition matrix @ /Users/taddbindas/projects/ddr/data/transition_matrix.csv")
25+
weight_matrix.to_csv("/projects/mhpi/data/hydrofabric/v2.2/jrb_transition_matrix.csv")
26+
print("Created transition matrix @ /projects/mhpi/data/hydrofabric/v2.2/jrb_transition_matrix.csv")

src/ddr/dataset/train_dataset.py

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ class Hydrofabric:
2525
length: Union[torch.Tensor, None] = field(default=None)
2626
slope: Union[torch.Tensor, None] = field(default=None)
2727
side_slope: Union[torch.Tensor, None] = field(default=None)
28-
width: Union[torch.Tensor, None] = field(default=None)
28+
top_width: Union[torch.Tensor, None] = field(default=None)
2929
x: Union[torch.Tensor, None] = field(default=None)
3030
dates: Union[Dates, None] = field(default=None)
3131
normalized_spatial_attributes: Union[torch.Tensor, None] = field(default=None)
@@ -54,7 +54,7 @@ def __init__(self, cfg: DictConfig):
5454
self.gage_ids = np.array([str(_id.zfill(8)) for _id in self.obs_reader.gage_dict["STAID"]])
5555

5656
self.network = gpd.read_file(cfg.data_sources.local_hydrofabric, layer="network")
57-
self.divides = gpd.read_file(cfg.data_sources.local_hydrofabric, layer="divides").set_index("id")
57+
self.divides = gpd.read_file(cfg.data_sources.local_hydrofabric, layer="divides").set_index("divide_id")
5858
self.divide_attr = gpd.read_file(cfg.data_sources.local_hydrofabric, layer="divide-attributes").set_index("divide_id")
5959
self.flowpath_attr = gpd.read_file(cfg.data_sources.local_hydrofabric, layer="flowpath-attributes-ml").set_index("id")
6060
self.flowpaths = gpd.read_file(cfg.data_sources.local_hydrofabric, layer="flowpaths").set_index("id")
@@ -65,12 +65,16 @@ def __init__(self, cfg: DictConfig):
6565
self.adjacency_matrix, self.order = read_coo(Path(cfg.data_sources.network), self.gage_ids[0])
6666
self.network_matrix = torch.tensor(self.adjacency_matrix.todense(), dtype=torch.float32, device=cfg.device)
6767

68-
ordered_index = [f"wb-{_id}" for _id in self.order]
69-
self.divides_sorted = self.divides.reindex(ordered_index)
70-
self.divide_attr_sorted = self.divide_attr.reindex(self.divides_sorted["divide_id"])
71-
self.flowpaths_sorted = self.flowpaths.reindex(ordered_index)
72-
self.flowpath_attr = self.flowpath_attr[~self.flowpath_attr.index.duplicated(keep='first')]
73-
self.flowpath_attr_sorted = self.flowpath_attr.reindex(ordered_index)
68+
# TODO get mike johnson et al. to fix the subset bug: https://github.com/owp-spatial/hfsubsetR/issues/9
69+
wb_ordered_index = [f"wb-{_id}" for _id in self.order]
70+
cat_ordered_index = [f"cat-{_id}" for _id in self.order]
71+
self.divides_sorted = self.divides.reindex(cat_ordered_index).dropna(how='all')
72+
self.divide_attr_sorted = self.divide_attr.reindex(self.divides_sorted.index)
73+
74+
self.flowpaths_sorted = self.flowpaths.reindex(wb_ordered_index).dropna(how='all')
75+
self.flowpath_attr = self.flowpath_attr[~self.flowpath_attr.index.duplicated(keep='first')].dropna(how='all')
76+
self.flowpath_attr_sorted = self.flowpath_attr.reindex(wb_ordered_index).dropna(how='all')
77+
7478
# self.idx_mapper = {_id: idx for idx, _id in enumerate(self.divides_sorted.index)}
7579
# self.catchment_mapper = {_id : idx for idx, _id in enumerate(self.divides_sorted["divide_id"])}
7680

@@ -97,11 +101,16 @@ def collate_fn(self, *args, **kwargs) -> Hydrofabric:
97101
self.dates.calculate_time_period()
98102

99103
spatial_attributes = torch.tensor(
100-
np.array([self.divide_attr[attr].values for attr in self.cfg.kan.input_var_names]),
104+
np.array([self.divide_attr_sorted[attr].values for attr in self.cfg.kan.input_var_names]),
101105
device=self.cfg.device,
102106
dtype=torch.float32
103107
)
104108

109+
for r in range(spatial_attributes.shape[0]):
110+
row_means = torch.nanmean(spatial_attributes[r])
111+
nan_mask = torch.isnan(spatial_attributes[r])
112+
spatial_attributes[r, nan_mask] = row_means
113+
105114
normalized_spatial_attributes = (spatial_attributes - self.means) / self.stds
106115
normalized_spatial_attributes = normalized_spatial_attributes.T # transposing for NN inputs
107116

@@ -112,14 +121,14 @@ def collate_fn(self, *args, **kwargs) -> Hydrofabric:
112121
)
113122

114123
# TODO make this a dynamic lookup
115-
transition_matrix = pd.read_csv("/projects/mhpi/tbindas/ddr/data/transition_matrix.csv").set_index("COMID")
124+
transition_matrix = pd.read_csv(self.cfg.data_sources.transition_matrix).set_index("COMID")
116125

117126
return Hydrofabric(
118127
spatial_attributes=spatial_attributes,
119128
length=self.length,
120129
slope=self.slope,
121130
side_slope=self.side_slope,
122-
width=self.top_width,
131+
top_width=self.top_width,
123132
x=self.x,
124133
dates=self.dates,
125134
adjacency_matrix=self.network_matrix,

src/ddr/routing/dmc.py

Lines changed: 93 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -17,36 +17,46 @@
1717
def _log_base_q(x, q):
1818
return torch.log(x) / torch.log(torch.tensor(q, dtype=x.dtype))
1919

20-
def _get_velocity(q_t, _n, _p_spatial, width, _q_spatial, _s0, velocity_lb, depth_lb) -> torch.Tensor:
20+
def _get_trapezoid_velocity(
21+
q_t,
22+
_n: torch.Tensor,
23+
top_width: torch.Tensor,
24+
side_slope: torch.Tensor,
25+
_s0: torch.Tensor,
26+
p_spatial: torch.Tensor,
27+
_q_spatial: torch.Tensor,
28+
velocity_lb: torch.Tensor,
29+
depth_lb: torch.Tensor,
30+
_btm_width_lb: torch.Tensor,
31+
) -> torch.Tensor:
2132
"""Calculate flow velocity using Manning's equation.
22-
23-
Parameters
24-
----------
25-
q_t : torch.Tensor
26-
Discharge at time t.
27-
_n : torch.Tensor
28-
Manning's roughness coefficient.
29-
_q_spatial : torch.Tensor
30-
Spatial discharge parameter.
31-
_s0 : torch.Tensor
32-
Channel slope.
33-
p_spatial : torch.Tensor
34-
Spatial parameter for width calculation.
35-
36-
Returns
37-
-------
38-
torch.Tensor
39-
Celerity (wave speed) of the flow.
40-
41-
Notes
42-
-----
43-
The function first calculates flow depth using Manning's equation, then
44-
computes velocity and finally celerity. The celerity is clamped between
45-
0.3 and 15 m/s and scaled by 5/3 according to kinematic wave theory.
4633
"""
47-
depth = _log_base_q(width/_p_spatial, _q_spatial)
48-
v = torch.div(1, _n) * torch.pow(depth, (2 / 3)) * torch.pow(_s0, (1 / 2))
49-
c_ = torch.clamp(v, velocity_lb, 15)
34+
numerator = q_t * _n * (_q_spatial + 1)
35+
denominator = p_spatial * torch.pow(_s0, 0.5)
36+
depth = torch.clamp(
37+
torch.pow(
38+
torch.div(numerator, denominator + 1e-8),
39+
torch.div(3.0, 5.0 + 3.0 * _q_spatial),
40+
),
41+
min=depth_lb,
42+
)
43+
44+
# For z:1 side slopes (z horizontal : 1 vertical)
45+
_bottom_width = top_width - (2 * side_slope * depth)
46+
bottom_width = torch.clamp(_bottom_width, min=_btm_width_lb)
47+
48+
# Area = (top_width + bottom_width)*depth/2
49+
area = (top_width + bottom_width) * depth / 2
50+
51+
# Side length = sqrt(1 + z^2) * depth
52+
# Since for every 1 unit vertical, we go z units horizontal
53+
wetted_p = bottom_width + 2 * depth * torch.sqrt(1 + side_slope**2)
54+
55+
# Calculate hydraulic radius
56+
R = area / wetted_p
57+
58+
v = torch.div(1, _n) * torch.pow(R, (2 / 3)) * torch.pow(_s0, (1 / 2))
59+
c_ = torch.clamp(v, min=velocity_lb, max=15)
5060
c = c_ * 5 / 3
5161
return c
5262

@@ -76,19 +86,17 @@ def __init__(
7686
# Base routing parameters
7787
self.n = None
7888
self.q_spatial = None
79-
self.p_spatial = None
8089

8190
# Routing state
82-
self.length = None
83-
self.slope = None
84-
self.velocity = None
8591
self._discharge_t = None
86-
self.adjacency_matrix = None
92+
self.network = None
8793

8894
self.parameter_bounds = self.cfg.params.parameter_ranges.range
95+
self.p_spatial = torch.tensor(self.cfg.params.defaults.p, device=self.device_num)
8996
self.velocity_lb = torch.tensor(self.cfg.params.attribute_minimums.velocity, device=self.device_num)
9097
self.depth_lb = torch.tensor(self.cfg.params.attribute_minimums.depth, device=self.device_num)
9198
self.discharge_lb = torch.tensor(self.cfg.params.attribute_minimums.discharge, device=self.device_num)
99+
self.bottom_width_lb = torch.tensor(self.cfg.params.attribute_minimums.bottom_width, device=self.device_num)
92100

93101
def forward(self, **kwargs) -> dict[str, torch.Tensor]:
94102
"""The forward pass for the dMC model
@@ -106,18 +114,14 @@ def forward(self, **kwargs) -> dict[str, torch.Tensor]:
106114
# gage_information = hydrofabric.network.gage_information
107115
# TODO: create a dynamic gauge look up
108116
gage_indices = torch.tensor([-1])
109-
self.adjacency_matrix = hydrofabric.adjacency_matrix
117+
self.network = hydrofabric.adjacency_matrix
110118

111119
# Set up base parameters
112120
self.n = denormalize(value=kwargs["spatial_parameters"]["n"], bounds=self.parameter_bounds["n"])
113121
self.q_spatial = denormalize(
114122
value=kwargs["spatial_parameters"]["q_spatial"],
115123
bounds=self.parameter_bounds["q_spatial"],
116124
)
117-
self.p_spatial = denormalize(
118-
value=kwargs["spatial_parameters"]["p_spatial"],
119-
bounds=self.parameter_bounds["p_spatial"],
120-
)
121125

122126
# Initialize discharge
123127
self._discharge_t = q_prime[0].to(self.device_num)
@@ -147,8 +151,9 @@ def forward(self, **kwargs) -> dict[str, torch.Tensor]:
147151
hydrofabric.slope.to(self.device_num).to(torch.float32),
148152
min=self.cfg.params.attribute_minimums.slope,
149153
)
150-
width = hydrofabric.length.to(self.device_num).to(torch.float32)
151-
x_storage = hydrofabric.length.to(self.device_num).to(torch.float32)
154+
top_width = hydrofabric.top_width.to(self.device_num).to(torch.float32)
155+
side_slope = hydrofabric.side_slope.to(self.device_num).to(torch.float32)
156+
x_storage = hydrofabric.x.to(self.device_num).to(torch.float32)
152157

153158
desc = "Running dMC Routing"
154159
for timestep in tqdm(
@@ -161,22 +166,24 @@ def forward(self, **kwargs) -> dict[str, torch.Tensor]:
161166
):
162167
q_prime_sub = q_prime[timestep - 1].clone()
163168
q_prime_clamp = torch.clamp(q_prime_sub, min=self.cfg.params.attribute_minimums.discharge)
164-
velocity = _get_velocity(
169+
velocity = _get_trapezoid_velocity(
165170
q_t=self._discharge_t,
166171
_n=self.n,
167-
_q_spatial=self.q_spatial,
172+
top_width=top_width,
173+
side_slope=side_slope,
168174
_s0=slope,
169-
_p_spatial=self.p_spatial,
170-
width=width,
175+
p_spatial=self.p_spatial,
176+
_q_spatial=self.q_spatial,
171177
velocity_lb=self.velocity_lb,
172178
depth_lb=self.depth_lb,
179+
_btm_width_lb=self.bottom_width_lb,
173180
)
174181
k = torch.div(length, velocity)
175182
denom = (2.0 * k * (1.0 - x_storage)) + self.t
176183
c_2 = (self.t + (2.0 * k * x_storage)) / denom
177184
c_3 = ((2.0 * k * (1.0 - x_storage)) - self.t) / denom
178185
c_4 = (2.0 * self.t) / denom
179-
i_t = torch.matmul(self.adjacency_matrix, self._discharge_t)
186+
i_t = torch.matmul(self.network, self._discharge_t)
180187
q_l = q_prime_clamp
181188

182189
b_array = (c_2 * i_t) + (c_3 * self._discharge_t) + (c_4 * q_l)
@@ -204,3 +211,44 @@ def forward(self, **kwargs) -> dict[str, torch.Tensor]:
204211
}
205212

206213
return output_dict
214+
215+
def fill_op(self, data_vector: torch.Tensor):
216+
"""A fill operation function for the sparse matrix
217+
218+
The equation we want to solve
219+
(I - C_1*N) * Q_t+1 = c_2*(N*Q_t_1) + c_3*Q_t + c_4*Q`
220+
(I - C_1*N) * Q_t+1 = b(t)
221+
222+
Parameters
223+
----------
224+
data_vector: torch.Tensor
225+
The data vector to fill the sparse matrix with
226+
"""
227+
identity_matrix = self._sparse_eye(self.network.shape[0])
228+
vec_diag = self._sparse_diag(data_vector)
229+
# vec_filled = bnb.matmul(vec_diag, self.network, threshold=6.0)
230+
vec_filled = torch.matmul(vec_diag.cpu(), self.network.cpu()).to(self.device_num)
231+
A = identity_matrix + vec_filled
232+
return A
233+
234+
def _sparse_eye(self, n):
235+
indices = torch.arange(n, dtype=torch.int32, device=self.device_num)
236+
values = torch.ones(n, device=self.device_num)
237+
identity_coo = torch.sparse_coo_tensor(
238+
indices=torch.vstack([indices, indices]),
239+
values=values,
240+
size=(n, n),
241+
device=self.device_num,
242+
)
243+
return identity_coo.to_sparse_csr()
244+
245+
def _sparse_diag(self, data):
246+
n = len(data)
247+
indices = torch.arange(n, dtype=torch.int32, device=self.device_num)
248+
diagonal_coo = torch.sparse_coo_tensor(
249+
indices=torch.vstack([indices, indices]),
250+
values=data,
251+
size=(n, n),
252+
device=self.device_num,
253+
)
254+
return diagonal_coo.to_sparse_csr()

0 commit comments

Comments
 (0)