Skip to content

Commit 6b102db

Browse files
committed
add lake_id to gages_df for data assimilation
1 parent 738f845 commit 6b102db

File tree

1 file changed: +28 additions, -1 deletion

src/troute-network/troute/HYFeaturesNetwork.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -654,9 +654,13 @@ def preprocess_waterbodies(self, lakes, nexus):
654654
def preprocess_data_assimilation(self, flowpaths):
655655
gages_df = flowpaths[~flowpaths['gage'].isna()]
656656
if not gages_df.empty:
657+
'''
657658
gages_df = gages_df[['id','gage']]
658659
gages_df['id'] = gages_df['id'].str.split('-',expand=True).loc[:,1].astype(float).astype(int)
659660
gages_df.set_index('id', inplace=True)
661+
import pdb; pdb.set_trace()
662+
'''
663+
660664
'''
661665
gages_df = network[['id','hl_uri','hydroseq']].drop_duplicates()
662666
# clear out missing values
@@ -690,6 +694,29 @@ def preprocess_data_assimilation(self, flowpaths):
690694
)
691695
'''
692696

697+
gages_df = gages_df[['id','gage','hydroseq']]
698+
# make 'id' an integer
699+
gages_df['id'] = gages_df['id'].str.split('-',expand=True).loc[:,1].astype(float).astype(int)
700+
# Some IDs have multiple gages associated with them. This will expand the dataframe so
701+
# there is a unique row per gage ID. Also adds lake ids to the dataframe for creating
702+
# lake-gage crosswalk dataframes.
703+
gages_df['gage'] = gages_df.gage.str.split(', ')
704+
gages_df = gages_df.explode(column='gage').set_index('id').join(
705+
pd.DataFrame().from_dict(self.waterbody_connections,orient='index',columns=['lake_id'])
706+
)
707+
# transform dataframe into a dictionary where key is segment ID and value is gage ID
708+
usgs_ind = gages_df.gage.str.isnumeric() #usgs gages used for streamflow DA
709+
# Use hydroseq information to determine furthest downstream gage when multiple are present.
710+
idx_id = gages_df.index.name
711+
if not idx_id:
712+
idx_id = 'index'
713+
self._gages = (
714+
gages_df.loc[usgs_ind].reset_index()
715+
.sort_values('hydroseq').drop_duplicates(['gage'],keep='last')
716+
.set_index(idx_id)[['gage']].rename(columns={'gage': 'gages'})
717+
.rename_axis(None, axis=0).to_dict()
718+
)
719+
693720
# transform dataframe into a dictionary where key is segment ID and value is gage ID
694721
usgs_ind = gages_df.gage.str.isnumeric() #usgs gages used for streamflow DA
695722
# Use hydroseq information to determine furthest downstream gage when multiple are present.
@@ -709,7 +736,7 @@ def preprocess_data_assimilation(self, flowpaths):
709736

710737
if 'lake_id' in gages_df.columns:
711738
# Find furthest downstream gage and create our lake_gage_df to make crosswalk dataframes.
712-
lake_gage_hydroseq_df = gages_df[~gages_df['lake_id'].isnull()][['lake_id', 'value', 'hydroseq']].rename(columns={'value': 'gages'})
739+
lake_gage_hydroseq_df = gages_df[~gages_df['lake_id'].isnull()][['lake_id', 'gage', 'hydroseq']].rename(columns={'gage': 'gages'})
713740
lake_gage_hydroseq_df['lake_id'] = lake_gage_hydroseq_df['lake_id'].astype(int)
714741
lake_gage_df = lake_gage_hydroseq_df[['lake_id','gages']].drop_duplicates()
715742
lake_gage_hydroseq_df = lake_gage_hydroseq_df.groupby(['lake_id','gages']).max('hydroseq').reset_index().set_index('lake_id')

0 commit comments

Comments (0)