@@ -654,9 +654,13 @@ def preprocess_waterbodies(self, lakes, nexus):
654654 def preprocess_data_assimilation (self , flowpaths ):
655655 gages_df = flowpaths [~ flowpaths ['gage' ].isna ()]
656656 if not gages_df .empty :
657+ '''
657658 gages_df = gages_df[['id','gage']]
658659 gages_df['id'] = gages_df['id'].str.split('-',expand=True).loc[:,1].astype(float).astype(int)
659660 gages_df.set_index('id', inplace=True)
661+ import pdb; pdb.set_trace()
662+ '''
663+
660664 '''
661665 gages_df = network[['id','hl_uri','hydroseq']].drop_duplicates()
662666 # clear out missing values
@@ -690,6 +694,29 @@ def preprocess_data_assimilation(self, flowpaths):
690694 )
691695 '''
692696
697+ gages_df = gages_df [['id' ,'gage' ,'hydroseq' ]]
698+ # make 'id' an integer
699+ gages_df ['id' ] = gages_df ['id' ].str .split ('-' ,expand = True ).loc [:,1 ].astype (float ).astype (int )
700+ # Some IDs have multiple gages associated with them. This will expand the dataframe so
701+ # there is a unique row per gage ID. Also adds lake ids to the dataframe for creating
702+ # lake-gage crosswalk dataframes.
703+ gages_df ['gage' ] = gages_df .gage .str .split (', ' )
704+ gages_df = gages_df .explode (column = 'gage' ).set_index ('id' ).join (
705+ pd .DataFrame ().from_dict (self .waterbody_connections ,orient = 'index' ,columns = ['lake_id' ])
706+ )
707+ # transform dataframe into a dictionary where key is segment ID and value is gage ID
708+ usgs_ind = gages_df .gage .str .isnumeric () #usgs gages used for streamflow DA
709+ # Use hydroseq information to determine furthest downstream gage when multiple are present.
710+ idx_id = gages_df .index .name
711+ if not idx_id :
712+ idx_id = 'index'
713+ self ._gages = (
714+ gages_df .loc [usgs_ind ].reset_index ()
715+ .sort_values ('hydroseq' ).drop_duplicates (['gage' ],keep = 'last' )
716+ .set_index (idx_id )[['gage' ]].rename (columns = {'gage' : 'gages' })
717+ .rename_axis (None , axis = 0 ).to_dict ()
718+ )
719+
693720 # transform dataframe into a dictionary where key is segment ID and value is gage ID
694721 usgs_ind = gages_df .gage .str .isnumeric () #usgs gages used for streamflow DA
695722 # Use hydroseq information to determine furthest downstream gage when multiple are present.
@@ -709,7 +736,7 @@ def preprocess_data_assimilation(self, flowpaths):
709736
710737 if 'lake_id' in gages_df .columns :
711738 # Find furthest downstream gage and create our lake_gage_df to make crosswalk dataframes.
712- lake_gage_hydroseq_df = gages_df [~ gages_df ['lake_id' ].isnull ()][['lake_id' , 'value ' , 'hydroseq' ]].rename (columns = {'value ' : 'gages' })
739+ lake_gage_hydroseq_df = gages_df [~ gages_df ['lake_id' ].isnull ()][['lake_id' , 'gage ' , 'hydroseq' ]].rename (columns = {'gage ' : 'gages' })
713740 lake_gage_hydroseq_df ['lake_id' ] = lake_gage_hydroseq_df ['lake_id' ].astype (int )
714741 lake_gage_df = lake_gage_hydroseq_df [['lake_id' ,'gages' ]].drop_duplicates ()
715742 lake_gage_hydroseq_df = lake_gage_hydroseq_df .groupby (['lake_id' ,'gages' ]).max ('hydroseq' ).reset_index ().set_index ('lake_id' )
0 commit comments