Add LSTM warm-up support and a fix_seed helper

JiangtaoLiud · JiangtaoLiud · commit be7adcd60aa8 · 2022-06-14T21:47:36.000-04:00
diff --git a/example/tutorial_quick_start.py b/example/tutorial_quick_start.py
@@ -82,7 +82,7 @@
 c_val = val_csv.load_constant(var_constant, convert_time_series=False)
 y_val = val_csv.load_time_series(target, remove_nan=False)
 
-val_epoch = 100 # Select the epoch for testing
+val_epoch = EPOCH # Select the epoch for testing
 
 # load the model
 test_model = loadModel(output_s, epoch=val_epoch)
diff --git a/example/tutorial_quick_start_warm_up.py b/example/tutorial_quick_start_warm_up.py
@@ -0,0 +1,109 @@
+import os
+import torch
+import numpy as np
+
+import sys 
+sys.path.append('..')
+
+from hydroDL.master.master import loadModel
+from hydroDL.model.crit import RmseLoss
+from hydroDL.model.rnn import CudnnLstmModel as LSTM
+from hydroDL.model.rnn import CpuLstmModel as LSTM_CPU
+from hydroDL.model.train import trainModel
+from hydroDL.model.test import testModel
+from hydroDL.post.stat import statError as cal_metric
+from hydroDL.data.load_csv import LoadCSV
+from hydroDL.utils.norm import re_folder, trans_norm
+from hydroDL.utils.norm import fix_seed
+
+# ---- run configuration ----
+# Seed the Python, NumPy and PyTorch RNGs before anything else runs so the tutorial is repeatable.
+fix_seed(42)
+output_s = "./output/quick_start/"  # folder that receives checkpoints and predictions
+csv_path_s = "./demo_data/"  # folder holding the demo CSV data
+all_date_list = ["2015-04-01", "2017-03-31"]  # [start, end] of the whole demo record
+train_date_list = ["2015-04-01", "2016-03-31"]  # [start, end] of the training period
+# names of the time-series (forcing) input variables
+var_time_series = ["VGRD_10_FORA", "DLWRF_FORA", "UGRD_10_FORA", "DSWRF_FORA", "TMP_2_FORA", "SPFH_2_FORA", "APCP_FORA", ]
+# names of the constant (static attribute) input variables
+var_constant = ["flag_extraOrd", "Clay", "Bulk", "Sand", "flag_roughness", "flag_landcover", "flag_vegDense", "Silt", "NDVI",
+         "flag_albedo", "flag_waterbody", "Capa", ]
+# name of the target variable (a single output)
+target = ["SMAP_AM"]
+
+# prepare the output folder
+# NOTE(review): re_folder's body is not visible here; check whether it clears an existing folder.
+re_folder(output_s)
+
+# ---- hyperparameters ----
+EPOCH = 20  # number of training epochs; also the checkpoint epoch loaded for validation
+BATCH_SIZE = 50  # first entry of trainModel's miniBatch argument
+RHO = 30  # second entry of miniBatch -- presumably the training sequence length; TODO confirm
+HIDDEN_SIZE = 256  # passed to the model as hiddenSize
+WARM_UP_DAY = 10  # passed to the model as warmUpDay
+# Alternative setting: None makes CudnnLstmModel.forward skip the warm-up extension entirely.
+# WARM_UP_DAY = None
+
+# ---- load the training data ----
+# The bare string below is an expression statement used as a block comment; it has no runtime effect.
+"""
+You can change it with your data. The data structure is as follows:
+x_train (forcing data, e.g. precipitation, temperature ...): [pixels, time, features] 
+c_train (constant data, e.g. soil properties, land cover ...): [pixels, features]
+target (e.g. soil moisture, streamflow ...): [pixels, time, 1]
+
+Data type: numpy.float
+"""
+# The loader receives the training window together with the full record window.
+train_csv = LoadCSV(csv_path_s, train_date_list, all_date_list)
+x_train = train_csv.load_time_series(var_time_series)  # forcing inputs, [pixels, time, features]
+c_train = train_csv.load_constant(var_constant, convert_time_series=False)  # static inputs, [pixels, features]
+# remove_nan=False is passed for the target -- presumably missing observations stay NaN; verify in LoadCSV
+y_train = train_csv.load_time_series(target, remove_nan=False)  # target, [pixels, time, 1]
+
+# ---- define the loss function and the model ----
+loss_fn = RmseLoss()  # training loss
+# LSTM is imported as the cuDNN-backed model; fall back to the CPU implementation only when CUDA is
+# unavailable. (The previous GPU branch, `LSTM = LSTM`, was a no-op self-assignment.)
+if not torch.cuda.is_available():
+    LSTM = LSTM_CPU
+# Input width is the forcing variables plus the constant variables; output width is the number of targets.
+# NOTE(review): warmUpDay is passed on both paths -- confirm CpuLstmModel accepts this keyword as well.
+model = LSTM(nx=len(var_time_series) + len(var_constant), ny=len(target), hiddenSize=HIDDEN_SIZE, warmUpDay=WARM_UP_DAY)
+
+# ---- train the model ----
+# Checkpoints go to output_s; the validation step later loads the checkpoint for epoch EPOCH from there.
+last_model = trainModel(
+    model,
+    x_train,
+    y_train,
+    c_train,
+    loss_fn,
+    nEpoch=EPOCH,
+    miniBatch=[BATCH_SIZE, RHO],
+    saveEpoch=1,  # presumably "save a checkpoint every epoch" -- TODO confirm in trainModel
+    saveFolder=output_s,
+)
+
+# ---- validate the result ----
+# load the validation datasets
+val_date_list = ["2016-04-01", "2017-03-31"]  # validation period
+# loaded the same way as the training data, only the time window differs
+val_csv = LoadCSV(csv_path_s, val_date_list, all_date_list)
+x_val = val_csv.load_time_series(var_time_series)
+c_val = val_csv.load_constant(var_constant, convert_time_series=False)
+y_val = val_csv.load_time_series(target, remove_nan=False)
+
+val_epoch = EPOCH  # epoch whose checkpoint is evaluated (the last trained epoch)
+
+# load the model saved for that epoch
+test_model = loadModel(output_s, epoch=val_epoch)
+
+# path of the CSV file that receives the predictions
+save_csv = os.path.join(output_s, "predict.csv")
+
+# run the model on the validation inputs
+# The batch size is taken from the validation array itself (it was previously len(x_train), which is
+# only right when both arrays happen to have the same first dimension).
+pred_val = testModel(test_model, x_val, c_val, batchSize=len(x_val), filePathLst=[save_csv],)
+
+# metrics to report
+metrics_list = ["Bias", "RMSE", "ubRMSE", "Corr"]
+pred_val = pred_val.numpy()
+# denormalization: map predictions and observations back from normalized values
+pred_val = trans_norm(pred_val, csv_path_s, var_s=target[0], from_raw=False)
+y_val = trans_norm(y_val, csv_path_s, var_s=target[0], from_raw=False)
+# drop the singleton dimensions before computing the statistics
+pred_val, y_val = np.squeeze(pred_val), np.squeeze(y_val)
+metrics_dict = cal_metric(pred_val, y_val)  # calculate the metrics
+# report the NaN-ignoring median of each selected metric
+metrics = ["Median {}: {:.4f}".format(name, np.nanmedian(metrics_dict[name])) for name in metrics_list]
+print("Epoch {}: {}".format(val_epoch, metrics))
diff --git a/hydroDL/model/rnn/CudnnLstmModel.py b/hydroDL/model/rnn/CudnnLstmModel.py
@@ -12,7 +12,7 @@
 
 
 class CudnnLstmModel(torch.nn.Module):
-    def __init__(self, *, nx, ny, hiddenSize, dr=0.5):
+    def __init__(self, *, nx, ny, hiddenSize, dr=0.5, warmUpDay=None):
         super(CudnnLstmModel, self).__init__()
         self.nx = nx
         self.ny = ny
@@ -32,6 +32,7 @@ def __init__(self, *, nx, ny, hiddenSize, dr=0.5):
         self.name = "CudnnLstmModel"
         self.is_legacy = True
         # self.drtest = torch.nn.Dropout(p=0.4)
+        self.warmUpDay = warmUpDay
 
     def forward(self, x, doDropMC=False, dropoutFalse=False):
         """
@@ -41,6 +42,9 @@ def forward(self, x, doDropMC=False, dropoutFalse=False):
         :param dropoutFalse:
         :return:
         """
+        if not self.warmUpDay is None:
+            x, warmUpDay = self.extend_day(x, warmUpDay=self.warmUpDay)
+
         x0 = F.relu(self.linearIn(x))
         if torch.__version__ > "1.9":
             outLSTM, (hn, cn) = self.lstm(x0)
@@ -50,4 +54,19 @@ def forward(self, x, doDropMC=False, dropoutFalse=False):
             )
         # outLSTMdr = self.drtest(outLSTM)
         out = self.linearOut(outLSTM)
+
+        if not self.warmUpDay is None:
+            out = self.reduce_day(out, warmUpDay=warmUpDay)
+
         return out
+
+    def extend_day(self, x, warm_up_day=None, warmUpDay=None):
+        """
+        Prepend a warm-up prefix to ``x`` by repeating its leading steps.
+
+        The first ``warm_up_day`` slices along dim 0 are copied and concatenated in front of ``x``.
+        The count is clamped to ``x.shape[0]``, so at most one full copy of ``x`` is prepended.
+
+        :param x: 3-D tensor; dim 0 is presumably the time axis (TODO confirm against the caller)
+        :param warm_up_day: requested number of leading steps to repeat
+        :param warmUpDay: alias of ``warm_up_day``; ``forward`` passes the value under this keyword,
+            which used to raise ``TypeError`` because only ``warm_up_day`` was accepted
+        :return: tuple ``(extended_x, used_days)`` where ``used_days`` is the clamped count that
+            ``reduce_day`` needs in order to strip the prefix again
+        :raises TypeError: if neither ``warm_up_day`` nor ``warmUpDay`` is given
+        """
+        if warm_up_day is None:
+            warm_up_day = warmUpDay
+        if warm_up_day is None:
+            raise TypeError("extend_day() requires 'warm_up_day' (or 'warmUpDay')")
+        # cannot repeat more steps than the input actually holds
+        warm_up_day = min(x.shape[0], warm_up_day)
+        x_select = x[:warm_up_day, :, :]
+        x = torch.cat([x_select, x], dim=0)
+        return x, warm_up_day
+
+    def reduce_day(self, x, warm_up_day=None, warmUpDay=None):
+        """
+        Drop the first ``warm_up_day`` slices along dim 0 of ``x``.
+
+        :param x: 3-D tensor whose leading slices are to be removed
+        :param warm_up_day: number of leading slices to drop
+        :param warmUpDay: alias of ``warm_up_day``; ``forward`` passes the value under this keyword,
+            which used to raise ``TypeError`` because only ``warm_up_day`` was accepted
+        :return: ``x`` without its first ``warm_up_day`` slices
+        :raises TypeError: if neither ``warm_up_day`` nor ``warmUpDay`` is given
+        """
+        if warm_up_day is None:
+            warm_up_day = warmUpDay
+        if warm_up_day is None:
+            raise TypeError("reduce_day() requires 'warm_up_day' (or 'warmUpDay')")
+        return x[warm_up_day:, :, :]
diff --git a/hydroDL/utils/norm.py b/hydroDL/utils/norm.py
@@ -69,3 +69,17 @@ def re_folder(path_s, del_old_path=False):
             pass
     else:
         re_folder_rec(path_s)
+
+def fix_seed(SEED):
+    """
+    Seed every random number generator used by the project and make cuDNN deterministic.
+
+    Seeds Python's ``random``, NumPy and PyTorch (CPU, current CUDA device and all CUDA devices),
+    turns on deterministic cuDNN kernels, turns off the cuDNN auto-tuner, and exports
+    ``PYTHONHASHSEED``.
+
+    :param SEED: integer seed applied to all generators
+    :return: None
+    """
+    import os
+    import random
+
+    import numpy as np
+    import torch
+
+    # NOTE: hash randomization of the running interpreter is fixed at start-up; the variable set
+    # here is only seen by processes launched afterwards.
+    os.environ["PYTHONHASHSEED"] = str(SEED)
+
+    # reproducibility over speed: fixed cuDNN algorithms, no benchmark-based selection
+    torch.backends.cudnn.benchmark = False
+    torch.backends.cudnn.deterministic = True
+
+    seeders = (
+        random.seed,
+        np.random.seed,
+        torch.manual_seed,
+        torch.cuda.manual_seed,
+        torch.cuda.manual_seed_all,
+    )
+    for seed_rng in seeders:
+        seed_rng(SEED)