AbnerHqC · Inger-Chao · Sep 24, 2020 · Sep 24, 2020
diff --git a/model/initialization.py b/model/initialization.py
@@ -12,9 +12,11 @@
 
 def initialize_data(config, train=False, test=False):
     print("Initializing data source...")
+    # data_loader.py: load_data(**config['data'] from config.py's conf, cache=whether to cache)
     train_source, test_source = load_data(**config['data'], cache=(train or test))
     if train:
         print("Loading training data...")
+        # Call DataSet.load_all_data() on the training source
         train_source.load_all_data()
     if test:
         print("Loading test data...")
@@ -27,11 +29,29 @@ def initialize_model(config, train_source, test_source):
     print("Initializing model...")
     data_config = config['data']
     model_config = config['model']
+    # A deep copy constructs a new compound object and then,
+    # recursively, inserts copies into it of the objects found in the original.
+    #    "model": {
+    #     'hidden_dim': 256,
+    #     'lr': 1e-4,
+    #     'hard_or_full_trip': 'full',
+    #     'batch_size': (8, 16),
+    #     'restore_iter': 0,
+    #     'total_iter': 80000,
+    #     'margin': 0.2,
+    #     'num_workers': 3,
+    #     'frame_num': 30,
+    #     'model_name': 'GaitSet',
+    # },
     model_param = deepcopy(model_config)
+    # add other parameters in the model dictionary
     model_param['train_source'] = train_source
     model_param['test_source'] = test_source
     model_param['train_pid_num'] = data_config['pid_num']
+    # Batch size is a term used in machine learning
+    # and refers to the number of training examples utilized in one iteration.
     batch_size = int(np.prod(model_config['batch_size']))
+    # Define the saved model name
     model_param['save_name'] = '_'.join(map(str,[
         model_config['model_name'],
         data_config['dataset'],
@@ -44,15 +64,18 @@ def initialize_model(config, train_source, test_source):
         model_config['frame_num'],
     ]))
 
+    # create Model object
     m = Model(**model_param)
     print("Model initialization complete.")
     return m, model_param['save_name']
 
-
+# Note: every `config` argument here refers to the `conf` dict in config.py
 def initialization(config, train=False, test=False):
     print("Initialzing...")
     WORK_PATH = config['WORK_PATH']
-    os.chdir(WORK_PATH)
+    os.chdir(WORK_PATH) # Change current work path to WORK_PATH
+    # e.g. os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
+    # restricts this process to GPU devices 0 and 1, which are then named '/gpu:0' and '/gpu:1' in order
     os.environ["CUDA_VISIBLE_DEVICES"] = config["CUDA_VISIBLE_DEVICES"]
     train_source, test_source = initialize_data(config, train, test)
     return initialize_model(config, train_source, test_source)
diff --git a/model/model.py b/model/model.py
@@ -55,12 +55,16 @@ def __init__(self,
         self.img_size = img_size
 
         self.encoder = SetNet(self.hidden_dim).float()
+        # DataParallel Layers (multi-GPU, distributed)
+        # Implements data parallelism at the module level.
         self.encoder = nn.DataParallel(self.encoder)
         self.triplet_loss = TripletLoss(self.P * self.M, self.hard_or_full_trip, self.margin).float()
         self.triplet_loss = nn.DataParallel(self.triplet_loss)
+        # Moves all model parameters and buffers to the GPU.
         self.encoder.cuda()
         self.triplet_loss.cuda()
 
+        # Construct an optimizer that uses the Adam algorithm.
         self.optimizer = optim.Adam([
             {'params': self.encoder.parameters()},
         ], lr=self.lr)
@@ -149,6 +153,7 @@ def fit(self):
         _time1 = datetime.now()
         for seq, view, seq_type, label, batch_frame in train_loader:
             self.restore_iter += 1
+            # Clear the gradients of all optimized torch.Tensors.
             self.optimizer.zero_grad()
 
             for i in range(len(seq)):
@@ -251,6 +256,7 @@ def transform(self, flag, batch_size=1):
 
         return np.concatenate(feature_list, 0), view_list, seq_type_list, label_list
 
+    # Save trained model.
     def save(self):
         os.makedirs(osp.join('checkpoint', self.model_name), exist_ok=True)
         torch.save(self.encoder.state_dict(),

diff --git a/model/network/basic_blocks.py b/model/network/basic_blocks.py
@@ -2,6 +2,11 @@
 import torch.nn as nn
 import torch.nn.functional as F
 
+# Standard way of writing a module, as shown in the docs:
+# https://pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module
+# Assign the submodules as regular attributes.
+
+# BasicConv2d defines a basic 2D convolutional layer followed by a leaky ReLU
 class BasicConv2d(nn.Module):
     def __init__(self, in_channels, out_channels, kernel_size, **kwargs):
         super(BasicConv2d, self).__init__()
@@ -11,7 +16,7 @@ def forward(self, x):
         x = self.conv(x)
         return F.leaky_relu(x, inplace=True)
 
-
+# SetBlock wraps a forward_block module, with optional pooling
 class SetBlock(nn.Module):
     def __init__(self, forward_block, pooling=False):
         super(SetBlock, self).__init__()

diff --git a/model/network/gaitset.py b/model/network/gaitset.py
@@ -4,7 +4,8 @@
 
 from .basic_blocks import SetBlock, BasicConv2d
 
-
+# Extends the PyTorch neural network package's
+# torch.nn.Module: the base class for all neural network modules.
 class SetNet(nn.Module):
     def __init__(self, hidden_dim):
         super(SetNet, self).__init__()
@@ -13,6 +14,11 @@ def __init__(self, hidden_dim):
 
         _set_in_channels = 1
         _set_channels = [32, 64, 128]
+        # BasicConv2d(self, in_channels, out_channels, kernel_size, **kwargs)
+        # SetBlock(self, forward_block, pooling=False)
+        # Modules can also contain other Modules, which allows nesting them in a tree structure.
+        # So, in GaitSet, the forward_block is a BasicConv2d,
+        # which means each BasicConv2d module is nested inside a SetBlock module.
         self.set_layer1 = SetBlock(BasicConv2d(_set_in_channels, _set_channels[0], 5, padding=2))
         self.set_layer2 = SetBlock(BasicConv2d(_set_channels[0], _set_channels[0], 3, padding=1), True)
         self.set_layer3 = SetBlock(BasicConv2d(_set_channels[0], _set_channels[1], 3, padding=1))
@@ -29,6 +35,12 @@ def __init__(self, hidden_dim):
         self.gl_pooling = nn.MaxPool2d(2)
 
         self.bin_num = [1, 2, 4, 8, 16]
+        # torch.nn.ParameterList can be indexed like a regular Python list,
+        # but parameters it contains are properly registered,
+        # and will be visible by all Module methods.
+        #
+        # nn.init.xavier_uniform_: fills the input Tensor in place with values drawn from a Xavier (Glorot) uniform distribution.
+        # See https://pytorch.org/docs/stable/nn.init.html
         self.fc_bin = nn.ParameterList([
             nn.Parameter(
                 nn.init.xavier_uniform_(
@@ -44,6 +56,7 @@ def __init__(self, hidden_dim):
                 nn.init.normal(m.weight.data, 1.0, 0.02)
                 nn.init.constant(m.bias.data, 0.0)
 
+    # The max(·) function proposed in the paper.
     def frame_max(self, x):
         if self.batch_frame is None:
             return torch.max(x, 1)
@@ -56,6 +69,7 @@ def frame_max(self, x):
             arg_max_list = torch.cat([_tmp[i][1] for i in range(len(_tmp))], 0)
             return max_list, arg_max_list
 
+    # The median(·) function proposed in the paper.
     def frame_median(self, x):
         if self.batch_frame is None:
             return torch.median(x, 1)
@@ -68,6 +82,7 @@ def frame_median(self, x):
             arg_median_list = torch.cat([_tmp[i][1] for i in range(len(_tmp))], 0)
             return median_list, arg_median_list
 
+
     def forward(self, silho, batch_frame=None):
         # n: batch_size, s: frame_num, k: keypoints_num, c: channel
         if batch_frame is not None:

diff --git a/model/network/triplet.py b/model/network/triplet.py
@@ -2,7 +2,7 @@
 import torch.nn as nn
 import torch.nn.functional as F
 
-
+# utils class: TripletLoss
 class TripletLoss(nn.Module):
     def __init__(self, batch_size, hard_or_full, margin):
         super(TripletLoss, self).__init__()
@@ -12,8 +12,8 @@ def __init__(self, batch_size, hard_or_full, margin):
     def forward(self, feature, label):
         # feature: [n, m, d], label: [n, m]
         n, m, d = feature.size()
-        hp_mask = (label.unsqueeze(1) == label.unsqueeze(2)).byte().view(-1)
-        hn_mask = (label.unsqueeze(1) != label.unsqueeze(2)).byte().view(-1)
+        hp_mask = (label.unsqueeze(1) == label.unsqueeze(2)).bool().view(-1)
+        hn_mask = (label.unsqueeze(1) != label.unsqueeze(2)).bool().view(-1)
 
         dist = self.batch_dist(feature)
         mean_dist = dist.mean(1).mean(1)

diff --git a/model/utils/data_loader.py b/model/utils/data_loader.py
@@ -5,43 +5,66 @@
 
 from .data_set import DataSet
 
-
+# The parameters are in the config.py: conf
+# resolution: conf@data:resolution
+# pid_num: conf@data: pid_num;
+# pid_shuffle: conf@data: pid_shuffle
 def load_data(dataset_path, resolution, dataset, pid_num, pid_shuffle, cache=True):
+
+    # define an empty list
     seq_dir = list()
     view = list()
     seq_type = list()
     label = list()
 
+    # _label: the subdir in dataset_path
     for _label in sorted(list(os.listdir(dataset_path))):
         # In CASIA-B, data of subject #5 is incomplete.
         # Thus, we ignore it in training.
         if dataset == 'CASIA-B' and _label == '005':
             continue
+        # label_path: /dataset path/subject id/
+        # eg: /casia-b/001/
         label_path = osp.join(dataset_path, _label)
+        # _seq_type: NM-01, BG-01, CL-01, ...
         for _seq_type in sorted(list(os.listdir(label_path))):
+            # seq_type_path: /dataset path/subject id/sequence id/
+            # eg: casia-b/001/NM-01/
             seq_type_path = osp.join(label_path, _seq_type)
+            # _view:00, 18, ..., 162, 180
             for _view in sorted(list(os.listdir(seq_type_path))):
+                # _seq_dir: casia-b/001/NM-01/45
                 _seq_dir = osp.join(seq_type_path, _view)
+                # seqs: 001-bg-01-054-031.png, ...
                 seqs = os.listdir(_seq_dir)
                 if len(seqs) > 0:
                     seq_dir.append([_seq_dir])
                     label.append(_label)
                     seq_type.append(_seq_type)
                     view.append(_view)
 
+    # split a dataset into training set and testing set
     pid_fname = osp.join('partition', '{}_{}_{}.npy'.format(
         dataset, pid_num, pid_shuffle))
     if not osp.exists(pid_fname):
         pid_list = sorted(list(set(label)))
         if pid_shuffle:
             np.random.shuffle(pid_list)
+        # split pid_list into the first pid_num ids and the rest (e.g. 0-72 and 73-end when pid_num is 73)
         pid_list = [pid_list[0:pid_num], pid_list[pid_num:]]
         os.makedirs('partition', exist_ok=True)
+        # numpy.save(file, arr, allow_pickle=True, fix_imports=True)
+        # Save an array to a binary file in NumPy .npy format.
         np.save(pid_fname, pid_list)
 
+    # Load arrays or pickled objects from .npy, .npz or pickled files
     pid_list = np.load(pid_fname)
+    # train_list = [0,..,72]
     train_list = pid_list[0]
+    # test_list = [73, ...]
     test_list = pid_list[1]
+    # enumerate() yields (index, value) pairs
+    # i: index into the parallel lists, l: subject id (label)
     train_source = DataSet(
         [seq_dir[i] for i, l in enumerate(label) if l in train_list],
         [label[i] for i, l in enumerate(label) if l in train_list],

diff --git a/model/utils/data_set.py b/model/utils/data_set.py
@@ -6,7 +6,9 @@
 import cv2
 import xarray as xr
 
-
+# Extends torch.utils.data.Dataset.
+# Builds the dataset implementation for gait recognition,
+# which holds, per sequence: label, type [NM, BG, CL], sequence directory, and view angle.
 class DataSet(tordata.Dataset):
     def __init__(self, seq_dir, label, seq_type, view, cache, resolution):
         self.seq_dir = seq_dir
@@ -15,18 +17,22 @@ def __init__(self, seq_dir, label, seq_type, view, cache, resolution):
         self.label = label
         self.cache = cache
         self.resolution = int(resolution)
+        # cut_padding = 10 when resolution is 64 (scales linearly with resolution)
         self.cut_padding = int(float(resolution)/64*10)
         self.data_size = len(self.label)
         self.data = [None] * self.data_size
         self.frame_set = [None] * self.data_size
 
+        # remove duplicate values by converting to sets
         self.label_set = set(self.label)
         self.seq_type_set = set(self.seq_type)
         self.view_set = set(self.view)
+        # Return a new array of given shape and type, filled with zeros.
         _ = np.zeros((len(self.label_set),
                       len(self.seq_type_set),
                       len(self.view_set))).astype('int')
         _ -= 1
+        # DataArray(values, Coordinates, dims)
         self.index_dict = xr.DataArray(
             _,
             coords={'label': sorted(list(self.label_set)),
@@ -38,6 +44,8 @@ def __init__(self, seq_dir, label, seq_type, view, cache, resolution):
             _label = self.label[i]
             _seq_type = self.seq_type[i]
             _view = self.view[i]
+            # DataArray.loc provides label-based indexing; here it is used to
+            # store sample index i at the (label, seq_type, view) coordinate.
             self.index_dict.loc[_label, _seq_type, _view] = i
 
     def load_all_data(self):
@@ -47,11 +55,13 @@ def load_all_data(self):
     def load_data(self, index):
         return self.__getitem__(index)
 
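+    # load the images under `path` via img2xarray, crop cut_padding from both ends of the last axis, and scale to float32 in [0, 1]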
     def __loader__(self, path):
         return self.img2xarray(
             path)[:, :, self.cut_padding:-self.cut_padding].astype(
             'float32') / 255.0
 
+    # override __getitem__ of the parent Dataset class
     def __getitem__(self, index):
         # pose sequence sampling
         if not self.cache:
@@ -71,8 +81,16 @@ def __getitem__(self, index):
         return data, frame_set, self.view[
             index], self.seq_type[index], self.label[index],
 
+    # Image to Xarray
     def img2xarray(self, flie_path):
         imgs = sorted(list(os.listdir(flie_path)))
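+        # [:, :, 0] selects index 0 along the last axis (the first channel) of the 3-D array
+        # [:, :, 1] would select index 1 along the last axis (the second channel)
+        # np.reshape reshapes the image data to [self.resolution, self.resolution, -1], e.g. [64, 64, -1]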
+        # When using a -1, the dimension corresponding to the -1
+        # will be the product of the dimensions of the original array
+        # divided by the product of the dimensions given to reshape
+        # so as to maintain the same number of elements.
         frame_list = [np.reshape(
             cv2.imread(osp.join(flie_path, _img_path)),
             [self.resolution, self.resolution, -1])[:, :, 0]
@@ -86,5 +104,6 @@ def img2xarray(self, flie_path):
         )
         return data_dict
 
+    # override __len__ of the parent Dataset class
     def __len__(self):
         return len(self.label)