Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add some comments and fix a type warning #118

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 25 additions & 2 deletions model/initialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@

def initialize_data(config, train=False, test=False):
print("Initializing data source...")
# data_loader.py: load_data(**conf['data'] from config.py, cache=whether the data should be cached)
train_source, test_source = load_data(**config['data'], cache=(train or test))
if train:
print("Loading training data...")
# Dataset Train.load_all_data
train_source.load_all_data()
if test:
print("Loading test data...")
Expand All @@ -27,11 +29,29 @@ def initialize_model(config, train_source, test_source):
print("Initializing model...")
data_config = config['data']
model_config = config['model']
# A deep copy constructs a new compound object and then,
# recursively, inserts copies into it of the objects found in the original.
# "model": {
# 'hidden_dim': 256,
# 'lr': 1e-4,
# 'hard_or_full_trip': 'full',
# 'batch_size': (8, 16),
# 'restore_iter': 0,
# 'total_iter': 80000,
# 'margin': 0.2,
# 'num_workers': 3,
# 'frame_num': 30,
# 'model_name': 'GaitSet',
# },
model_param = deepcopy(model_config)
# add other parameters in the model dictionary
model_param['train_source'] = train_source
model_param['test_source'] = test_source
model_param['train_pid_num'] = data_config['pid_num']
# Batch size is a term used in machine learning
# and refers to the number of training examples utilized in one iteration.
batch_size = int(np.prod(model_config['batch_size']))
# Define the saved model name
model_param['save_name'] = '_'.join(map(str,[
model_config['model_name'],
data_config['dataset'],
Expand All @@ -44,15 +64,18 @@ def initialize_model(config, train_source, test_source):
model_config['frame_num'],
]))

# create Model object
m = Model(**model_param)
print("Model initialization complete.")
return m, model_param['save_name']


# Note: every `config` argument in this file refers to the `conf` dict in config.py.
def initialization(config, train=False, test=False):
    """Set up the process environment, then build the data sources and model.

    Args:
        config: Configuration mapping. ``config['WORK_PATH']`` and
            ``config['CUDA_VISIBLE_DEVICES']`` are read here; the whole
            mapping is forwarded to ``initialize_data`` and
            ``initialize_model``.
        train: Forwarded to ``initialize_data``.
        test: Forwarded to ``initialize_data``.

    Returns:
        The value returned by ``initialize_model`` for the created sources.
    """
    print("Initialzing...")
    # Change the current working directory to WORK_PATH exactly once.
    # Calling chdir twice would resolve a relative WORK_PATH against the
    # directory that was already switched to.
    os.chdir(config['WORK_PATH'])
    # Select which GPUs CUDA may see, e.g.
    #   os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
    # exposes devices 0 and 1 (named '/gpu:0' and '/gpu:1' in order).
    os.environ["CUDA_VISIBLE_DEVICES"] = config["CUDA_VISIBLE_DEVICES"]
    train_source, test_source = initialize_data(config, train, test)
    return initialize_model(config, train_source, test_source)
6 changes: 6 additions & 0 deletions model/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,16 @@ def __init__(self,
self.img_size = img_size

self.encoder = SetNet(self.hidden_dim).float()
# DataParallel Layers (multi-GPU, distributed)
# Implements data parallelism at the module level.
self.encoder = nn.DataParallel(self.encoder)
self.triplet_loss = TripletLoss(self.P * self.M, self.hard_or_full_trip, self.margin).float()
self.triplet_loss = nn.DataParallel(self.triplet_loss)
# Moves all model parameters and buffers to the GPU.
self.encoder.cuda()
self.triplet_loss.cuda()

# Construct an optimizer that uses the Adam algorithm.
self.optimizer = optim.Adam([
{'params': self.encoder.parameters()},
], lr=self.lr)
Expand Down Expand Up @@ -149,6 +153,7 @@ def fit(self):
_time1 = datetime.now()
for seq, view, seq_type, label, batch_frame in train_loader:
self.restore_iter += 1
# Clear the gradients of all optimized torch.Tensors.
self.optimizer.zero_grad()

for i in range(len(seq)):
Expand Down Expand Up @@ -251,6 +256,7 @@ def transform(self, flag, batch_size=1):

return np.concatenate(feature_list, 0), view_list, seq_type_list, label_list

# Save trained model.
def save(self):
os.makedirs(osp.join('checkpoint', self.model_name), exist_ok=True)
torch.save(self.encoder.state_dict(),
Expand Down
7 changes: 6 additions & 1 deletion model/network/basic_blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
import torch.nn as nn
import torch.nn.functional as F

# Standard write method in doc:
# https://pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module
# Assign the submodules as regular attributes.

# BasicConv2d defines a basic 2D convolution (self.conv) followed by a leaky ReLU.
class BasicConv2d(nn.Module):
def __init__(self, in_channels, out_channels, kernel_size, **kwargs):
super(BasicConv2d, self).__init__()
Expand All @@ -11,7 +16,7 @@ def forward(self, x):
x = self.conv(x)
return F.leaky_relu(x, inplace=True)


# SetBlock wraps a forward block (e.g. BasicConv2d), with optional pooling.
class SetBlock(nn.Module):
def __init__(self, forward_block, pooling=False):
super(SetBlock, self).__init__()
Expand Down
17 changes: 16 additions & 1 deletion model/network/gaitset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@

from .basic_blocks import SetBlock, BasicConv2d


# Extends the PyTorch neural-network package class
# torch.nn.Module: the base class for all neural network modules.
class SetNet(nn.Module):
def __init__(self, hidden_dim):
super(SetNet, self).__init__()
Expand All @@ -13,6 +14,11 @@ def __init__(self, hidden_dim):

_set_in_channels = 1
_set_channels = [32, 64, 128]
# BasicConv2d(self, inChannel, outChannel, kernelSize, **kwargs)
# SetBlock(self, forward_block)
# Modules can also contain other Modules, allowing to nest them in a tree structure.
# So, in the GaitSet, the forward_block is BasicConv2D,
# that means the BasicConv2d modules nested in the SetBlock Module.
self.set_layer1 = SetBlock(BasicConv2d(_set_in_channels, _set_channels[0], 5, padding=2))
self.set_layer2 = SetBlock(BasicConv2d(_set_channels[0], _set_channels[0], 3, padding=1), True)
self.set_layer3 = SetBlock(BasicConv2d(_set_channels[0], _set_channels[1], 3, padding=1))
Expand All @@ -29,6 +35,12 @@ def __init__(self, hidden_dim):
self.gl_pooling = nn.MaxPool2d(2)

self.bin_num = [1, 2, 4, 8, 16]
# torch.nn.ParameterList can be indexed like a regular Python list,
# but parameters it contains are properly registered,
# and will be visible by all Module methods.
#
# nn.init.xavier_uniform_: Fills the input Tensor with values.
# See https://pytorch.org/docs/stable/nn.init.html
self.fc_bin = nn.ParameterList([
nn.Parameter(
nn.init.xavier_uniform_(
Expand All @@ -44,6 +56,7 @@ def __init__(self, hidden_dim):
nn.init.normal(m.weight.data, 1.0, 0.02)
nn.init.constant(m.bias.data, 0.0)

# The max(·) function proposed in the paper;
def frame_max(self, x):
if self.batch_frame is None:
return torch.max(x, 1)
Expand All @@ -56,6 +69,7 @@ def frame_max(self, x):
arg_max_list = torch.cat([_tmp[i][1] for i in range(len(_tmp))], 0)
return max_list, arg_max_list

# The median(·) function proposed in the paper;
def frame_median(self, x):
if self.batch_frame is None:
return torch.median(x, 1)
Expand All @@ -68,6 +82,7 @@ def frame_median(self, x):
arg_median_list = torch.cat([_tmp[i][1] for i in range(len(_tmp))], 0)
return median_list, arg_median_list


def forward(self, silho, batch_frame=None):
# n: batch_size, s: frame_num, k: keypoints_num, c: channel
if batch_frame is not None:
Expand Down
6 changes: 3 additions & 3 deletions model/network/triplet.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import torch.nn as nn
import torch.nn.functional as F


# utils class: TripletLoss
class TripletLoss(nn.Module):
def __init__(self, batch_size, hard_or_full, margin):
super(TripletLoss, self).__init__()
Expand All @@ -12,8 +12,8 @@ def __init__(self, batch_size, hard_or_full, margin):
def forward(self, feature, label):
# feature: [n, m, d], label: [n, m]
n, m, d = feature.size()
hp_mask = (label.unsqueeze(1) == label.unsqueeze(2)).byte().view(-1)
hn_mask = (label.unsqueeze(1) != label.unsqueeze(2)).byte().view(-1)
hp_mask = (label.unsqueeze(1) == label.unsqueeze(2)).bool().view(-1)
hn_mask = (label.unsqueeze(1) != label.unsqueeze(2)).bool().view(-1)

dist = self.batch_dist(feature)
mean_dist = dist.mean(1).mean(1)
Expand Down
25 changes: 24 additions & 1 deletion model/utils/data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,43 +5,66 @@

from .data_set import DataSet


# The parameters are in the config.py: conf
# resolution: conf@data:resolution
# pid_num: conf@data: pid_num;
# pid_shuffle: conf@data: pid_shuffle
def load_data(dataset_path, resolution, dataset, pid_num, pid_shuffle, cache=True):

# define an empty list
seq_dir = list()
view = list()
seq_type = list()
label = list()

# _label: the subdir in dataset_path
for _label in sorted(list(os.listdir(dataset_path))):
# In CASIA-B, data of subject #5 is incomplete.
# Thus, we ignore it in training.
if dataset == 'CASIA-B' and _label == '005':
continue
# label_path: /dataset path/subject id/
# eg: /casia-b/001/
label_path = osp.join(dataset_path, _label)
# _seq_type: NM-01, BG-01, CL-01, ...
for _seq_type in sorted(list(os.listdir(label_path))):
# seq_type_path: /dataset path/subject id/sequence id/
# eg: casia-b/001/NM-01/
seq_type_path = osp.join(label_path, _seq_type)
# _view:00, 18, ..., 162, 180
for _view in sorted(list(os.listdir(seq_type_path))):
# _seq_dir: casia-b/001/NM-01/45
_seq_dir = osp.join(seq_type_path, _view)
# seqs: 001-bg-01-054-031.png, ...
seqs = os.listdir(_seq_dir)
if len(seqs) > 0:
seq_dir.append([_seq_dir])
label.append(_label)
seq_type.append(_seq_type)
view.append(_view)

# split a dataset into training set and testing set
pid_fname = osp.join('partition', '{}_{}_{}.npy'.format(
dataset, pid_num, pid_shuffle))
if not osp.exists(pid_fname):
pid_list = sorted(list(set(label)))
if pid_shuffle:
np.random.shuffle(pid_list)
# split pid_list into the first pid_num ids (training) and the remaining ids (testing)
pid_list = [pid_list[0:pid_num], pid_list[pid_num:]]
os.makedirs('partition', exist_ok=True)
# numpy.save(file, arr, allow_pickle=True, fix_imports=True)
# Save an array to a binary file in NumPy .npy format.
np.save(pid_fname, pid_list)

# Load arrays or pickled objects from .npy, .npz or pickled files
pid_list = np.load(pid_fname)
# train_list = [0,..,72]
train_list = pid_list[0]
# test_list = [73, ...]
test_list = pid_list[1]
# enumerate(): yields (index, value) pairs
# i: index into label, l: subject id at that index
train_source = DataSet(
[seq_dir[i] for i, l in enumerate(label) if l in train_list],
[label[i] for i, l in enumerate(label) if l in train_list],
Expand Down
21 changes: 20 additions & 1 deletion model/utils/data_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
import cv2
import xarray as xr


# Extends torch.utils.data.Dataset.
# Dataset implementation for gait recognition; each sample carries
# a label, a sequence type [NM, BG, CL], a sequence directory and a view angle.
class DataSet(tordata.Dataset):
def __init__(self, seq_dir, label, seq_type, view, cache, resolution):
self.seq_dir = seq_dir
Expand All @@ -15,18 +17,22 @@ def __init__(self, seq_dir, label, seq_type, view, cache, resolution):
self.label = label
self.cache = cache
self.resolution = int(resolution)
# cut_padding scales with resolution: it is 10 when resolution is 64
self.cut_padding = int(float(resolution)/64*10)
self.data_size = len(self.label)
self.data = [None] * self.data_size
self.frame_set = [None] * self.data_size

# remove duplicated value
self.label_set = set(self.label)
self.seq_type_set = set(self.seq_type)
self.view_set = set(self.view)
# Return a new array of given shape and type, filled with zeros.
_ = np.zeros((len(self.label_set),
len(self.seq_type_set),
len(self.view_set))).astype('int')
_ -= 1
# DataArray(values, Coordinates, dims)
self.index_dict = xr.DataArray(
_,
coords={'label': sorted(list(self.label_set)),
Expand All @@ -38,6 +44,8 @@ def __init__(self, seq_dir, label, seq_type, view, cache, resolution):
_label = self.label[i]
_seq_type = self.seq_type[i]
_view = self.view[i]
# .loc provides label-based indexing; here it stores the sample
# index i at the position (label, seq_type, view).
self.index_dict.loc[_label, _seq_type, _view] = i

def load_all_data(self):
Expand All @@ -47,11 +55,13 @@ def load_all_data(self):
def load_data(self, index):
    """Return the sample at ``index``.

    Delegates to ``__getitem__`` and returns its result unchanged.
    """
    sample = self.__getitem__(index)
    return sample

# load the image to xarray
def __loader__(self, path):
    """Load the frames under ``path``, crop the last axis, and scale by 1/255.

    Args:
        path: Passed straight to ``self.img2xarray``.

    Returns:
        The array from ``self.img2xarray`` with ``self.cut_padding`` entries
        removed from both ends of its third axis, cast to float32 and
        divided by 255.0 (presumably mapping 8-bit pixels to [0, 1] --
        TODO confirm against the image format).
    """
    frames = self.img2xarray(path)
    pad = self.cut_padding
    # NOTE: a cut_padding of 0 makes this slice ``0:-0``, which is empty.
    cropped = frames[:, :, pad:-pad]
    as_float = cropped.astype('float32')
    return as_float / 255.0

# override __getitem__ from the base Dataset class
def __getitem__(self, index):
# pose sequence sampling
if not self.cache:
Expand All @@ -71,8 +81,16 @@ def __getitem__(self, index):
return data, frame_set, self.view[
index], self.seq_type[index], self.label[index],

# Image to Xarray
def img2xarray(self, flie_path):
imgs = sorted(list(os.listdir(flie_path)))
# [:, :, 0] keeps index 0 along the last axis (the first channel) of the 3-D array
# [:, :, 1] would keep index 1 along the last axis (the second channel)
# np.reshape the image data to [64, 64, -1]
# When using a -1, the dimension corresponding to the -1
# will be the product of the dimensions of the original array
# divided by the product of the dimensions given to reshape
# so as to maintain the same number of elements.
frame_list = [np.reshape(
cv2.imread(osp.join(flie_path, _img_path)),
[self.resolution, self.resolution, -1])[:, :, 0]
Expand All @@ -86,5 +104,6 @@ def img2xarray(self, flie_path):
)
return data_dict

# override __len__ from the base Dataset class
def __len__(self):
    """Return the number of samples, i.e. the length of ``self.label``."""
    sample_count = len(self.label)
    return sample_count