Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
ximitiejiang committed Dec 3, 2019
1 parent e130a87 commit 7e23899
Show file tree
Hide file tree
Showing 14 changed files with 153 additions and 29 deletions.
2 changes: 2 additions & 0 deletions demo/readme_base.md
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,8 @@
0. 先写结论:通过重写data_parallel_model以及里边的scatter()函数,来拆开data_container,并送model, batch_data到指定gpu设备。
并通过to_tensor函数,把数据转换成pytorch需要的float()、long()类型,防止训练出错。
整个过程封装得非常隐蔽,虽然减少了用户出错的几率,但也让使用者不清楚应该有什么是需要做的,有什么是系统帮忙做掉的,在哪做掉的。
并且mmdetection当前只支持dataparallel模式,即使gpu=1也必须设置成dataparallel模式,因为必须依靠dataparallel模式里边
的scatter函数对data container拆包。如果不是dataparallel模式则在前向计算result = model(img)时会报错。
具体来说:
- 定义了一个data container,用来打包数据
Expand Down
2 changes: 1 addition & 1 deletion demo/retinanet_voc/cfg_detector_retinanet_resnet50_voc.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
type='resnet',
params=dict(
depth=50,
pretrained= '/home/ubuntu/MyWeights/resnet50-19c8e357.pth',
pretrained= '/home/ubuntu/MyWeights/pytorch/resnet50-19c8e357.pth',
out_indices=(0, 1, 2, 3),
strides=(1, 2, 2, 2)))

Expand Down
106 changes: 88 additions & 18 deletions model/bbox_head/retinanet_head.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,13 @@
@author: ubuntu
"""
import torch.nn as nn
import torch
from functools import partial

from utils.init_weights import normal_init, bias_init_with_prob
from model.get_target_lib import get_anchor_target
from model.anchor_generator_lib import AnchorGenerator
from model.loss_lib import FocalLoss
from model.loss_lib import SigmoidFocalLoss, SmoothL1Loss
"""
header=dict(
type='retina_head',
Expand All @@ -21,37 +25,61 @@
target_means=(.0, .0, .0, .0),
target_stds=(0.1, 0.1, 0.2, 0.2)))
"""
def conv3x3(in_channels):
return nn.Sequential(nn.Conv2d(in_channels, out_channels),
nn.ReLU())
def conv3x3(in_channels, out_channels, stride, padding, bias):
    """Build a 3x3 convolution followed by an in-place ReLU.

    Args:
        in_channels: number of channels of the input feature map.
        out_channels: number of channels produced by the convolution.
        stride: stride of the convolution.
        padding: zero padding added on each side of the input.
        bias: whether the convolution carries a learnable bias.

    Returns:
        An ``nn.Sequential`` holding the ``nn.Conv2d`` and the ``nn.ReLU``.
    """
    conv = nn.Conv2d(
        in_channels,
        out_channels,
        kernel_size=3,
        stride=stride,
        padding=padding,
        bias=bias,
    )
    activation = nn.ReLU(inplace=True)
    return nn.Sequential(conv, activation)

class ClassHead(nn.Module):
    """Classification sub-network applied to a single feature level.

    The input goes through four 3x3 conv + ReLU blocks that keep the channel
    count, then a final 3x3 conv that emits ``num_anchors * num_classes``
    channels. The result is reshaped to one row of ``num_classes`` scores per
    anchor.

    Args:
        in_channels: channel count of the incoming feature map.
        num_anchors: number of anchors per spatial location.
        num_classes: number of score channels predicted per anchor.
    """

    def __init__(self, in_channels, num_anchors, num_classes):
        super().__init__()
        self.num_classes = num_classes
        # RetinaNet uses a tower of four 3x3 convs in front of the predictor.
        self.cls_convs = nn.ModuleList(
            [conv3x3(in_channels, in_channels, 1, 1, True) for _ in range(4)])
        self.cls_head = nn.Conv2d(
            in_channels, num_anchors * num_classes, 3, stride=1, padding=1)

    def forward(self, x):
        """Return scores of shape (batch, H * W * num_anchors, num_classes)."""
        for conv in self.cls_convs:
            x = conv(x)
        out = self.cls_head(x)
        # Move channels last so that each anchor's scores are contiguous.
        out = out.permute(0, 2, 3, 1).contiguous()
        # Explicit int() casts kept from the original; presumably there to
        # keep the shape static when the model is exported -- TODO confirm.
        out = out.view(int(out.size(0)), int(-1), int(self.num_classes))
        return out

    def init_weight(self):
        """Initialise the tower and the predictor conv.

        Each tower entry is an ``nn.Sequential`` (conv + ReLU), which has no
        ``weight``/``bias`` of its own, so the inner ``nn.Conv2d`` is handed to
        ``normal_init`` instead of the wrapper.
        """
        for block in self.cls_convs:
            for layer in block.modules():
                if isinstance(layer, nn.Conv2d):
                    normal_init(layer, std=0.01)
        # Bias the predictor so that the initial foreground probability is 0.01.
        bias_cls = bias_init_with_prob(0.01)
        normal_init(self.cls_head, std=0.01, bias=bias_cls)


class BboxHead(nn.Module):
    """Box-regression sub-network applied to a single feature level.

    The input goes through four 3x3 conv + ReLU blocks that keep the channel
    count, then a final 3x3 conv that emits ``num_anchors * 4`` channels. The
    result is reshaped to one row of 4 regression values per anchor.

    Args:
        in_channels: channel count of the incoming feature map.
        num_anchors: number of anchors per spatial location.
    """

    def __init__(self, in_channels, num_anchors):
        super().__init__()
        # Same four-conv tower layout as the classification branch.
        self.reg_convs = nn.ModuleList(
            [conv3x3(in_channels, in_channels, 1, 1, True) for _ in range(4)])
        self.reg_head = nn.Conv2d(
            in_channels, num_anchors * 4, 3, stride=1, padding=1)

    def forward(self, x):
        """Return regression values of shape (batch, H * W * num_anchors, 4)."""
        for conv in self.reg_convs:
            x = conv(x)
        out = self.reg_head(x)
        # Move channels last so that each anchor's 4 values are contiguous.
        out = out.permute(0, 2, 3, 1).contiguous()
        # Explicit int() casts kept from the original; presumably there to
        # keep the shape static when the model is exported -- TODO confirm.
        out = out.view(int(out.size(0)), int(-1), int(4))
        return out

    def init_weight(self):
        """Initialise the tower and the regression conv.

        Each tower entry is an ``nn.Sequential`` (conv + ReLU), which has no
        ``weight``/``bias`` of its own, so the inner ``nn.Conv2d`` is handed to
        ``normal_init`` instead of the wrapper.
        """
        for block in self.reg_convs:
            for layer in block.modules():
                if isinstance(layer, nn.Conv2d):
                    normal_init(layer, std=0.01)
        # The prior-probability bias only makes sense for classification
        # logits; the regression conv keeps normal_init's default bias.
        normal_init(self.reg_head, std=0.01)



Expand All @@ -64,6 +92,8 @@ def __init__(self,
base_scale=4,
ratios = [1/2, 1, 2],
anchor_strides=(8, 16, 32, 64, 128),
target_means=(.0, .0, .0, .0),
target_stds=(0.1, 0.1, 0.2, 0.2),
loss_cls_cfg=None,
loss_reg_cfg=None,
**kwargs):
Expand All @@ -83,18 +113,58 @@ def __init__(self,
anchor_generator = AnchorGenerator(base_sizes[i], scales[i],
ratios[i], scale_major=False)
self.anchor_generators.append(anchor_generator)

# 创建分类回归头
self.cls_convs = nn.ModuleList()
self.reg_convs = nn.ModuleList()
for _ in range(4):
self.cls_convs.append(conv3x3())
self.reg_convs.append(conv3x3())
num_anchors = len(ratios) * len()
self.cls_head = ClassHead(in_channels, )
self.reg_head = BboxHead()
num_anchors = len(ratios) * len(scales)
self.cls_head = ClassHead(in_channels, num_anchors, num_classes-1)
self.reg_head = BboxHead(in_channels, num_anchors)

# 创建损失函数
self.loss_cls = CrossEntropyLoss()
self.loss_cls = SigmoidFocalLoss()
self.loss_bbox = SmoothL1Loss()

def init_weight(self):
    """Initialise both sub-heads by delegating to their own ``init_weight``."""
    for head in (self.cls_head, self.reg_head):
        head.init_weight()

def forward(self, x):
    """Run the classification and regression heads on every feature level.

    Args:
        x: iterable of feature maps, one per level.

    Returns:
        ``(cls_scores, bbox_preds)``: two lists with one entry per level.
        Plain lists/tuples are returned rather than a dict so the output
        does not clash with ONNX inference.

    Side effects:
        Stores ``shape[2]`` of every feature map in ``self.featmap_sizes``.
    """
    self.featmap_sizes = [level.shape[2] for level in x]
    # Keep the per-level call order: classification first, then regression.
    per_level = [(self.cls_head(level), self.reg_head(level)) for level in x]
    cls_scores = [scores for scores, _ in per_level]
    bbox_preds = [preds for _, preds in per_level]
    return cls_scores, bbox_preds

def get_losses(self, cls_scores, bbox_preds, gt_bboxes, gt_labels, cfg, **kwargs):
    """Compute the classification and box-regression losses.

    The flow mirrors the SSD head: build anchors for every feature level,
    assign targets, then evaluate both losses per image.

    Args:
        cls_scores: classification output of the head; either a single tensor
            or a list/tuple of tensors (``forward`` returns a list).
        bbox_preds: regression output of the head, same layout as
            ``cls_scores``.
        gt_bboxes: ground-truth boxes, one entry per image.
        gt_labels: ground-truth labels, one entry per image.
        cfg: config object providing ``assigner`` and ``sampler``.

    Returns:
        dict with ``loss_cls`` and ``loss_bbox``, each a list of losses.

    NOTE(review): ``ohem`` and ``self.neg_pos_ratio`` are not defined in the
    visible part of this file -- confirm they exist before relying on the
    classification branch.
    """
    num_imgs = len(gt_labels)
    # forward() yields one tensor per level, so a list has no ``.device``;
    # take the device from the first tensor in that case.
    first_score = cls_scores[0] if isinstance(cls_scores, (list, tuple)) else cls_scores
    device = first_score.device

    multi_layer_anchors = []
    for i, featmap_size in enumerate(self.featmap_sizes):
        anchors = self.anchor_generators[i].grid_anchors(
            featmap_size, self.anchor_strides[i], device=device)
        multi_layer_anchors.append(anchors)
    # Stack the anchors of all levels into one (num_anchors, 4) tensor.
    multi_layer_anchors = torch.cat(multi_layer_anchors, dim=0)
    # Every image in the batch shares the same anchor set.
    anchor_list = [multi_layer_anchors for _ in range(num_imgs)]

    # Compute targets; the ``None`` argument stands for gt_landmarks=None.
    target_result = get_anchor_target(anchor_list, gt_bboxes, gt_labels, None,
                                      cfg.assigner, cfg.sampler,
                                      self.target_means, self.target_stds)
    # Unpack the targets (per the original notes: box targets/weights are
    # (b, -1, 4), label targets/weights are (b, -1) -- TODO confirm).
    bboxes_t, bboxes_w, labels_t, labels_w, _, _, num_pos, num_neg = target_result

    # TODO (from the original notes, not implemented here): RetinaNet only
    # uses the number of positive samples as total_sample.
    # TODO (from the original notes, not implemented here): labels need to be
    # converted to one-hot encoding before being fed to focal loss.

    # Box regression loss, normalised by the number of positives.
    pfunc = partial(self.loss_bbox, avg_factor=num_pos)
    loss_bbox = list(map(pfunc, bbox_preds, bboxes_t, bboxes_w))

    # Classification loss, weighted per anchor.
    loss_cls = list(map(self.loss_cls, cls_scores, labels_t))
    loss_cls = [loss * weight.float() for loss, weight in zip(loss_cls, labels_w)]
    # Online hard example mining on the classification loss.
    pfunc = partial(ohem, neg_pos_ratio=self.neg_pos_ratio, avg_factor=num_pos)
    loss_cls = list(map(pfunc, loss_cls, labels_t))

    # One classification loss and one regression loss per image.
    return dict(loss_cls=loss_cls, loss_bbox=loss_bbox)

4 changes: 2 additions & 2 deletions model/loss_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@ def focal_loss(pred, target, alpha=0.25, gamma=2.0):
focal_loss = - at*(1-pt)^gamma * log(pt), 其中at = a*t+(1-a)(1-t), pt = p*t+(1-p)(1-t)
当t=1时
"""
pred_s = pred.sigmoid() # 用真实概率p进行weight的计算,但二值交叉熵输入非概率化p,因为内部自带了sigmoid
pt = pred_s * target + (1 - pred_s) * (1 - target)
pred_sig = pred.sigmoid() # 用真实概率p进行weight的计算,但二值交叉熵输入非概率化p,因为内部自带了sigmoid
pt = pred_sig * target + (1 - pred_sig) * (1 - target)
at = alpha * target + (1 - alpha) * (1 - target)
weight = at * (1 - pt).pow(gamma)
loss = F.binary_cross_entropy(pred, target, weight, reduction='none')
Expand Down
42 changes: 40 additions & 2 deletions utils/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,50 @@
@author: ubuntu
"""
import torch
import numpy as np
import os
import time
import six
import pickle
#from contextlib import ContextDecorator
from functools import wraps

# %%
def one_hot_encode(t, n_column=None):
    """Build a one-hot encoding from integer labels (PyTorch version).

    Args:
        t: tensor holding b labels; values must be integers starting at 0,
            e.g. ``[0, 1, 2, 3]``. It is cast to ``long`` and flattened.
        n_column: width of the encoding. Defaults to ``t.max() + 1``
            (0 for an empty ``t``).

    Returns:
        float32 tensor of shape (b, n_column) with a single 1 per row,
        allocated on the same device as ``t``.
    """
    t = t.long().reshape(-1, 1)  # column vector of indices for scatter_
    if n_column is None:
        n_column = int(t.max().item()) + 1 if t.numel() > 0 else 0
    # Allocate on t's device: scatter_ needs index and destination together.
    one_hot_t = torch.zeros(t.size(0), n_column, dtype=torch.float32, device=t.device)
    if t.numel() > 0:
        one_hot_t.scatter_(1, t, 1)
    return one_hot_t


def label_to_onehot(labels):
    """Convert integer labels to a one-hot matrix (NumPy version).

    Labels must be integers starting at 0, e.g. ``[0, 1, 2, ...]``. The
    result for ``[0, 1, 2]`` is::

        [[1, 0, 0],
         [0, 1, 0],
         [0, 0, 1]]

    Args:
        labels: 1-dim array of labels.

    Returns:
        float array of shape (n_samples, n_col) where ``n_col`` is
        ``max(labels) + 1`` (the minimal number of columns); shape (0, 0)
        for empty input.

    Raises:
        AssertionError: if ``labels`` is not 1-dimensional.
    """
    assert labels.ndim == 1, 'labels should be 1-dim array.'
    # Use the platform index type: int8 would wrap labels above 127.
    labels = labels.astype(np.intp)
    n_samples = labels.shape[0]
    if n_samples == 0:
        return np.zeros((0, 0))
    n_col = int(labels.max()) + 1
    one_hot = np.zeros((n_samples, n_col))
    one_hot[np.arange(n_samples), labels] = 1
    return one_hot


def onehot_to_label(one_hot_labels):
    """Map one-hot rows back to integer labels in the range 0..k.

    Args:
        one_hot_labels: 2-dim array-like with one one-hot row per sample.

    Returns:
        Array with, for every row, the column index of its maximum, i.e.
        the original 0-based label.
    """
    return np.argmax(one_hot_labels, axis=1)


# %%
class ContextDecorator(object):
"""该类来自:from contextlib import ContextDecorator
Expand Down Expand Up @@ -326,5 +363,6 @@ def get_dist_info():
# %%

if __name__ == "__main__":
hello(1)
hello2(2)
t = torch.tensor([2, 0 , 9])
onehot = one_hot_encode(t)
print(onehot)
2 changes: 1 addition & 1 deletion v01/TRAIN_voc.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def train(cfg_path, dataset_class):
"""
# 初始化2个默认选项
distributed = False
parallel = True
parallel = True # 必须设置成dataparallel模式,否则data container无法拆包(因为data container的拆包是在data parallel的scatter函数中进行的)

# get cfg
cfg = Config.fromfile(cfg_path)
Expand Down
Binary file modified v01/config/__pycache__/cfg_retinanet_r50_fpn_voc.cpython-37.pyc
Binary file not shown.
4 changes: 2 additions & 2 deletions v01/config/cfg_retinanet_r50_fpn_voc.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# model settings
model = dict(
type='RetinaNet',
pretrained='modelzoo://resnet50', # 使用pytorch的resnet50的权重
pretrained='/home/ubuntu/MyWeights/pytorch/resnet50-19c8e357.pth', # 使用pytorch的resnet50的权重
backbone=dict(
type='ResNet',
depth=50,
Expand Down Expand Up @@ -50,7 +50,7 @@
max_per_img=100)
# dataset settings
dataset_type = 'VOCDataset'
data_root = './data/VOCdevkit/'
data_root = '/home/ubuntu/MyDatasets0/voc/VOCdevkit/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) # 采用的是pytorch的模型,但mean/std还是用的caffe的???
data = dict( # repeatdataset不加了,在coco训练12epoch,voc上调成24
Expand Down
Binary file modified v01/dataset/__pycache__/extra_aug.cpython-37.pyc
Binary file not shown.
6 changes: 3 additions & 3 deletions v01/utils/anchor_target.py
Original file line number Diff line number Diff line change
Expand Up @@ -531,13 +531,13 @@ def unmap(data, count, inds, fill=0):
ret[inds, :] = data
return ret


def expand_binary_labels(labels, label_weights, label_channels):
    """Expand class labels and their weights to per-class (one-hot) form.

    The labels are fed to focal loss, which evaluates a binary cross entropy
    per class and therefore needs one-hot targets.

    Args:
        labels: 1-dim integer tensor; 0 means background, classes start at 1.
        label_weights: 1-dim tensor with one weight per label.
        label_channels: number of foreground classes (one-hot width).

    Returns:
        ``(bin_labels, bin_label_weights)``: a float32 one-hot tensor of shape
        (num_labels, label_channels) whose background rows are all zero, and
        the weights broadcast to the same shape.
    """
    bin_labels = labels.new_full(
        (labels.size(0), label_channels), 0, dtype=torch.float32)
    # Flatten explicitly so that a single positive still gives a 1-dim index.
    inds = torch.nonzero(labels >= 1).reshape(-1)
    if inds.numel() > 0:
        # Shift the 1-based class ids to 0-based one-hot columns.
        bin_labels[inds, labels[inds] - 1] = 1
    bin_label_weights = label_weights.view(-1, 1).expand(
        label_weights.size(0), label_channels)
    return bin_labels, bin_label_weights
Expand Down
6 changes: 6 additions & 0 deletions v01/work_dirs/retinanet_voc/20191203_165500.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
2019-12-03 16:55:19,008 - INFO - Start running, host: ubuntu@ubun, work_dir: /home/ubuntu/suliang_git/deep_learning_algorithm/v01/work_dirs/retinanet_voc
2019-12-03 16:55:19,201 - INFO - workflow: [('train', 1)], max: 20 epochs
2019-12-03 16:59:37,517 - INFO - Start running, host: ubuntu@ubun, work_dir: /home/ubuntu/suliang_git/deep_learning_algorithm/v01/work_dirs/retinanet_voc
2019-12-03 16:59:37,960 - INFO - workflow: [('train', 1)], max: 20 epochs
2019-12-03 17:06:28,780 - INFO - Start running, host: ubuntu@ubun, work_dir: /home/ubuntu/suliang_git/deep_learning_algorithm/v01/work_dirs/retinanet_voc
2019-12-03 17:06:28,781 - INFO - workflow: [('train', 1)], max: 20 epochs
4 changes: 4 additions & 0 deletions v01/work_dirs/retinanet_voc/20191203_165921.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
2019-12-03 16:59:37,517 - INFO - Start running, host: ubuntu@ubun, work_dir: /home/ubuntu/suliang_git/deep_learning_algorithm/v01/work_dirs/retinanet_voc
2019-12-03 16:59:37,960 - INFO - workflow: [('train', 1)], max: 20 epochs
2019-12-03 17:06:28,780 - INFO - Start running, host: ubuntu@ubun, work_dir: /home/ubuntu/suliang_git/deep_learning_algorithm/v01/work_dirs/retinanet_voc
2019-12-03 17:06:28,781 - INFO - workflow: [('train', 1)], max: 20 epochs
2 changes: 2 additions & 0 deletions v01/work_dirs/retinanet_voc/20191203_170628.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
2019-12-03 17:06:28,780 - INFO - Start running, host: ubuntu@ubun, work_dir: /home/ubuntu/suliang_git/deep_learning_algorithm/v01/work_dirs/retinanet_voc
2019-12-03 17:06:28,781 - INFO - workflow: [('train', 1)], max: 20 epochs
2 changes: 2 additions & 0 deletions v01/work_dirs/retinanet_voc/20191203_170659.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
2019-12-03 17:06:59,705 - INFO - Start running, host: ubuntu@ubun, work_dir: /home/ubuntu/suliang_git/deep_learning_algorithm/v01/work_dirs/retinanet_voc
2019-12-03 17:06:59,706 - INFO - workflow: [('train', 1)], max: 20 epochs

0 comments on commit 7e23899

Please sign in to comment.