Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
ximitiejiang committed Dec 4, 2019
1 parent 7e23899 commit dea3f31
Show file tree
Hide file tree
Showing 28 changed files with 209 additions and 97 deletions.
7 changes: 7 additions & 0 deletions demo/readme_base.md
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,13 @@
直到得到bbox正常的数据才return
### 训练中报错,binary_cross_entropy_with_logits(pred, target, weight)损失函数报错:RuntimeError: the derivative for 'weight' is not implemented
1. 这个问题主要原因在于传入的weight是带有梯度反传标志的,但二值交叉熵公式并不支持对weight进行梯度反传更新。所以报错。
实际上我并不需要weight进行梯度反传,之前的旧版本pytorch似乎不会报错,现在pytorch1.1把这个错误报出来反而是好事。
解决方案:weight.detach()代替weight
参考:https://blog.csdn.net/sinat_24424445/article/details/101022092
### 训练中报错variable has been modified by an inplace operation:
Expand Down
Binary file not shown.
51 changes: 24 additions & 27 deletions demo/retinanet_voc/cfg_detector_retinanet_resnet50_voc.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,19 @@
@author: ubuntu
"""

# Top-level run settings for this configuration.
gpus = 1
gpus = [0]  # NOTE(review): second assignment to `gpus`; this value overrides the one above
parallel = False
distribute = False
n_epochs = 1
imgs_per_core = 4 # On GPU a "core" means one GPU, otherwise one CPU (equivalent to batch_size)
workers_per_core = 2
save_checkpoint_interval = 2 # Save a checkpoint once every this many epochs
save_checkpoint_interval = 1 # Save a checkpoint once every this many epochs (overrides the value above)
work_dir = '/home/ubuntu/mytrain/retinanet_resnet50_voc/'
resume_from = None # Restore onto the device specified above
load_from = None
load_device = 'cuda' # Extra setting: device used for evaluation/prediction: ['cpu', 'cuda']; prediction can run on cpu

lr = 0.001
img_size = (1333, 800)

lr_processor = dict(
type='list',
Expand Down Expand Up @@ -48,18 +48,26 @@
neck=dict(
type='fpn',
params=dict(
in_channels=(256, 512, 1024, 2048),
out_channels=256,
use_levels=(0, 1, 2, 3), # 表示作用在哪几层,默认4层都是,但新的FPN只使用了1,2,3层,0层丢弃
num_outs=5, # 额外输出一层
extra_convs_on_inputs=True
))

header=dict(
type='retina_head',
head=dict(
type='retinanet_head',
params=dict(
input_size=300,
input_size=img_size,
num_classes=21,
in_channels=(512, 1024, 512, 256, 256, 256),
num_anchors=(4, 6, 6, 6, 4, 4),
anchor_strides=(8, 16, 32, 64, 100, 300),
in_channels=(256, 256, 256, 256, 256),
base_scale=4,
ratios = [1/2, 1, 2],
anchor_strides=(8, 16, 32, 64, 128),
target_means=(.0, .0, .0, .0),
target_stds=(0.1, 0.1, 0.2, 0.2)))
target_stds=(0.1, 0.1, 0.2, 0.2),
alpha=0.25,
gamma=2))

transform = dict(
img_params=dict(
Expand All @@ -70,7 +78,7 @@
to_tensor=True, # numpy to tensor
to_chw=True, # hwc to chw
flip_ratio=None,
scale=[1333, 800], # 选择300的小尺寸
scale=img_size, # 选择300的小尺寸
size_divisor=32,
keep_ratio=True),
label_params=dict(
Expand Down Expand Up @@ -98,7 +106,7 @@
to_onehot=None),
bbox_params=None)

data_root_path='/home/ubuntu/MyDatasets/voc/VOCdevkit/'
data_root_path='/home/ubuntu/MyDatasets0/voc/VOCdevkit/'
trainset = dict(
type='voc',
repeat=0,
Expand All @@ -121,8 +129,8 @@
trainloader = dict(
params=dict(
shuffle=True,
batch_size=gpus * imgs_per_core if gpus>0 else imgs_per_core,
num_workers=gpus * workers_per_core if gpus>0 else imgs_per_core,
batch_size=imgs_per_core,
num_workers=workers_per_core,
pin_memory=False, # 数据送入GPU进行加速(默认False)
drop_last=False,
collate_fn='dict_collate', # 'default_collate','multi_collate', 'dict_collate'
Expand All @@ -131,8 +139,8 @@
valloader = dict(
params=dict(
shuffle=False,
batch_size=gpus * imgs_per_core if gpus>0 else imgs_per_core,
num_workers=gpus * workers_per_core if gpus>0 else imgs_per_core,
batch_size=imgs_per_core,
num_workers=workers_per_core,
pin_memory=False, # 数据送入GPU进行加速(默认False)
drop_last=False,
collate_fn='dict_collate', # 'default_collate','multi_collate', 'dict_collate'
Expand All @@ -145,14 +153,3 @@
momentum=0.9,
weight_decay=5e-4))

# Classification loss configuration: loss type and its parameters.
loss_clf = {
    'type': 'cross_entropy',
    'params': {'reduction': 'mean'},
}

# Regression loss configuration: loss type and its parameters.
loss_reg = {
    'type': 'smooth_l1',
    'params': {'reduction': 'mean'},
}
61 changes: 61 additions & 0 deletions demo/retinanet_voc/test_detector_retinanet_voc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Sep 3 21:29:33 2019
@author: ubuntu
"""
import cv2
from model.runner_lib import Runner
from utils.prepare_training import get_config
from utils.evaluation import eval_dataset_det, DetPredictor
from utils.tools import parse_log
from utils.dataset_classes import get_classes
from utils.visualization import vis_all_opencv, vis_all_pyplot, vis_cam

def train_ssd(cfg_path, resume_from=None):
    """Build a ``Runner`` from the given configuration and start training.

    Args:
        cfg_path: Configuration handed straight to ``Runner`` as its first
            argument. Note that the script entry point in this file passes
            the object returned by ``get_config`` here, despite the
            parameter's name.
        resume_from: Forwarded unchanged to ``Runner`` as its second
            argument. Defaults to ``None``.
    """
    # Use the argument instead of a module-level ``cfg``: that global is only
    # created when this file is executed as a script, so relying on it raised
    # NameError whenever the function was imported and called from elsewhere.
    runner = Runner(cfg_path, resume_from)
    runner.train()



if __name__ == "__main__":
    # Pick the workflow step to run; only the 'train' branch is active below.
    task = 'train'

    # Load the configuration that sits next to this script.
    cfg_path = './cfg_detector_retinanet_resnet50_voc.py'
    cfg = get_config(cfg_path)

    # Model training.
    if task == 'train':
        train_ssd(cfg, resume_from=None)

    # The remaining tasks are disabled reference snippets.
    # NOTE(review): their paths still point at ssd_vgg_voc outputs - confirm
    # before re-enabling any of them.
    #
    # if task == 'log':
    #     parse_log(paths = ['/home/ubuntu/mytrain/ssd_vgg_voc/20191025_182352.log'])
    #
    # if task == 'eval':  # dataset evaluation
    #     eval_dataset_det(cfg_path=cfg_path,
    #                      load_from = '/home/ubuntu/mytrain/ssd_vgg_voc/epoch_11.pth',
    #                      load_device='cuda')
    #
    # if task == 'load':  # reload an existing evaluation result file and evaluate again
    #     eval_dataset_det(cfg_path=cfg_path,
    #                      load_from = '/home/ubuntu/mytrain/ssd_vgg_voc/epoch_11.pth',
    #                      load_device='cuda',
    #                      result_file='/home/ubuntu/mytrain/ssd_vgg_voc/20190928_084133_eval_result.pkl')
    #
    # if task == 'test':  # predict on one or several images: 0.649 sec on cpu, 0.388 sec on gpu
    #     img = cv2.imread('/home/ubuntu/MyDatasets/misc/test13.jpg')
    #     predictor = DetPredictor(cfg_path,
    #                              load_from = '/home/ubuntu/mytrain/ssd_vgg_voc/epoch_61.pth',
    #                              load_device='cpu')
    #     for results in predictor([img]):
    #         vis_all_pyplot(*results, class_names=get_classes('voc'), score_thr=0.5)
    #
    # if task == 'video':  # video prediction; slightly different usage: vis_cam needs a predictor passed in
    #     src = 0  # src may be int(cam_id), str(video path), list(img_list)
    #     predictor = DetPredictor(cfg_path,
    #                              load_from = '/home/ubuntu/mytrain/ssd_vgg_voc/epoch_11.pth',
    #                              load_device='cpu')
    #     vis_cam(src, predictor, class_names=get_classes('voc'), score_thr=0.2)
    #
Binary file modified model/__pycache__/get_target_lib.cpython-37.pyc
Binary file not shown.
Binary file modified model/__pycache__/loss_lib.cpython-37.pyc
Binary file not shown.
Binary file not shown.
Binary file modified model/bbox_head/__pycache__/ssd_head.cpython-37.pyc
Binary file not shown.
106 changes: 62 additions & 44 deletions model/bbox_head/retinanet_head.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from functools import partial

from utils.init_weights import normal_init, bias_init_with_prob
from utils.tools import one_hot_encode
from model.get_target_lib import get_anchor_target
from model.anchor_generator_lib import AnchorGenerator
from model.loss_lib import SigmoidFocalLoss, SmoothL1Loss
Expand All @@ -25,34 +26,38 @@
target_means=(.0, .0, .0, .0),
target_stds=(0.1, 0.1, 0.2, 0.2)))
"""
def conv3x3(in_channels, out_channels, stride, padding, bias):
    """Return a 3x3 convolution followed by an in-place ReLU.

    Args:
        in_channels: Number of input channels of the convolution.
        out_channels: Number of output channels of the convolution.
        stride: Convolution stride.
        padding: Convolution padding.
        bias: Whether the convolution has a bias term.

    Returns:
        An ``nn.Sequential`` holding the ``nn.Conv2d`` and the ``nn.ReLU``.
    """
    conv = nn.Conv2d(in_channels, out_channels, kernel_size=3,
                     stride=stride, padding=padding, bias=bias)
    activation = nn.ReLU(inplace=True)
    return nn.Sequential(conv, activation)
#def conv3x3(in_channels, out_channels, stride, padding, bias):
#
# return nn.Sequential(nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=padding, bias=bias),
# nn.ReLU(inplace=True))

class ClassHead(nn.Module):
    """Classification module applied to a single feature level.

    Four 3x3 conv + ReLU layers that keep the channel count are followed by
    a 3x3 conv producing ``num_anchors * num_classes`` channels.

    Args:
        in_channels: Channel count of the incoming feature map.
        num_anchors: Number of anchors per spatial location.
        num_classes: Number of scores predicted per anchor.
    """

    def __init__(self, in_channels, num_anchors, num_classes):
        super().__init__()
        self.num_classes = num_classes
        # RetinaNet uses four conv3x3 + ReLU layers. ``bias`` is passed by
        # keyword: the sixth positional argument of nn.Conv2d is ``dilation``,
        # so the earlier positional ``True`` never reached ``bias``.
        tower = []
        for _ in range(4):
            tower.append(nn.Conv2d(in_channels, in_channels, 3, 1, 1, bias=True))
            tower.append(nn.ReLU(inplace=True))
        self.cls_convs = nn.Sequential(*tower)
        self.cls_head = nn.Conv2d(in_channels, num_anchors * num_classes, 3, stride=1, padding=1)

    def forward(self, x):
        """Return class scores shaped ``(batch, H * W * num_anchors, num_classes)``."""
        # Run the conv tower exactly once.
        x = self.cls_convs(x)
        out = self.cls_head(x)
        # Move channels last so each anchor's scores are contiguous.
        out = out.permute(0, 2, 3, 1).contiguous()
        # Explicit int() casts kept from the original code
        # (presumably for ONNX export/tracing - TODO confirm).
        out = out.view(int(out.size(0)), int(-1), int(self.num_classes))
        return out

    def init_weights(self):
        """Initialise the tower convs and the output conv.

        Tower convs use ``normal_init(std=0.01)``; the output conv also gets
        the bias returned by ``bias_init_with_prob(0.01)``.
        """
        for m in self.cls_convs:
            # The Sequential also contains ReLU layers, which have no weights.
            if isinstance(m, nn.Conv2d):
                normal_init(m, std=0.01)
        bias_cls = bias_init_with_prob(0.01)
        normal_init(self.cls_head, std=0.01, bias=bias_cls)

    def init_weight(self):
        """Backward-compatible alias for :meth:`init_weights`."""
        return self.init_weights()

Expand All @@ -61,73 +66,77 @@ class BboxHead(nn.Module):
"""针对单层特征的bbox回归模块"""
def __init__(self, in_channels, num_anchors):
    """Build the regression tower and the output conv.

    Args:
        in_channels: Channel count of the incoming feature map.
        num_anchors: Number of anchors per spatial location; the output
            conv produces ``num_anchors * 4`` channels.
    """
    super().__init__()
    # Four conv3x3 + ReLU layers. ``bias`` is passed by keyword: the sixth
    # positional argument of nn.Conv2d is ``dilation``, so the earlier
    # positional ``True`` never reached ``bias``.
    tower = []
    for _ in range(4):
        tower.append(nn.Conv2d(in_channels, in_channels, 3, 1, 1, bias=True))
        tower.append(nn.ReLU(inplace=True))
    self.reg_convs = nn.Sequential(*tower)
    self.reg_head = nn.Conv2d(in_channels, num_anchors * 4, 3, stride=1, padding=1)

def forward(self, x):
    """Return box predictions shaped ``(batch, H * W * num_anchors, 4)``."""
    # Run the conv tower exactly once (a leftover per-layer loop used to
    # apply it a second time).
    x = self.reg_convs(x)
    out = self.reg_head(x)
    # Move channels last so each anchor's four values are contiguous.
    out = out.permute(0, 2, 3, 1).contiguous()
    # Explicit int() casts kept from the original code
    # (presumably for ONNX export/tracing - TODO confirm).
    out = out.view(int(out.size(0)), int(-1), int(4))
    return out

def init_weights(self):
    """Initialise the tower convs and the output conv with ``normal_init(std=0.01)``."""
    for m in self.reg_convs:
        # The Sequential also contains ReLU layers, which have no weights.
        if isinstance(m, nn.Conv2d):
            normal_init(m, std=0.01)
    # The prior-probability bias from bias_init_with_prob() belongs to the
    # sigmoid classification output only; applied here it would shift every
    # initial box delta by a large constant. Regression starts from zero bias.
    normal_init(self.reg_head, std=0.01, bias=0)

def init_weight(self):
    """Backward-compatible alias for :meth:`init_weights`."""
    return self.init_weights()



class RetinaNetHead(nn.Module):
"""retina head"""
def __init__(self,
             input_size=(1333, 800),
             num_classes=21,
             in_channels=(256, 256, 256, 256, 256),
             base_scale=4,
             ratios=(1/2, 1, 2),
             anchor_strides=(8, 16, 32, 64, 128),
             target_means=(.0, .0, .0, .0),
             target_stds=(0.1, 0.1, 0.2, 0.2),
             loss_cls_cfg=None,
             loss_reg_cfg=None,
             alpha=0.25,
             gamma=2,
             **kwargs):
    """Create the anchor generators, the two prediction heads and the losses.

    Args:
        input_size: Accepted but not used in this method.
        num_classes: Class count; the classification head is built with
            ``num_classes - 1`` outputs per anchor.
        in_channels: Channels of the incoming feature maps, either one int
            or a sequence whose first entry is used.
        base_scale: Multiplier applied to the three scales ``2 ** (i / 3)``.
        ratios: Anchor aspect ratios.
        anchor_strides: One entry per feature level, used as the anchor
            base size of that level.
        target_means: Accepted but not used in this method.
        target_stds: Accepted but not used in this method.
        loss_cls_cfg: Accepted for backward compatibility and ignored.
        loss_reg_cfg: Accepted for backward compatibility and ignored.
        alpha: Passed to ``SigmoidFocalLoss``.
        gamma: Passed to ``SigmoidFocalLoss``.
        **kwargs: Extra keyword arguments are ignored.
    """
    super().__init__()

    self.num_classes = num_classes
    # NOTE(review): input_size, target_means and target_stds are not stored
    # on the instance here - confirm whether get_losses needs them.

    # How RetinaNet anchors are defined here:
    #   base sizes = the strides, [8, 16, 32, 64, 128] by default
    #   scales     = base_scale * [2 ** (i / 3) for i in range(3)],
    #                i.e. [1, 1.2599, 1.5874] scaled by base_scale (4 by default)
    # The scaling is done per element; ``base_scale * [...]`` would repeat
    # the list base_scale times instead of multiplying its values.
    scales = [base_scale * 2 ** (i / 3) for i in range(3)]
    # Immutable default in the signature, but still hand a list to
    # AnchorGenerator as before.
    ratios = list(ratios)
    base_sizes = anchor_strides

    # One anchor generator per feature level; every level shares the same
    # scales and ratios and differs only in its base size.
    self.anchor_generators = []
    for base_size in base_sizes:
        anchor_generator = AnchorGenerator(base_size, scales,
                                           ratios, scale_major=False)
        self.anchor_generators.append(anchor_generator)

    # A single ClassHead/BboxHead pair is built, sized from the first
    # channel entry (a plain int is accepted as well).
    head_channels = in_channels if isinstance(in_channels, int) else in_channels[0]
    num_anchors = len(ratios) * len(scales)
    self.cls_head = ClassHead(head_channels, num_anchors, num_classes-1)
    self.reg_head = BboxHead(head_channels, num_anchors)

    # Loss functions.
    self.loss_cls = SigmoidFocalLoss(alpha=alpha, gamma=gamma)
    self.loss_bbox = SmoothL1Loss()

def init_weights(self):
    """Initialise the classification head and then the regression head."""
    self.cls_head.init_weights()
    self.reg_head.init_weights()

def init_weight(self):
    """Backward-compatible alias for :meth:`init_weights`."""
    return self.init_weights()

def forward(self, x):
self.featmap_sizes = [feat.shape[2] for feat in x]
self.featmap_sizes = [feat.shape[2:] for feat in x]
cls_scores = []
bbox_preds = []
for feat in x:
Expand All @@ -136,7 +145,12 @@ def forward(self, x):
return cls_scores, bbox_preds # 这是模型最终输出,最好不用dict,避免跟onnx inference冲突

def get_losses(self, cls_scores, bbox_preds, gt_bboxes, gt_labels, cfg, **kwargs):
"""跟ssd的结构一样"""
"""retinanet
cls_scores()
bbox_preds()
gt_bboxes()
gt_labels()
"""
num_imgs = len(gt_labels)
multi_layer_anchors = []
for i in range(len(self.featmap_sizes)):
Expand All @@ -155,16 +169,20 @@ def get_losses(self, cls_scores, bbox_preds, gt_bboxes, gt_labels, cfg, **kwargs
"""retinanet的变化:只取正样本数量作为total_sample"""

"""retinanet的变化:labels需要转换成独热编码方式输入focal loss"""

labels_t = one_hot_encode(labels_t, self.num_classes-1)
labels_w = labels_w.view(-1, 1).expand(labels_w.size(0), self.num_classes-1)
"""retinanet的变化:计算损失时是把1个batch的比如4张图的某一特征层的labels, weights放在一起算,即(b, -1, 20)reshape成(-1, 20)
但我这里调整了,改成一张图的5个featmap放在一起算,4张图就map4次
"""
# cls分类损失
pfunc = partial(self.loss_cls, avg_factor=num_pos)
loss_cls = list(map(pfunc, cls_scores, labels_t, labels_w))
# loss_cls = [loss_cls[i] * labels_w[i].float() for i in range(len(loss_cls))] # (b,)(8732,)
# cls loss的ohem
# pfunc = partial(ohem, neg_pos_ratio=self.neg_pos_ratio, avg_factor=num_pos)
# loss_cls = list(map(pfunc, loss_cls, labels_t)) # (b,)
# bbox回归损失
pfunc = partial(self.loss_bbox, avg_factor=num_pos)
loss_bbox = list(map(pfunc, bbox_preds, bboxes_t, bboxes_w)) # (b,)
# cls分类损失
loss_cls = list(map(self.loss_cls, cls_scores, labels_t))
loss_cls = [loss_cls[i] * labels_w[i].float() for i in range(len(loss_cls))] # (b,)(8732,)
# cls loss的ohem
pfunc = partial(ohem, neg_pos_ratio=self.neg_pos_ratio, avg_factor=num_pos)
loss_cls = list(map(pfunc, loss_cls, labels_t)) # (b,)

return dict(loss_cls = loss_cls, loss_bbox = loss_bbox) # {(b,), (b,)} 每张图对应一个分类损失值和一个回归损失值。

Loading

0 comments on commit dea3f31

Please sign in to comment.