update

ximitiejiang · Dec 4, 2019 · dea3f31 · dea3f31
1 parent 7e23899
commit dea3f31
Show file tree

Hide file tree

Showing 28 changed files with 209 additions and 97 deletions.
diff --git a/demo/readme_base.md b/demo/readme_base.md
@@ -509,6 +509,13 @@
 直到得到bbox正常的数据才return
 
 
+### 训练中报错，binary_cross_entropy_with_logits(pred, target, weight)损失函数报错：RuntimeError: the derivative for 'weight' is not implemented
+
+1. 这个问题主要原因在于传入的weight是带有梯度反传标志的，但二值交叉熵公式并不支持对weight进行梯度反传更新。所以报错。
+实际上我并不需要weight进行梯度反传，之前的旧版本pytorch似乎不会报错，现在pytorch1.1把这个问题报出来反而是好事。
+解决方案：weight.detach()代替weight
+参考：https://blog.csdn.net/sinat_24424445/article/details/101022092
+
 
 ### 训练中报错variable has been modified by an inplace operation：
 

diff --git a/demo/retinanet_voc/__pycache__/cfg_detector_retinanet_resnet50_voc.cpython-37.pyc b/demo/retinanet_voc/__pycache__/cfg_detector_retinanet_resnet50_voc.cpython-37.pyc
diff --git a/demo/retinanet_voc/cfg_detector_retinanet_resnet50_voc.py b/demo/retinanet_voc/cfg_detector_retinanet_resnet50_voc.py
@@ -6,19 +6,19 @@
 @author: ubuntu
 """
 
-gpus = 1
+gpus = [0]
 parallel = False
 distribute = False                       
 n_epochs = 1
 imgs_per_core = 4               # 如果是gpu, 则core代表gpu，否则core代表cpu(等效于batch_size)
 workers_per_core = 2
-save_checkpoint_interval = 2     # 每多少个epoch保存一次epoch
+save_checkpoint_interval = 1     # save a checkpoint every this many epochs
 work_dir = '/home/ubuntu/mytrain/retinanet_resnet50_voc/'
 resume_from = None               # 恢复到前面指定的设备
 load_from = None
 load_device = 'cuda'             # 额外定义用于评估预测的设备: ['cpu', 'cuda']，可在cpu预测
-
 lr = 0.001
+img_size = (1333, 800)
 
 lr_processor = dict(
         type='list',
@@ -48,18 +48,26 @@
 neck=dict(
         type='fpn',
         params=dict(
+                in_channels=(256, 512, 1024, 2048),
+                out_channels=256,
+                use_levels=(0, 1, 2, 3),  # 表示作用在哪几层，默认4层都是，但新的FPN只使用了1,2,3层，0层丢弃
+                num_outs=5,  # 额外输出一层
+                extra_convs_on_inputs=True
                 ))
 
-header=dict(
-        type='retina_head',
+head=dict(
+        type='retinanet_head',
         params=dict(
-                input_size=300,
+                input_size=img_size,
                 num_classes=21,
-                in_channels=(512, 1024, 512, 256, 256, 256),
-                num_anchors=(4, 6, 6, 6, 4, 4),
-                anchor_strides=(8, 16, 32, 64, 100, 300),
+                in_channels=(256, 256, 256, 256, 256),
+                base_scale=4,
+                ratios = [1/2, 1, 2],
+                anchor_strides=(8, 16, 32, 64, 128),
                 target_means=(.0, .0, .0, .0),
-                target_stds=(0.1, 0.1, 0.2, 0.2)))
+                target_stds=(0.1, 0.1, 0.2, 0.2),
+                alpha=0.25,
+                gamma=2))
 
 transform = dict(
         img_params=dict(
@@ -70,7 +78,7 @@
                 to_tensor=True, # numpy to tensor 
                 to_chw=True,    # hwc to chw
                 flip_ratio=None,
-                scale=[1333, 800],  # 选择300的小尺寸
+                scale=img_size,  # resize target follows img_size (1333, 800), no longer the small 300 input
                 size_divisor=32,
                 keep_ratio=True),
         label_params=dict(
@@ -98,7 +106,7 @@
                 to_onehot=None),
         bbox_params=None)
 
-data_root_path='/home/ubuntu/MyDatasets/voc/VOCdevkit/'
+data_root_path='/home/ubuntu/MyDatasets0/voc/VOCdevkit/'
 trainset = dict(
         type='voc',
         repeat=0,
@@ -121,8 +129,8 @@
 trainloader = dict(
         params=dict(
                 shuffle=True,
-                batch_size=gpus * imgs_per_core if gpus>0 else imgs_per_core,
-                num_workers=gpus * workers_per_core if gpus>0 else imgs_per_core,
+                batch_size=imgs_per_core,
+                num_workers=workers_per_core,
                 pin_memory=False,   # 数据送入GPU进行加速(默认False)
                 drop_last=False,
                 collate_fn='dict_collate',    # 'default_collate','multi_collate', 'dict_collate'
@@ -131,8 +139,8 @@
 valloader = dict(        
         params=dict(
                 shuffle=False,
-                batch_size=gpus * imgs_per_core if gpus>0 else imgs_per_core,
-                num_workers=gpus * workers_per_core if gpus>0 else imgs_per_core,
+                batch_size=imgs_per_core,
+                num_workers=workers_per_core,
                 pin_memory=False,   # 数据送入GPU进行加速(默认False)
                 drop_last=False,
                 collate_fn='dict_collate',    # 'default_collate','multi_collate', 'dict_collate'
@@ -145,14 +153,3 @@
                 momentum=0.9, 
                 weight_decay=5e-4))
 
-loss_clf = dict(
-        type='cross_entropy',
-        params=dict(
-                reduction='mean'
-                ))
-
-loss_reg = dict(
-        type='smooth_l1',
-        params=dict(
-                reduction='mean'
-                ))
diff --git a/demo/retinanet_voc/test_detector_retinanet_voc.py b/demo/retinanet_voc/test_detector_retinanet_voc.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Sep  3 21:29:33 2019
+
+@author: ubuntu
+"""
+import cv2
+from model.runner_lib import Runner
+from utils.prepare_training import get_config
+from utils.evaluation import eval_dataset_det, DetPredictor
+from utils.tools import parse_log
+from utils.dataset_classes import get_classes
+from utils.visualization import vis_all_opencv, vis_all_pyplot, vis_cam
+
+def train_ssd(cfg_path, resume_from=None):
+    """Build a Runner for the given config and run training.
+
+    Args:
+        cfg_path: either a path string to a config file (loaded here with
+            get_config) or an already-loaded config object, which is what
+            the __main__ section of this script passes in.
+        resume_from: forwarded unchanged to Runner as its second argument.
+    """
+    # Use the argument rather than a module-level `cfg`, so the function
+    # also works when it is imported and called from another module.
+    cfg = get_config(cfg_path) if isinstance(cfg_path, str) else cfg_path
+    runner = Runner(cfg, resume_from)
+    runner.train()
+
+
+
+if __name__ == "__main__":
+
+    task = 'train'
+    cfg_path = './cfg_detector_retinanet_resnet50_voc.py'
+    cfg = get_config(cfg_path)
+
+    if task == 'train':  # 模型训练
+        train_ssd(cfg, 
+                  resume_from=None)
+#    
+#    if task == 'log':
+#        parse_log(paths = ['/home/ubuntu/mytrain/ssd_vgg_voc/20191025_182352.log'])
+#        
+#    if task == 'eval':  # 数据集评估
+#        eval_dataset_det(cfg_path=cfg_path,
+#                         load_from = '/home/ubuntu/mytrain/ssd_vgg_voc/epoch_11.pth',
+#                         load_device='cuda')
+#    
+#    if task == 'load':  # 已有数据集评估文件，重新载入进行评估
+#        eval_dataset_det(cfg_path=cfg_path,
+#                         load_from = '/home/ubuntu/mytrain/ssd_vgg_voc/epoch_11.pth',
+#                         load_device='cuda',
+#                         result_file='/home/ubuntu/mytrain/ssd_vgg_voc/20190928_084133_eval_result.pkl')
+#    
+#    if task == 'test':  # 测试单张图或多张图的结果： cpu上0.649 sec， gpu上0.388 sec
+#        img = cv2.imread('/home/ubuntu/MyDatasets/misc/test13.jpg')
+#        predictor = DetPredictor(cfg_path,                         
+#                                 load_from = '/home/ubuntu/mytrain/ssd_vgg_voc/epoch_61.pth',
+#                                 load_device='cpu')
+#        for results in predictor([img]):
+#            vis_all_pyplot(*results, class_names=get_classes('voc'), score_thr=0.5)
+#    
+#    if task == 'video': # 测试视频预测结果：注意方法稍有不同，vis_cam需要传入一个predictor
+#        src = 0  # src可以等于int(cam_id), str(video path), list(img_list)
+#        predictor = DetPredictor(cfg_path,                         
+#                                 load_from = '/home/ubuntu/mytrain/ssd_vgg_voc/epoch_11.pth',
+#                                 load_device='cpu')
+#        vis_cam(src, predictor, class_names=get_classes('voc'), score_thr=0.2)
+#            
diff --git a/model/__pycache__/get_target_lib.cpython-37.pyc b/model/__pycache__/get_target_lib.cpython-37.pyc
diff --git a/model/__pycache__/loss_lib.cpython-37.pyc b/model/__pycache__/loss_lib.cpython-37.pyc
diff --git a/model/bbox_head/__pycache__/retinanet_head.cpython-37.pyc b/model/bbox_head/__pycache__/retinanet_head.cpython-37.pyc
diff --git a/model/bbox_head/__pycache__/ssd_head.cpython-37.pyc b/model/bbox_head/__pycache__/ssd_head.cpython-37.pyc
diff --git a/model/bbox_head/retinanet_head.py b/model/bbox_head/retinanet_head.py
@@ -10,6 +10,7 @@
 from functools import partial
 
 from utils.init_weights import normal_init, bias_init_with_prob
+from utils.tools import one_hot_encode
 from model.get_target_lib import get_anchor_target
 from model.anchor_generator_lib import AnchorGenerator
 from model.loss_lib import SigmoidFocalLoss, SmoothL1Loss
@@ -25,34 +26,38 @@
                 target_means=(.0, .0, .0, .0),
                 target_stds=(0.1, 0.1, 0.2, 0.2)))
 """
-def conv3x3(in_channels, out_channels, stride, padding, bias):
-
-    return nn.Sequential(nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=padding, bias=bias),
-                         nn.ReLU(inplace=True))
+#def conv3x3(in_channels, out_channels, stride, padding, bias):
+#    
+#    return nn.Sequential(nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=padding, bias=bias),
+#                         nn.ReLU(inplace=True))
 
 class ClassHead(nn.Module):
     """针对单层特征的分类模块"""
     def __init__(self, in_channels, num_anchors, num_classes):
         super().__init__()
         self.num_classes = num_classes
-        self.cls_convs = nn.ModuleList()
-        for _ in range(4):
-            self.cls_convs.append(conv3x3(in_channels, in_channels, 1, 1, True))
-
+        self.cls_convs = nn.Sequential(nn.Conv2d(in_channels, in_channels, 3, 1, 1, True),
+                                       nn.ReLU(inplace=True),
+                                       nn.Conv2d(in_channels, in_channels, 3, 1, 1, True),
+                                       nn.ReLU(inplace=True),
+                                       nn.Conv2d(in_channels, in_channels, 3, 1, 1, True),
+                                       nn.ReLU(inplace=True),
+                                       nn.Conv2d(in_channels, in_channels, 3, 1, 1, True),
+                                       nn.ReLU(inplace=True))
         self.cls_head = nn.Conv2d(in_channels, num_anchors * num_classes, 3, stride=1, padding=1)
 
     def forward(self, x):
-        for conv in self.cls_convs:  # retinanet有4个conv3x3
-            x = conv(x)
+        x = self.cls_convs(x)
         out = self.cls_head(x)
         out = out.permute(0, 2, 3, 1).contiguous()
 #        out = out.view(out.shape[0], -1, self.num_classes)  
         out = out.view(int(out.size(0)), int(-1), int(self.num_classes))
         return out
 
-    def init_weight(self):
+    def init_weights(self):
         for m in self.cls_convs:
-            normal_init(m, std=0.01)
+            if isinstance(m, nn.Conv2d):
+                normal_init(m, std=0.01)
         bias_cls = bias_init_with_prob(0.01)
         normal_init(self.cls_head, std=0.01, bias=bias_cls)
 
@@ -61,73 +66,77 @@ class BboxHead(nn.Module):
     """针对单层特征的bbox回归模块"""
     def __init__(self, in_channels, num_anchors):
         super().__init__()
-        self.reg_convs = nn.ModuleList()
-        for _ in range(4):
-            self.reg_convs.append(conv3x3(in_channels, in_channels, 1, 1, True))
+        self.reg_convs = nn.Sequential(nn.Conv2d(in_channels, in_channels, 3, 1, 1, True),
+                                       nn.ReLU(inplace=True),
+                                       nn.Conv2d(in_channels, in_channels, 3, 1, 1, True),
+                                       nn.ReLU(inplace=True),
+                                       nn.Conv2d(in_channels, in_channels, 3, 1, 1, True),
+                                       nn.ReLU(inplace=True),
+                                       nn.Conv2d(in_channels, in_channels, 3, 1, 1, True),
+                                       nn.ReLU(inplace=True))
         self.reg_head = nn.Conv2d(in_channels, num_anchors * 4, 3, stride=1, padding=1)
 
     def forward(self, x):
-        for conv in self.reg_convs:
-            x = conv(x)
+        x = self.reg_convs(x)
         out = self.reg_head(x)
         out = out.permute(0, 2, 3, 1).contiguous()
 #        out = out.view(out.shape[0], -1, 4)
         out = out.view(int(out.size(0)), int(-1), int(4))
         return out     
 
-    def init_weight(self):
+    def init_weights(self):
         for m in self.reg_convs:
-            normal_init(m, std=0.01)
+            if isinstance(m, nn.Conv2d):
+                normal_init(m, std=0.01)
         bias_cls = bias_init_with_prob(0.01)
         normal_init(self.reg_head, std=0.01, bias=bias_cls)
 
 
-
 class RetinaNetHead(nn.Module):
     """retina head"""
     def __init__(self, 
                  input_size=(1333, 800),
                  num_classes=21,
-                 in_channels=256,
+                 in_channels=(256, 256, 256, 256, 256),
                  base_scale=4,
                  ratios = [1/2, 1, 2],
                  anchor_strides=(8, 16, 32, 64, 128),
                  target_means=(.0, .0, .0, .0),
                  target_stds=(0.1, 0.1, 0.2, 0.2),
-                 loss_cls_cfg=None,
-                 loss_reg_cfg=None,
+                 alpha=0.25,
+                 gamma=2,
                  **kwargs):
 
         super().__init__()        
-
+        self.num_classes = num_classes
         # 参数
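         """How RetinaNet generates anchors: how the 3 core parameters are defined.
         base_size = [8, 16, 32, 64, 128], i.e. the strides are used directly
         scales = [4 * 2**(i/3) for i in range(3)], i.e. the base ratios [1, 1.26, 1.59] each multiplied by 4; the base ratios look empirical, and the factor 4 is presumably there to match the original image
         This corresponds to octave_base_scale=4 and octave_scales=[1, 1.2599, 1.5874]"""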
-        scales = base_scale * [2**(i / 3) for i in range(3)]
+        scales =  [base_scale * 2**(i / 3) for i in range(3)]
         base_sizes = anchor_strides
         # 创建anchor生成器
         self.anchor_generators = []
-        for i in range(len(in_channels)):
-            anchor_generator = AnchorGenerator(base_sizes[i], scales[i], 
-                                               ratios[i], scale_major=False) 
+        for i in range(len(anchor_strides)):
+            anchor_generator = AnchorGenerator(base_sizes[i], scales, 
+                                               ratios, scale_major=False) 
             self.anchor_generators.append(anchor_generator)
         # 创建分类回归头
         num_anchors = len(ratios) * len(scales)
-        self.cls_head = ClassHead(in_channels, num_anchors, num_classes-1)
-        self.reg_head = BboxHead(in_channels, num_anchors)
+        self.cls_head = ClassHead(in_channels[0], num_anchors, num_classes-1)
+        self.reg_head = BboxHead(in_channels[0], num_anchors)
 
         # 创建损失函数
-        self.loss_cls = SigmoidFocalLoss()
+        self.loss_cls = SigmoidFocalLoss(alpha=alpha, gamma=gamma)
         self.loss_bbox = SmoothL1Loss()
 
-    def init_weight(self):
-        self.cls_head.init_weight()
-        self.reg_head.init_weight()
+    def init_weights(self):
+        self.cls_head.init_weights()
+        self.reg_head.init_weights()
 
     def forward(self, x):
-        self.featmap_sizes = [feat.shape[2] for feat in x]
+        self.featmap_sizes = [feat.shape[2:] for feat in x] 
         cls_scores = []
         bbox_preds = []
         for feat in x:
@@ -136,7 +145,12 @@ def forward(self, x):
         return cls_scores, bbox_preds  # 这是模型最终输出，最好不用dict，避免跟onnx inference冲突
 
     def get_losses(self, cls_scores, bbox_preds, gt_bboxes, gt_labels, cfg, **kwargs):
-        """跟ssd的结构一样"""
+        """retinanet
+        cls_scores()
+        bbox_preds()
+        gt_bboxes()
+        gt_labels()
+        """
         num_imgs = len(gt_labels)
         multi_layer_anchors = []
         for i in range(len(self.featmap_sizes)):
@@ -155,16 +169,20 @@ def get_losses(self, cls_scores, bbox_preds, gt_bboxes, gt_labels, cfg, **kwargs
         """retinanet的变化：只取正样本数量作为total_sample"""
 
         """retinanet的变化：labels需要转换成独热编码方式输入focal loss"""
-
+        labels_t = one_hot_encode(labels_t, self.num_classes-1)
+        labels_w = labels_w.view(-1, 1).expand(labels_w.size(0), self.num_classes-1)
+        """retinanet的变化：计算损失时是把1个batch的比如4张图的某一特征层的labels, weights放在一起算，即(b, -1, 20)reshape成(-1, 20)
+        但我这里调整了，改成一张图的5个featmap放在一起算，4张图就map4次
+        """
+        # cls分类损失
+        pfunc = partial(self.loss_cls, avg_factor=num_pos)
+        loss_cls = list(map(pfunc, cls_scores, labels_t, labels_w))
+#        loss_cls = [loss_cls[i] * labels_w[i].float() for i in range(len(loss_cls))]  # (b,)(8732,)
+        # cls loss的ohem
+#        pfunc = partial(ohem, neg_pos_ratio=self.neg_pos_ratio, avg_factor=num_pos)
+#        loss_cls = list(map(pfunc, loss_cls, labels_t))   # (b,)
         # bbox回归损失
         pfunc = partial(self.loss_bbox, avg_factor=num_pos)
         loss_bbox = list(map(pfunc, bbox_preds, bboxes_t, bboxes_w))  # (b,)
-        # cls分类损失
-        loss_cls = list(map(self.loss_cls, cls_scores, labels_t))
-        loss_cls = [loss_cls[i] * labels_w[i].float() for i in range(len(loss_cls))]  # (b,)(8732,)
-        # cls loss的ohem
-        pfunc = partial(ohem, neg_pos_ratio=self.neg_pos_ratio, avg_factor=num_pos)
-        loss_cls = list(map(pfunc, loss_cls, labels_t))   # (b,)
-
         return dict(loss_cls = loss_cls, loss_bbox = loss_bbox)  # {(b,), (b,)} 每张图对应一个分类损失值和一个回归损失值。