update

ximitiejiang · Dec 3, 2019 · 7e23899 · 7e23899
1 parent e130a87
commit 7e23899
Show file tree

Hide file tree

Showing 14 changed files with 153 additions and 29 deletions.
diff --git a/demo/readme_base.md b/demo/readme_base.md
@@ -412,6 +412,8 @@
 0. 先写结论：通过重写data_parallel_model以及里边的scatter()函数，来拆开data_container，并送model, batch_data到指定gpu设备。
    并通过to_tensor函数，把数据转换成pytorch需要的float(), Long()，防止了训练出错。
    整个过程封装得非常隐蔽，虽然减少了用户出错的几率，但也让使用者不清楚应该有什么是需要做的，有什么是系统帮忙做掉的，在哪做掉的。
+   并且mmdetection当前只支持dataparallel模式，即使gpu=1也必须设置成dataparallel模式，因为必须依靠dataparallel模式里边
+   的scatter函数对data container拆包。如果不是dataparallel模式则在前向计算result = model(img)时会报错。
    
    具体来说：
     - 定义了一个data container，用来打包数据

diff --git a/demo/retinanet_voc/cfg_detector_retinanet_resnet50_voc.py b/demo/retinanet_voc/cfg_detector_retinanet_resnet50_voc.py
@@ -41,7 +41,7 @@
         type='resnet',
         params=dict(
                 depth=50,
-                pretrained= '/home/ubuntu/MyWeights/resnet50-19c8e357.pth',
+                pretrained= '/home/ubuntu/MyWeights/pytorch/resnet50-19c8e357.pth',
                 out_indices=(0, 1, 2, 3),
                 strides=(1, 2, 2, 2)))
 

diff --git a/model/bbox_head/retinanet_head.py b/model/bbox_head/retinanet_head.py
@@ -6,9 +6,13 @@
 @author: ubuntu
 """
 import torch.nn as nn
+import torch
+from functools import partial
 
+from utils.init_weights import normal_init, bias_init_with_prob
+from model.get_target_lib import get_anchor_target
 from model.anchor_generator_lib import AnchorGenerator
-from model.loss_lib import FocalLoss
+from model.loss_lib import SigmoidFocalLoss, SmoothL1Loss
 """
 header=dict(
         type='retina_head',
@@ -21,37 +25,61 @@
                 target_means=(.0, .0, .0, .0),
                 target_stds=(0.1, 0.1, 0.2, 0.2)))
 """
-def conv3x3(in_channels):
-    return nn.Sequential(nn.Conv2d(in_channels, out_channels),
-                         nn.ReLU())
+def conv3x3(in_channels, out_channels, stride, padding, bias):
+    """Build a 3x3 convolution followed by an in-place ReLU.
+
+    Args:
+        in_channels: number of input channels of the convolution.
+        out_channels: number of output channels of the convolution.
+        stride: stride forwarded to ``nn.Conv2d``.
+        padding: padding forwarded to ``nn.Conv2d``.
+        bias: whether the convolution has a bias term.
+
+    Returns:
+        ``nn.Sequential`` holding the convolution and then the ReLU.
+    """
+    conv = nn.Conv2d(in_channels, out_channels, kernel_size=3,
+                     stride=stride, padding=padding, bias=bias)
+    relu = nn.ReLU(inplace=True)
+    return nn.Sequential(conv, relu)
 
 class ClassHead(nn.Module):
     """针对单层特征的分类模块"""
     def __init__(self, in_channels, num_anchors, num_classes):
         super().__init__()
         self.num_classes = num_classes
-        self.conv3x3 = nn.Conv2d(in_channels, num_anchors * num_classes, 3, stride=1, padding=1)
+        self.cls_convs = nn.ModuleList()
+        for _ in range(4):
+            self.cls_convs.append(conv3x3(in_channels, in_channels, 1, 1, True))
+
+        self.cls_head = nn.Conv2d(in_channels, num_anchors * num_classes, 3, stride=1, padding=1)
 
     def forward(self, x):
-        out = self.conv3x3(x)
+        for conv in self.cls_convs:  # retinanet有4个conv3x3
+            x = conv(x)
+        out = self.cls_head(x)
         out = out.permute(0, 2, 3, 1).contiguous()
 #        out = out.view(out.shape[0], -1, self.num_classes)  
         out = out.view(int(out.size(0)), int(-1), int(self.num_classes))
         return out
+
+    def init_weight(self):
+        """Initialise the tower convolutions and the classification output conv.
+
+        Every entry of ``self.cls_convs`` is passed to ``normal_init`` with
+        ``std=0.01``. The output conv ``self.cls_head`` is initialised the same
+        way and additionally receives the bias returned by
+        ``bias_init_with_prob(0.01)``.
+        """
+        # NOTE(review): each entry of cls_convs is the nn.Sequential returned by
+        # conv3x3(), not a bare nn.Conv2d -- confirm that normal_init accepts a
+        # container module.
+        for m in self.cls_convs:
+            normal_init(m, std=0.01)
+        bias_cls = bias_init_with_prob(0.01)
+        normal_init(self.cls_head, std=0.01, bias=bias_cls)
 
 
 class BboxHead(nn.Module):
     """针对单层特征的bbox回归模块"""
     def __init__(self, in_channels, num_anchors):
         super().__init__()
-        self.conv3x3 = nn.Conv2d(in_channels, num_anchors * 4, 3, stride=1, padding=1)
+        self.reg_convs = nn.ModuleList()
+        for _ in range(4):
+            self.reg_convs.append(conv3x3(in_channels, in_channels, 1, 1, True))
+        self.reg_head = nn.Conv2d(in_channels, num_anchors * 4, 3, stride=1, padding=1)
 
     def forward(self, x):
-        out = self.conv3x3(x)
+        for conv in self.reg_convs:
+            x = conv(x)
+        out = self.reg_head(x)
         out = out.permute(0, 2, 3, 1).contiguous()
 #        out = out.view(out.shape[0], -1, 4)
         out = out.view(int(out.size(0)), int(-1), int(4))
         return out     
+
+    def init_weight(self):
+        """Initialise the tower convolutions and the regression output conv.
+
+        Every entry of ``self.reg_convs`` and the output conv ``self.reg_head``
+        are passed to ``normal_init`` with ``std=0.01``. The output conv gets a
+        zero bias: the focal-loss prior bias (``bias_init_with_prob``) belongs
+        to the classification output only, so it is no longer applied here.
+        """
+        # NOTE(review): each entry of reg_convs is the nn.Sequential returned by
+        # conv3x3(), not a bare nn.Conv2d -- confirm that normal_init accepts a
+        # container module.
+        for m in self.reg_convs:
+            normal_init(m, std=0.01)
+        # Box deltas have no rare-positive prior, so start from a zero bias.
+        normal_init(self.reg_head, std=0.01, bias=0)
 
 
 
@@ -64,6 +92,8 @@ def __init__(self,
                  base_scale=4,
                  ratios = [1/2, 1, 2],
                  anchor_strides=(8, 16, 32, 64, 128),
+                 target_means=(.0, .0, .0, .0),
+                 target_stds=(0.1, 0.1, 0.2, 0.2),
                  loss_cls_cfg=None,
                  loss_reg_cfg=None,
                  **kwargs):
@@ -83,18 +113,58 @@ def __init__(self,
             anchor_generator = AnchorGenerator(base_sizes[i], scales[i], 
                                                ratios[i], scale_major=False) 
             self.anchor_generators.append(anchor_generator)
-
         # 创建分类回归头
-        self.cls_convs = nn.ModuleList()
-        self.reg_convs = nn.ModuleList()
-        for _ in range(4):
-            self.cls_convs.append(conv3x3())
-            self.reg_convs.append(conv3x3())
-        num_anchors = len(ratios) * len()
-        self.cls_head = ClassHead(in_channels, )
-        self.reg_head = BboxHead()
+        num_anchors = len(ratios) * len(scales)
+        self.cls_head = ClassHead(in_channels, num_anchors, num_classes-1)
+        self.reg_head = BboxHead(in_channels, num_anchors)
 
         # 创建损失函数
-        self.loss_cls = CrossEntropyLoss()
+        self.loss_cls = SigmoidFocalLoss()
         self.loss_bbox = SmoothL1Loss()
+
+    def init_weight(self):
+        """Initialise both sub-heads by calling their own ``init_weight``.
+
+        The classification head is initialised first, then the regression head.
+        """
+        for head in (self.cls_head, self.reg_head):
+            head.init_weight()
+
+    def forward(self, x):
+        """Run the classification and regression heads on every feature level.
+
+        Args:
+            x: sequence of feature maps, one per level. ``shape[2]`` of each is
+                stored in ``self.featmap_sizes`` as a side effect.
+
+        Returns:
+            ``(cls_scores, bbox_preds)``: two lists with one entry per level.
+        """
+        # NOTE(review): only shape[2] is kept per level -- confirm that
+        # grid_anchors() expects a single size rather than an (h, w) pair.
+        self.featmap_sizes = [level.shape[2] for level in x]
+        per_level = [(self.cls_head(level), self.reg_head(level)) for level in x]
+        cls_scores = [scores for scores, _ in per_level]
+        bbox_preds = [deltas for _, deltas in per_level]
+        # Final model output: a plain tuple rather than a dict, to avoid
+        # conflicts with ONNX inference.
+        return cls_scores, bbox_preds
+
+    def get_losses(self, cls_scores, bbox_preds, gt_bboxes, gt_labels, cfg, **kwargs):
+        """Compute classification and regression losses (same structure as the SSD head).
+
+        Steps: build anchors for every recorded feature-map size, concatenate
+        them and repeat the result once per image, call ``get_anchor_target``
+        to obtain targets and weights, then map ``self.loss_bbox`` and
+        ``self.loss_cls`` over predictions and targets and pass the weighted
+        classification loss through ``ohem``.
+
+        Args:
+            cls_scores: classification output of ``forward``.
+            bbox_preds: regression output of ``forward``.
+            gt_bboxes: ground-truth boxes, forwarded to ``get_anchor_target``.
+            gt_labels: ground-truth labels; its length is used as the number
+                of images.
+            cfg: config object providing ``assigner`` and ``sampler``.
+            **kwargs: accepted but not used in this block.
+
+        Returns:
+            dict with keys ``loss_cls`` and ``loss_bbox``, each a list.
+
+        NOTE(review): ``ohem`` and ``self.neg_pos_ratio`` are used below but
+        are not imported/assigned in the hunks visible in this commit --
+        verify they exist. The shape remarks in the trailing comments
+        (e.g. 8732 anchors) appear to be carried over from the SSD head --
+        TODO confirm for RetinaNet.
+        """
+        num_imgs = len(gt_labels)
+        multi_layer_anchors = []
+        for i in range(len(self.featmap_sizes)):
+            # NOTE(review): forward() in this class returns cls_scores as a
+            # Python list, which has no ``.device`` -- confirm what the caller
+            # actually passes in here.
+            device = cls_scores.device
+            anchors = self.anchor_generators[i].grid_anchors(
+                self.featmap_sizes[i], self.anchor_strides[i], device=device)
+            multi_layer_anchors.append(anchors)  # (6,)(k, 4)
+        multi_layer_anchors = torch.cat(multi_layer_anchors, dim=0)  # stacked, (8732, 4)
+        anchor_list = [multi_layer_anchors for _ in range(num_imgs)]  # (b,) (s,4)
+        # Compute targets; the None argument stands for gt_landmarks=None.
+        target_result = get_anchor_target(anchor_list, gt_bboxes, gt_labels, None,
+                                          cfg.assigner, cfg.sampler,
+                                          self.target_means, self.target_stds)
+        # Unpack the targets.
+        bboxes_t, bboxes_w, labels_t, labels_w, _, _, num_pos, num_neg = target_result  # (b,-1,4)x2, (b,-1)x2
+        # The two bare strings below are placeholder notes with no code behind
+        # them yet. They read: "RetinaNet change: use only the number of
+        # positive samples as total_sample" and "RetinaNet change: labels must
+        # be converted to one-hot encoding before being fed to focal loss".
+        """retinanet的变化：只取正样本数量作为total_sample"""
+
+        """retinanet的变化：labels需要转换成独热编码方式输入focal loss"""
+
+        # Bbox regression loss.
+        pfunc = partial(self.loss_bbox, avg_factor=num_pos)
+        loss_bbox = list(map(pfunc, bbox_preds, bboxes_t, bboxes_w))  # (b,)
+        # Classification loss, weighted element-wise by labels_w.
+        loss_cls = list(map(self.loss_cls, cls_scores, labels_t))
+        loss_cls = [loss_cls[i] * labels_w[i].float() for i in range(len(loss_cls))]  # (b,)(8732,)
+        # OHEM on the classification loss.
+        pfunc = partial(ohem, neg_pos_ratio=self.neg_pos_ratio, avg_factor=num_pos)
+        loss_cls = list(map(pfunc, loss_cls, labels_t))   # (b,)
+
+        return dict(loss_cls = loss_cls, loss_bbox = loss_bbox)  # {(b,), (b,)}: one classification loss and one regression loss per image.
 
diff --git a/model/loss_lib.py b/model/loss_lib.py
@@ -92,8 +92,8 @@ def focal_loss(pred, target, alpha=0.25, gamma=2.0):
     focal_loss = - at*(1-pt)^gamma * log(pt), 其中at = a*t+(1-a)(1-t), pt = p*t+(1-p)(1-t)
     当t=1时
     """
-    pred_s = pred.sigmoid()  # 用真实概率p进行weight的计算，但二值交叉熵输入非概率化p，因为内部自带了sigmoid
-    pt = pred_s * target + (1 - pred_s) * (1 - target)
+    pred_sig = pred.sigmoid()  # 用真实概率p进行weight的计算，但二值交叉熵输入非概率化p，因为内部自带了sigmoid
+    pt = pred_sig * target + (1 - pred_sig) * (1 - target)
     at = alpha * target + (1 - alpha) * (1 - target)
     weight = at * (1 - pt).pow(gamma)
     loss = F.binary_cross_entropy(pred, target, weight, reduction='none')

diff --git a/utils/tools.py b/utils/tools.py
@@ -6,13 +6,50 @@
 @author: ubuntu
 """
 import torch
+import numpy as np
 import os
 import time
 import six
 import pickle
 #from contextlib import ContextDecorator
 from functools import wraps
 
+# %%
+def one_hot_encode(t, n_column=None):
+    """Build a one-hot encoding of integer labels (PyTorch version).
+
+    Args:
+        t: tensor holding b integer class labels that start at 0, e.g.
+            [0, 1, 2, 3]. It is cast to long and flattened before encoding.
+        n_column: number of columns of the encoding. Defaults to
+            ``t.max() + 1``, the smallest width that fits every label.
+
+    Returns:
+        Float tensor of shape (b, n_column) with a single 1 per row, created
+        on the same device as ``t``.
+    """
+    t = t.long().reshape(-1, 1)  # column of indices, as scatter_ expects
+    if n_column is None:
+        n_column = int(t.max().item()) + 1
+    # Allocate on t's device: scatter_ needs the index and the output on the
+    # same device, and torch.FloatTensor(...) always allocated on the CPU.
+    one_hot_t = torch.zeros(t.size(0), n_column, dtype=torch.float32, device=t.device)
+    one_hot_t.scatter_(1, t, 1)  # write a 1 at each row's label column
+    return one_hot_t
+
+
+def label_to_onehot(labels):
+    """Convert integer labels to a one-hot matrix (NumPy version).
+
+    Labels must be integers starting at 0, e.g. [0, 1, 2, ...]. The result is
+        [[1, 0, 0, ...],
+         [0, 1, 0, ...],
+         [0, 0, 1, ...]]
+    for the labels 0 / 1 / 2 respectively.
+
+    Args:
+        labels: 1-dim NumPy array of class indices.
+
+    Returns:
+        Float array of shape (n_samples, n_col) where ``n_col`` is
+        ``max(labels) + 1``, the minimum number of columns.
+
+    Raises:
+        AssertionError: if ``labels`` is not 1-dimensional.
+    """
+    assert labels.ndim == 1, 'labels should be 1-dim array.'
+    # Use the platform index type: int8 wrapped labels above 127 to negative
+    # values, giving a wrong column count and wrong (or out-of-range) columns.
+    labels = labels.astype(np.intp)
+    n_col = int(np.max(labels) + 1)   # minimum width; extra zero columns would also be valid
+    one_hot = np.zeros((labels.shape[0], n_col))
+    one_hot[np.arange(labels.shape[0]), labels] = 1
+    return one_hot  # (n_samples, n_col)
+
+
+def onehot_to_label(one_hot_labels):
+    """Turn one-hot rows back into integer labels in 0..k.
+
+    The label of each row is the column index of its maximum entry (the 1).
+    """
+    return np.argmax(one_hot_labels, axis=1)
+
+
 # %%
 class ContextDecorator(object):
     """该类来自：from contextlib import ContextDecorator
@@ -326,5 +363,6 @@ def get_dist_info():
 # %%
 
 if __name__ == "__main__":
-    hello(1)
-    hello2(2)
+    t = torch.tensor([2, 0 , 9])
+    onehot = one_hot_encode(t)
+    print(onehot)
diff --git a/v01/TRAIN_voc.py b/v01/TRAIN_voc.py
@@ -92,7 +92,7 @@ def train(cfg_path, dataset_class):
     """
     # 初始化2个默认选项
     distributed = False
-    parallel = True
+    parallel = True      # 必须设置成dataparallel模式，否则data container无法拆包(因为data container的拆包是在data parallel的scatter函数中进行的)
 
     # get cfg
     cfg = Config.fromfile(cfg_path)

diff --git a/v01/config/__pycache__/cfg_retinanet_r50_fpn_voc.cpython-37.pyc b/v01/config/__pycache__/cfg_retinanet_r50_fpn_voc.cpython-37.pyc
diff --git a/v01/config/cfg_retinanet_r50_fpn_voc.py b/v01/config/cfg_retinanet_r50_fpn_voc.py
@@ -1,7 +1,7 @@
 # model settings
 model = dict(
     type='RetinaNet',
-    pretrained='modelzoo://resnet50',  # 使用pytorch的resnet50的权重
+    pretrained='/home/ubuntu/MyWeights/pytorch/resnet50-19c8e357.pth',  # 使用pytorch的resnet50的权重
     backbone=dict(
         type='ResNet',
         depth=50,
@@ -50,7 +50,7 @@
     max_per_img=100)
 # dataset settings
 dataset_type = 'VOCDataset'
-data_root = './data/VOCdevkit/'
+data_root = '/home/ubuntu/MyDatasets0/voc/VOCdevkit/'
 img_norm_cfg = dict(
     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)  # 采用的是pytorch的模型，但mean/std还是用的caffe的？？？
 data = dict(           # repeatdataset不加了，在coco训练12epoch，voc上调成24

diff --git a/v01/dataset/__pycache__/extra_aug.cpython-37.pyc b/v01/dataset/__pycache__/extra_aug.cpython-37.pyc
diff --git a/v01/utils/anchor_target.py b/v01/utils/anchor_target.py
@@ -531,13 +531,13 @@ def unmap(data, count, inds, fill=0):
         ret[inds, :] = data
     return ret
 
-
+# 把原始标签转换为独热标签one_hot_code： 因为该标签需要送入focal loss进行内部的二值交叉熵计算，必须采用独热标签。
 def expand_binary_labels(labels, label_weights, label_channels):
     bin_labels = labels.new_full(
-        (labels.size(0), label_channels), 0, dtype=torch.float32)
+        (labels.size(0), label_channels), 0, dtype=torch.float32)  # (182403, 20)
     inds = torch.nonzero(labels >= 1).squeeze()
     if inds.numel() > 0:
-        bin_labels[inds, labels[inds] - 1] = 1
+        bin_labels[inds, labels[inds] - 1] = 1   # 注意这里需要把原始标签(1-20)转换到0-19，因为独热编码需要从0开始
     bin_label_weights = label_weights.view(-1, 1).expand(
         label_weights.size(0), label_channels)
     return bin_labels, bin_label_weights

diff --git a/v01/work_dirs/retinanet_voc/20191203_165500.log b/v01/work_dirs/retinanet_voc/20191203_165500.log
@@ -0,0 +1,6 @@
+2019-12-03 16:55:19,008 - INFO - Start running, host: ubuntu@ubun, work_dir: /home/ubuntu/suliang_git/deep_learning_algorithm/v01/work_dirs/retinanet_voc
+2019-12-03 16:55:19,201 - INFO - workflow: [('train', 1)], max: 20 epochs
+2019-12-03 16:59:37,517 - INFO - Start running, host: ubuntu@ubun, work_dir: /home/ubuntu/suliang_git/deep_learning_algorithm/v01/work_dirs/retinanet_voc
+2019-12-03 16:59:37,960 - INFO - workflow: [('train', 1)], max: 20 epochs
+2019-12-03 17:06:28,780 - INFO - Start running, host: ubuntu@ubun, work_dir: /home/ubuntu/suliang_git/deep_learning_algorithm/v01/work_dirs/retinanet_voc
+2019-12-03 17:06:28,781 - INFO - workflow: [('train', 1)], max: 20 epochs
diff --git a/v01/work_dirs/retinanet_voc/20191203_165921.log b/v01/work_dirs/retinanet_voc/20191203_165921.log
@@ -0,0 +1,4 @@
+2019-12-03 16:59:37,517 - INFO - Start running, host: ubuntu@ubun, work_dir: /home/ubuntu/suliang_git/deep_learning_algorithm/v01/work_dirs/retinanet_voc
+2019-12-03 16:59:37,960 - INFO - workflow: [('train', 1)], max: 20 epochs
+2019-12-03 17:06:28,780 - INFO - Start running, host: ubuntu@ubun, work_dir: /home/ubuntu/suliang_git/deep_learning_algorithm/v01/work_dirs/retinanet_voc
+2019-12-03 17:06:28,781 - INFO - workflow: [('train', 1)], max: 20 epochs
diff --git a/v01/work_dirs/retinanet_voc/20191203_170628.log b/v01/work_dirs/retinanet_voc/20191203_170628.log
@@ -0,0 +1,2 @@
+2019-12-03 17:06:28,780 - INFO - Start running, host: ubuntu@ubun, work_dir: /home/ubuntu/suliang_git/deep_learning_algorithm/v01/work_dirs/retinanet_voc
+2019-12-03 17:06:28,781 - INFO - workflow: [('train', 1)], max: 20 epochs
diff --git a/v01/work_dirs/retinanet_voc/20191203_170659.log b/v01/work_dirs/retinanet_voc/20191203_170659.log
@@ -0,0 +1,2 @@
+2019-12-03 17:06:59,705 - INFO - Start running, host: ubuntu@ubun, work_dir: /home/ubuntu/suliang_git/deep_learning_algorithm/v01/work_dirs/retinanet_voc
+2019-12-03 17:06:59,706 - INFO - workflow: [('train', 1)], max: 20 epochs
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		2019-12-03 17:06:28,780 - INFO - Start running, host: ubuntu@ubun, work_dir: /home/ubuntu/suliang_git/deep_learning_algorithm/v01/work_dirs/retinanet_voc
		2019-12-03 17:06:28,781 - INFO - workflow: [('train', 1)], max: 20 epochs
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		2019-12-03 17:06:59,705 - INFO - Start running, host: ubuntu@ubun, work_dir: /home/ubuntu/suliang_git/deep_learning_algorithm/v01/work_dirs/retinanet_voc
		2019-12-03 17:06:59,706 - INFO - workflow: [('train', 1)], max: 20 epochs