
Commit c3ac4f5

Support AMP and resume training in fastface

With partial-fc, AMP must be applied to the backbone only. To implement `resume training`, each GPU has to save and load its own part of the classifier weight.
1 parent 91ff631 commit c3ac4f5
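
The first point is worth spelling out: autocast wraps only the backbone, and its output is cast back to fp32 before the head, so the sharded margin-softmax classifier stays in full precision. A minimal standalone sketch of the pattern the face_baseline.py diff below implements (names here are illustrative, not fastreid API):

import torch

def extract_features(backbone, images, amp_enabled=True):
    # Only the backbone runs in mixed precision.
    with torch.cuda.amp.autocast(amp_enabled):
        features = backbone(images)
    # Cast back to fp32 so everything downstream (head, partial-fc) stays full precision.
    return features.float() if amp_enabled else features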

15 files changed: +432 −178 lines

projects/FastFace/configs/face_base.yml (+8 −5)

@@ -4,6 +4,9 @@ MODEL:
   PIXEL_MEAN: [127.5, 127.5, 127.5]
   PIXEL_STD: [127.5, 127.5, 127.5]

+  BACKBONE:
+    NAME: build_iresnet_backbone
+
   HEADS:
     NAME: FaceHead
     WITH_BNNECK: True
@@ -30,7 +33,7 @@ MODEL:
 DATASETS:
   REC_PATH: /export/home/DATA/Glint360k/train.rec
   NAMES: ("MS1MV2",)
-  TESTS: ("CPLFW", "VGG2_FP", "CALFW", "CFP_FF", "CFP_FP", "AgeDB_30", "LFW")
+  TESTS: ("CFP_FP", "AgeDB_30", "LFW")

 INPUT:
   SIZE_TRAIN: [0,] # No need of resize
@@ -47,10 +50,10 @@ DATALOADER:
 SOLVER:
   MAX_EPOCH: 20
   AMP:
-    ENABLED: False
+    ENABLED: True

   OPT: SGD
-  BASE_LR: 0.1
+  BASE_LR: 0.05
   MOMENTUM: 0.9

   SCHED: MultiStepLR
@@ -59,10 +62,10 @@ SOLVER:
   BIAS_LR_FACTOR: 1.
   WEIGHT_DECAY: 0.0005
   WEIGHT_DECAY_BIAS: 0.0005
-  IMS_PER_BATCH: 512
+  IMS_PER_BATCH: 256

   WARMUP_FACTOR: 0.1
-  WARMUP_ITERS: 5000
+  WARMUP_ITERS: 0

   CHECKPOINT_PERIOD: 1
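
As a usage sketch, these options reach the trainer through fastreid's yacs config plus the project's add_face_cfg hook. Assuming the FastFace project directory is on PYTHONPATH, loading the merged config looks roughly like:

from fastreid.config import get_cfg
from fastface import add_face_cfg

cfg = get_cfg()
add_face_cfg(cfg)  # registers MODEL.BACKBONE.DROPOUT, MODEL.HEADS.PFC, DATASETS.REC_PATH
cfg.merge_from_file("projects/FastFace/configs/face_base.yml")
assert cfg.SOLVER.AMP.ENABLED and cfg.SOLVER.BASE_LR == 0.05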

projects/FastFace/configs/r50_ir.yml (+2 −3)

@@ -3,13 +3,12 @@ _BASE_: face_base.yml
 MODEL:

   BACKBONE:
-    NAME: build_resnetIR_backbone
     DEPTH: 50x
     FEAT_DIM: 25088 # 512x7x7
-    WITH_SE: True
+    DROPOUT: 0.

   HEADS:
     PFC:
       ENABLED: True

-OUTPUT_DIR: projects/FastFace/logs/ir_se50-glink360k-pfc0.1
+OUTPUT_DIR: projects/FastFace/logs/pfc0.1_insightface

projects/FastFace/fastface/__init__.py (+1)

@@ -7,3 +7,4 @@
 from .modeling import *
 from .config import add_face_cfg
 from .trainer import FaceTrainer
+from .datasets import *

projects/FastFace/fastface/config.py (+2)

@@ -12,5 +12,7 @@ def add_face_cfg(cfg):

     _C.DATASETS.REC_PATH = ""

+    _C.MODEL.BACKBONE.DROPOUT = 0.
+
     _C.MODEL.HEADS.PFC = CN({"ENABLED": False})
     _C.MODEL.HEADS.PFC.SAMPLE_RATE = 0.1

projects/FastFace/fastface/datasets/ms1mv2.py (+1 −1)

@@ -23,7 +23,7 @@ def __init__(self, root="datasets", **kwargs):
         required_files = [self.dataset_dir]
         self.check_before_run(required_files)

-        train = self.process_dirs()
+        train = self.process_dirs()[:10000]
         super().__init__(train, [], [], **kwargs)

     def process_dirs(self):

projects/FastFace/fastface/modeling/__init__.py (+1 −1)

@@ -7,4 +7,4 @@
 from .partial_fc import PartialFC
 from .face_baseline import FaceBaseline
 from .face_head import FaceHead
-from .resnet_ir import build_resnetIR_backbone
+from .iresnet import build_iresnet_backbone

projects/FastFace/fastface/modeling/face_baseline.py (+23 −6)

@@ -4,6 +4,7 @@

 """

+import torch
 from fastreid.modeling.meta_arch import Baseline
 from fastreid.modeling.meta_arch import META_ARCH_REGISTRY

@@ -13,12 +14,28 @@ class FaceBaseline(Baseline):
     def __init__(self, cfg):
         super().__init__(cfg)
         self.pfc_enabled = cfg.MODEL.HEADS.PFC.ENABLED
+        self.amp_enabled = cfg.SOLVER.AMP.ENABLED

-    def losses(self, outputs, gt_labels):
+    def forward(self, batched_inputs):
         if not self.pfc_enabled:
-            return super().losses(outputs, gt_labels)
+            return super().forward(batched_inputs)
+
+        images = self.preprocess_image(batched_inputs)
+        with torch.cuda.amp.autocast(self.amp_enabled):
+            features = self.backbone(images)
+        features = features.float() if self.amp_enabled else features
+
+        if self.training:
+            assert "targets" in batched_inputs, "Person ID annotation are missing in training!"
+            targets = batched_inputs["targets"]
+
+            # PreciseBN flag. When running preciseBN on a different dataset, the number of
+            # classes in the new dataset may be larger than in the original one, so
+            # circle/arcface would throw an error. We set all targets to 0 to avoid this.
+            if targets.sum() < 0: targets.zero_()
+
+            outputs = self.heads(features, targets)
+            return outputs, targets
         else:
-            # model parallel with partial-fc
-            # cls layer and loss computation in partial_fc.py
-            pred_features = outputs["features"]
-            return pred_features, gt_labels
+            outputs = self.heads(features)
+            return outputs
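
With PFC enabled, forward now returns the fp32 embeddings and targets; the trainer hands them to PartialFC, which owns the classifier and the loss, and the backward pass follows the standard torch.cuda.amp recipe. A sketch of that loop (illustrative names, not fastreid's actual trainer code):

import torch

scaler = torch.cuda.amp.GradScaler(enabled=True)

def run_step(model, partial_fc_loss, optimizer, batched_inputs):
    # forward() already ran the backbone under autocast and cast back to fp32
    embeddings, targets = model(batched_inputs)
    loss = partial_fc_loss(embeddings, targets)  # margin softmax on this rank's shard
    optimizer.zero_grad()
    scaler.scale(loss).backward()  # scaled to avoid fp16 gradient underflow
    scaler.step(optimizer)
    scaler.update()
    return loss.detach()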

projects/FastFace/fastface/modeling/face_head.py (+1 −7)

@@ -30,10 +30,4 @@ def forward(self, features, targets=None):
         pool_feat = self.pool_layer(features)
         neck_feat = self.bottleneck(pool_feat)
         neck_feat = neck_feat[..., 0, 0]
-
-        if not self.training:
-            return neck_feat
-
-        return {
-            "features": neck_feat,
-        }
+        return neck_feat

projects/FastFace/fastface/modeling/iresnet.py (+179, new file)

@@ -0,0 +1,179 @@
+# encoding: utf-8
+"""
+@author: xingyu liao
+
+"""
+
+import torch
+from torch import nn
+
+from fastreid.layers import get_norm
+from fastreid.modeling.backbones import BACKBONE_REGISTRY
+
+
+def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
+    """3x3 convolution with padding"""
+    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
+                     padding=dilation, groups=groups, bias=False, dilation=dilation)
+
+
+def conv1x1(in_planes, out_planes, stride=1):
+    """1x1 convolution"""
+    return nn.Conv2d(in_planes,
+                     out_planes,
+                     kernel_size=1,
+                     stride=stride,
+                     bias=False)
+
+
+class IBasicBlock(nn.Module):
+    expansion = 1
+
+    def __init__(self, inplanes, planes, bn_norm, stride=1, downsample=None,
+                 groups=1, base_width=64, dilation=1):
+        super().__init__()
+        if groups != 1 or base_width != 64:
+            raise ValueError('BasicBlock only supports groups=1 and base_width=64')
+        if dilation > 1:
+            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
+        self.bn1 = get_norm(bn_norm, inplanes)
+        self.conv1 = conv3x3(inplanes, planes)
+        self.bn2 = get_norm(bn_norm, planes)
+        self.prelu = nn.PReLU(planes)
+        self.conv2 = conv3x3(planes, planes, stride)
+        self.bn3 = get_norm(bn_norm, planes)
+        self.downsample = downsample
+        self.stride = stride
+
+    def forward(self, x):
+        identity = x
+        out = self.bn1(x)
+        out = self.conv1(out)
+        out = self.bn2(out)
+        out = self.prelu(out)
+        out = self.conv2(out)
+        out = self.bn3(out)
+        if self.downsample is not None:
+            identity = self.downsample(x)
+        out += identity
+        return out
+
+
+class IResNet(nn.Module):
+    fc_scale = 7 * 7
+
+    def __init__(self, block, layers, bn_norm, dropout=0, zero_init_residual=False,
+                 groups=1, width_per_group=64, replace_stride_with_dilation=None, fp16=False):
+        super().__init__()
+        self.inplanes = 64
+        self.dilation = 1
+        self.fp16 = fp16
+        if replace_stride_with_dilation is None:
+            replace_stride_with_dilation = [False, False, False]
+        if len(replace_stride_with_dilation) != 3:
+            raise ValueError("replace_stride_with_dilation should be None "
+                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
+        self.groups = groups
+        self.base_width = width_per_group
+        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False)
+        self.bn1 = get_norm(bn_norm, self.inplanes)
+        self.prelu = nn.PReLU(self.inplanes)
+        self.layer1 = self._make_layer(block, 64, layers[0], bn_norm, stride=2)
+        self.layer2 = self._make_layer(block,
+                                       128,
+                                       layers[1],
+                                       bn_norm,
+                                       stride=2,
+                                       dilate=replace_stride_with_dilation[0])
+        self.layer3 = self._make_layer(block,
+                                       256,
+                                       layers[2],
+                                       bn_norm,
+                                       stride=2,
+                                       dilate=replace_stride_with_dilation[1])
+        self.layer4 = self._make_layer(block,
+                                       512,
+                                       layers[3],
+                                       bn_norm,
+                                       stride=2,
+                                       dilate=replace_stride_with_dilation[2])
+        self.bn2 = get_norm(bn_norm, 512 * block.expansion)
+        self.dropout = nn.Dropout(p=dropout, inplace=True)
+
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.normal_(m.weight, 0, 0.1)
+            elif m.__class__.__name__.find('Norm') != -1:
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+
+        if zero_init_residual:
+            for m in self.modules():
+                if isinstance(m, IBasicBlock):
+                    nn.init.constant_(m.bn2.weight, 0)
+
+    def _make_layer(self, block, planes, blocks, bn_norm, stride=1, dilate=False):
+        downsample = None
+        previous_dilation = self.dilation
+        if dilate:
+            self.dilation *= stride
+            stride = 1
+        if stride != 1 or self.inplanes != planes * block.expansion:
+            downsample = nn.Sequential(
+                conv1x1(self.inplanes, planes * block.expansion, stride),
+                get_norm(bn_norm, planes * block.expansion),
+            )
+        layers = []
+        layers.append(
+            block(self.inplanes, planes, bn_norm, stride, downsample, self.groups,
+                  self.base_width, previous_dilation))
+        self.inplanes = planes * block.expansion
+        for _ in range(1, blocks):
+            layers.append(
+                block(self.inplanes,
+                      planes,
+                      bn_norm,
+                      groups=self.groups,
+                      base_width=self.base_width,
+                      dilation=self.dilation))
+
+        return nn.Sequential(*layers)
+
+    def forward(self, x):
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.prelu(x)
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
+        x = self.bn2(x)
+        x = self.dropout(x)
+        return x
+
+
+@BACKBONE_REGISTRY.register()
+def build_iresnet_backbone(cfg):
+    """
+    Create an IResNet instance from config.
+    Returns:
+        IResNet: a :class:`IResNet` instance.
+    """
+
+    # fmt: off
+    bn_norm = cfg.MODEL.BACKBONE.NORM
+    depth   = cfg.MODEL.BACKBONE.DEPTH
+    dropout = cfg.MODEL.BACKBONE.DROPOUT
+    fp16    = cfg.SOLVER.AMP.ENABLED
+    # fmt: on
+
+    num_blocks_per_stage = {
+        '18x': [2, 2, 2, 2],
+        '34x': [3, 4, 6, 3],
+        '50x': [3, 4, 14, 3],
+        '100x': [3, 13, 30, 3],
+        '200x': [6, 26, 60, 6],
+    }[depth]
+
+    model = IResNet(IBasicBlock, num_blocks_per_stage, bn_norm, dropout, fp16=fp16)
+    return model
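
A quick shape check for the new backbone (a sketch; assumes fastreid and the FastFace project are on PYTHONPATH, and that "BN" is a norm type get_norm accepts):

import torch
from fastface.modeling.iresnet import IResNet, IBasicBlock

# '50x' stage layout from build_iresnet_backbone above
model = IResNet(IBasicBlock, [3, 4, 14, 3], "BN", dropout=0.).eval()
with torch.no_grad():
    feat = model(torch.randn(2, 3, 112, 112))
print(feat.shape)  # torch.Size([2, 512, 7, 7]); flattened = 25088, matching FEAT_DIM

Each of the four stages downsamples by 2 while conv1 keeps stride 1, so a 112x112 face crop ends at 7x7; that is where fc_scale = 7 * 7 and the FEAT_DIM of 25088 in r50_ir.yml come from.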

projects/FastFace/fastface/modeling/partial_fc.py (−17)

@@ -52,23 +52,6 @@ def __init__(
 
         self.cls_layer = getattr(any_softmax, cls_type)(num_classes, scale, margin)
 
-        """ TODO: consider resume training
-        if resume:
-            try:
-                self.weight: torch.Tensor = torch.load(self.weight_name)
-                logging.info("softmax weight resume successfully!")
-            except (FileNotFoundError, KeyError, IndexError):
-                self.weight = torch.normal(0, 0.01, (self.num_local, self.embedding_size), device=self.device)
-                logging.info("softmax weight resume fail!")
-
-            try:
-                self.weight_mom: torch.Tensor = torch.load(self.weight_mom_name)
-                logging.info("softmax weight mom resume successfully!")
-            except (FileNotFoundError, KeyError, IndexError):
-                self.weight_mom: torch.Tensor = torch.zeros_like(self.weight)
-                logging.info("softmax weight mom resume fail!")
-        else:
-        """
         self.weight = torch.normal(0, 0.01, (self.num_local, self.embedding_size), device=self.device)
         self.weight_mom: torch.Tensor = torch.zeros_like(self.weight)
         logger.info("softmax weight init successfully!")
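
The removed TODO is what the commit message refers to: each rank owns only a shard of the classifier weight and its momentum buffer, so resuming must save and load per-rank tensors. One possible shape of that logic (a hypothetical sketch; the file names and helpers are illustrative, not the API this commit actually adds):

import os
import torch
import torch.distributed as dist

def save_pfc_shard(weight, weight_mom, save_dir):
    # Every rank writes its own shard; the file name encodes the rank.
    rank = dist.get_rank()
    torch.save(weight, os.path.join(save_dir, f"softmax_weight_rank{rank}.pt"))
    torch.save(weight_mom, os.path.join(save_dir, f"softmax_weight_mom_rank{rank}.pt"))

def load_pfc_shard(save_dir, num_local, embedding_size, device):
    rank = dist.get_rank()
    try:
        weight = torch.load(os.path.join(save_dir, f"softmax_weight_rank{rank}.pt"), map_location=device)
        weight_mom = torch.load(os.path.join(save_dir, f"softmax_weight_mom_rank{rank}.pt"), map_location=device)
    except FileNotFoundError:
        # Fall back to fresh init, mirroring the constructor above.
        weight = torch.normal(0, 0.01, (num_local, embedding_size), device=device)
        weight_mom = torch.zeros_like(weight)
    return weight, weight_mom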
