Merge pull request #43 from undertherain/emil-WIP

Add ssd300
undertherain · Jun 9, 2020 · f7c7e1e · f7c7e1e
2 parents f5830c7 + bbabf73
commit f7c7e1e
Show file tree

Hide file tree

Showing 7 changed files with 271 additions and 0 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -19,6 +19,7 @@ install:
   - mkdir -p ~/.cache/benchmarker/models
   - wget https://github.com/thegopieffect/computer_vision/raw/master/CAFFE_DNN/deploy.prototxt.txt -O ~/.cache/benchmarker/models/res10_300x300_ssd_deploy.prototxt.txt
   - wget https://github.com/thegopieffect/computer_vision/raw/master/CAFFE_DNN/res10_300x300_ssd_iter_140000.caffemodel -O ~/.cache/benchmarker/models/res10_300x300_ssd_iter_140000.caffemodel
+  - wget https://api.ngc.nvidia.com/v2/models/nvidia/ssdpyt_fp32/versions/1/files/nvidia_ssdpyt_fp32_20190225.pt -O ~/.cache/benchmarker/models/nvidia_ssdpyt_fp32_20190225.pt
   - pip install -U -r requirements.txt
 
 script:

diff --git a/benchmarker/modules/problems/ssd300/__init__.py b/benchmarker/modules/problems/ssd300/__init__.py
diff --git a/benchmarker/modules/problems/ssd300/data.py b/benchmarker/modules/problems/ssd300/data.py
@@ -0,0 +1 @@
+from benchmarker.util.data.synthetic.img_300_cls import get_data
diff --git a/benchmarker/modules/problems/ssd300/pytorch.py b/benchmarker/modules/problems/ssd300/pytorch.py
@@ -0,0 +1,49 @@
+from pathlib import Path
+
+import torch
+
+from .pytorch_nvidia.model import SSD300
+
+
+# from https://github.com/NVIDIA/DeepLearningExamples/blob/master/PyTorch/SpeechSynthesis/Tacotron2/inference.py
+def checkpoint_from_distributed(state_dict):
+    """
+    Tell whether a checkpoint was saved from a DistributedDataParallel model.
+
+    DDP wraps the model in an additional "module." level, so the parameter
+    names of such a checkpoint contain "module." and have to be unwrapped
+    before the weights can be used for single GPU inference.
+    :param state_dict: model's state dict
+    :return: True if at least one key contains "module.", False otherwise
+    """
+    return any("module." in key for key in state_dict)
+
+
+# from https://github.com/NVIDIA/DeepLearningExamples/blob/master/PyTorch/SpeechSynthesis/Tacotron2/inference.py
+def unwrap_distributed(state_dict):
+    """
+    Strip the DistributedDataParallel wrapping from parameter names.
+
+    Every occurrence of "module.1." is removed from each key first, then
+    every remaining occurrence of "module."; values are passed through
+    untouched.
+    :param state_dict: model's state dict
+    :return: new dict with the renamed keys; the input dict is not modified
+    """
+    return {
+        name.replace("module.1.", "").replace("module.", ""): tensor
+        for name, tensor in state_dict.items()
+    }
+
+
+def get_kernel(params, unparsed_args=None):
+    """
+    Build an SSD300 model and load the NVIDIA checkpoint into it.
+
+    The weights are read from
+    ~/.cache/benchmarker/models/nvidia_ssdpyt_fp32_20190225.pt; this function
+    does not download the file, so it must already be present.
+    :param params: not used by this function
+    :param unparsed_args: not used by this function
+    :return: SSD300 module with the checkpoint weights loaded
+    """
+    model = SSD300()
+    cache_dir = Path("~/.cache/benchmarker").expanduser()
+    weights_file = cache_dir / "models/nvidia_ssdpyt_fp32_20190225.pt"
+    # map_location hands every storage back unchanged, so nothing is moved
+    # to another device while deserialising
+    checkpoint = torch.load(weights_file, map_location=lambda storage, loc: storage)
+    weights = checkpoint["model"]
+    # checkpoints saved from DistributedDataParallel carry "module." prefixes
+    if checkpoint_from_distributed(weights):
+        weights = unwrap_distributed(weights)
+    model.load_state_dict(weights)
+    return model
diff --git a/benchmarker/modules/problems/ssd300/pytorch_nvidia/__init__.py b/benchmarker/modules/problems/ssd300/pytorch_nvidia/__init__.py
diff --git a/benchmarker/modules/problems/ssd300/pytorch_nvidia/model.py b/benchmarker/modules/problems/ssd300/pytorch_nvidia/model.py
@@ -0,0 +1,197 @@
+# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+import torch.nn as nn
+from torchvision.models.resnet import resnet18, resnet34, resnet50, resnet101, resnet152
+
+
+class ResNet(nn.Module):
+    """
+    Truncated torchvision ResNet used as a feature extractor.
+
+    Only the first seven child modules of the torchvision network are kept,
+    and the convolutions in the first block of the last kept stage are set to
+    stride 1. ``out_channels`` holds six channel counts that SSD300 reads to
+    size its additional layers and detection heads.
+    """
+    def __init__(self, backbone='resnet50', backbone_path=None):
+        """
+        :param backbone: 'resnet18', 'resnet34', 'resnet50' or 'resnet101';
+            any other value selects resnet152
+        :param backbone_path: optional path to a state dict for the backbone;
+            when given, torchvision's pretrained weights are not requested
+        """
+        super().__init__()
+        # constructor and channel counts per supported architecture
+        variants = {
+            'resnet18': (resnet18, [256, 512, 512, 256, 256, 128]),
+            'resnet34': (resnet34, [256, 512, 512, 256, 256, 256]),
+            'resnet50': (resnet50, [1024, 512, 512, 256, 256, 256]),
+            'resnet101': (resnet101, [1024, 512, 512, 256, 256, 256]),
+        }
+        # anything not listed above is treated as resnet152
+        build, self.out_channels = variants.get(
+            backbone, (resnet152, [1024, 512, 512, 256, 256, 256]))
+        network = build(pretrained=not backbone_path)
+        if backbone_path:
+            network.load_state_dict(torch.load(backbone_path))
+
+        self.feature_extractor = nn.Sequential(*list(network.children())[:7])
+
+        # first block of the last kept stage: switch its convolutions (and
+        # the matching shortcut convolution) to stride 1
+        first_block = self.feature_extractor[-1][0]
+        for conv in (first_block.conv1, first_block.conv2, first_block.downsample[0]):
+            conv.stride = (1, 1)
+
+    def forward(self, x):
+        """Return the feature map produced by the truncated network."""
+        return self.feature_extractor(x)
+
+
+class SSD300(nn.Module):
+    """
+    SSD detector: a backbone, five additional blocks and per-scale heads.
+
+    The backbone output plus the output of each additional block form six
+    feature maps; every map gets one localisation and one classification
+    convolution. The attribute names (``feature_extractor``,
+    ``additional_blocks``, ``loc``, ``conf``) define the state-dict keys and
+    must stay as they are for existing checkpoints to load.
+    """
+    def __init__(self, backbone=None):
+        """
+        :param backbone: feature extractor module exposing ``out_channels``
+            (six channel counts); defaults to a new ``ResNet('resnet50')``
+        """
+        super().__init__()
+
+        # Create the default backbone here rather than in the signature: a
+        # default value is evaluated once, when the class is defined, which
+        # would build a ResNet-50 on import and share it between instances.
+        if backbone is None:
+            backbone = ResNet('resnet50')
+        self.feature_extractor = backbone
+
+        self.label_num = 81  # number of COCO classes
+        self._build_additional_features(self.feature_extractor.out_channels)
+        # default boxes per location on each of the six feature maps
+        self.num_defaults = [4, 6, 6, 6, 4, 4]
+
+        loc_heads = []
+        conf_heads = []
+        # heads are created pairwise, scale by scale, so the order in which
+        # the layers draw their initial values stays the same
+        for boxes, channels in zip(self.num_defaults, self.feature_extractor.out_channels):
+            loc_heads.append(nn.Conv2d(channels, boxes * 4, kernel_size=3, padding=1))
+            conf_heads.append(nn.Conv2d(channels, boxes * self.label_num, kernel_size=3, padding=1))
+
+        self.loc = nn.ModuleList(loc_heads)
+        self.conf = nn.ModuleList(conf_heads)
+        self._init_weights()
+
+    def _build_additional_features(self, input_size):
+        """
+        Create the five blocks that follow the backbone.
+
+        Block ``i`` maps ``input_size[i]`` channels to ``input_size[i + 1]``
+        through a 1x1 bottleneck. The first three blocks use a strided,
+        padded 3x3 convolution; the last two use an unpadded one.
+        :param input_size: the six channel counts of the feature maps
+        """
+        bottleneck_widths = [256, 256, 128, 128, 128]
+        blocks = []
+        for i, (in_ch, out_ch, mid_ch) in enumerate(
+                zip(input_size[:-1], input_size[1:], bottleneck_widths)):
+            # only the 3x3 convolution differs between the two block kinds
+            conv3x3_kwargs = {"padding": 1, "stride": 2} if i < 3 else {}
+            blocks.append(nn.Sequential(
+                nn.Conv2d(in_ch, mid_ch, kernel_size=1, bias=False),
+                nn.BatchNorm2d(mid_ch),
+                nn.ReLU(inplace=True),
+                nn.Conv2d(mid_ch, out_ch, kernel_size=3, bias=False, **conv3x3_kwargs),
+                nn.BatchNorm2d(out_ch),
+                nn.ReLU(inplace=True),
+            ))
+
+        self.additional_blocks = nn.ModuleList(blocks)
+
+    def _init_weights(self):
+        """Xavier-initialise every multi-dimensional parameter outside the backbone."""
+        layers = [*self.additional_blocks, *self.loc, *self.conf]
+        for layer in layers:
+            for param in layer.parameters():
+                # biases and batch-norm parameters are 1-D and left alone
+                if param.dim() > 1:
+                    nn.init.xavier_uniform_(param)
+
+    # Shape the classifier to the view of bboxes
+    def bbox_view(self, src, loc, conf):
+        """
+        Apply the heads to the feature maps and concatenate over all boxes.
+
+        :param src: feature maps, one per scale
+        :param loc: localisation heads, one per feature map
+        :param conf: classification heads, one per feature map
+        :return: ``(locs, confs)`` shaped ``(N, 4, boxes)`` and
+            ``(N, label_num, boxes)``
+        """
+        per_scale = []
+        for feature, loc_head, conf_head in zip(src, loc, conf):
+            batch = feature.size(0)
+            per_scale.append((
+                loc_head(feature).view(batch, 4, -1),
+                conf_head(feature).view(batch, self.label_num, -1),
+            ))
+
+        locs, confs = list(zip(*per_scale))
+        locs, confs = torch.cat(locs, 2).contiguous(), torch.cat(confs, 2).contiguous()
+        return locs, confs
+
+    def forward(self, x):
+        """
+        Run the detector.
+
+        :param x: input batch for the backbone
+        :return: ``(locs, confs)`` as produced by ``bbox_view``
+        """
+        x = self.feature_extractor(x)
+
+        detection_feed = [x]
+        for block in self.additional_blocks:
+            x = block(x)
+            detection_feed.append(x)
+
+        # Feature Map 38x38x4, 19x19x6, 10x10x6, 5x5x6, 3x3x4, 1x1x4
+        locs, confs = self.bbox_view(detection_feed, self.loc, self.conf)
+
+        # For SSD 300 this is nbatch x 4 x 8732 and nbatch x label_num x 8732
+        return locs, confs
+
+
+class Loss(nn.Module):
+    """
+        Implements the loss as the sum of the following:
+        1. Confidence Loss: All labels, with hard negative mining
+        2. Localization Loss: Only on positive labels
+        Suppose input dboxes has the shape 8732x4
+    """
+    def __init__(self, dboxes):
+        """
+            dboxes: default boxes object; must expose ``scale_xy`` and
+                ``scale_wh`` and return the boxes as a tensor when called
+                with ``order="xywh"``
+        """
+        super().__init__()
+        self.scale_xy = 1.0/dboxes.scale_xy
+        self.scale_wh = 1.0/dboxes.scale_wh
+
+        # reduction='none' keeps the per-box values (it replaces the
+        # deprecated reduce=False); masking and summing happen in forward()
+        self.sl1_loss = nn.SmoothL1Loss(reduction='none')
+        # stored as 1 x 4 x num_boxes so it broadcasts over the batch
+        self.dboxes = nn.Parameter(dboxes(order="xywh").transpose(0, 1).unsqueeze(dim=0),
+            requires_grad=False)
+        # Background on this loss (presumably the source of the two scale
+        # factors above):
+        # http://jany.st/post/2017-11-05-single-shot-detector-ssd-from-scratch-in-tensorflow.html
+        self.con_loss = nn.CrossEntropyLoss(reduction='none')
+
+    def _loc_vec(self, loc):
+        """
+            Generate Location Vectors: encode boxes given as Nx4xnum_boxes
+            (xy in the first two rows, wh in the last two) relative to the
+            default boxes.
+        """
+        gxy = self.scale_xy*(loc[:, :2, :] - self.dboxes[:, :2, :])/self.dboxes[:, 2:, :]
+        gwh = self.scale_wh*(loc[:, 2:, :]/self.dboxes[:, 2:, :]).log()
+        return torch.cat((gxy, gwh), dim=1).contiguous()
+
+    def forward(self, ploc, plabel, gloc, glabel):
+        """
+            ploc, plabel: Nx4x8732, Nxlabel_numx8732
+                predicted location and labels
+
+            gloc, glabel: Nx4x8732, Nx8732
+                ground truth location and labels
+
+            Returns the loss averaged over the batch; a sample without any
+            positive box contributes zero.
+        """
+        # boxes with a label greater than 0 are the positives
+        mask = glabel > 0
+        pos_num = mask.sum(dim=1)
+
+        vec_gd = self._loc_vec(gloc)
+
+        # sum on four coordinates, and mask
+        sl1 = self.sl1_loss(ploc, vec_gd).sum(dim=1)
+        sl1 = (mask.float()*sl1).sum(dim=1)
+
+        # hard negative mining
+        con = self.con_loss(plabel, glabel)
+
+        # positive boxes are zeroed so they are never picked as negatives
+        con_neg = con.clone()
+        con_neg[mask] = 0
+        # sorting the sort indices gives each box its rank by descending loss
+        _, con_idx = con_neg.sort(dim=1, descending=True)
+        _, con_rank = con_idx.sort(dim=1)
+
+        # keep three times as many negatives as there are positives
+        neg_num = torch.clamp(3*pos_num, max=mask.size(1)).unsqueeze(-1)
+        neg_mask = con_rank < neg_num
+
+        closs = (con*(mask.float() + neg_mask.float())).sum(dim=1)
+
+        # avoid dividing by zero when a sample has no positive box
+        total_loss = sl1 + closs
+        num_mask = (pos_num > 0).float()
+        pos_num = pos_num.float().clamp(min=1e-6)
+        ret = (total_loss*num_mask/pos_num).mean(dim=0)
+        return ret
diff --git a/test/pytorch/test_ssd300.py b/test/pytorch/test_ssd300.py
@@ -0,0 +1,23 @@
+# python -m benchmarker --framework=pytorch --problem=ssd300 --problem_size=4 --batch_size=2 --mode=inference
+
+import logging
+import unittest
+
+from ..helpers import run_module
+
+logging.basicConfig(level=logging.DEBUG)
+
+
+class PytorchSsd300Tests(unittest.TestCase):
+    """Runs the ssd300 problem with the pytorch framework in inference mode."""
+
+    # arguments handed to run_module, in command-line form
+    CLI_ARGS = (
+        "benchmarker",
+        "--problem=ssd300",
+        "--framework=pytorch",
+        "--problem_size=4",
+        "--batch_size=2",
+        "--mode=inference",
+    )
+
+    def setUp(self):
+        # fresh list for every test so a test may modify it freely
+        self.args = list(self.CLI_ARGS)
+
+    def test_ssd300(self):
+        """Pass the arguments to run_module; any exception fails the test."""
+        run_module(*self.args)