## You can download an NVIDIA model published on PyTorch Hub (the checkpoint itself is hosted on NGC)

#### ... and then compress it as a tar.gz file, and use this for deployment:

In [2]:
# Fetch the SSD checkpoint file from the NGC API into the current working directory.
!wget https://api.ngc.nvidia.com/v2/models/nvidia/ssdpyt_fp32/versions/2/files/nvidia_ssdpyt_fp32_190826.pt

--2020-04-24 15:54:51--  https://api.ngc.nvidia.com/v2/models/nvidia/ssdpyt_fp32/versions/2/files/nvidia_ssdpyt_fp32_190826.pt
Resolving api.ngc.nvidia.com (api.ngc.nvidia.com)... 54.71.37.245, 52.27.83.248
Connecting to api.ngc.nvidia.com (api.ngc.nvidia.com)|54.71.37.245|:443... connected.
HTTP request sent, awaiting response... 302 
Location: https://s3.us-west-2.amazonaws.com/prod-model-registry-ngc-bucket/org/nvidia/models/ssdpyt_fp32/versions/2/files/nvidia_ssdpyt_fp32_190826.pt?response-content-disposition=attachment%3B%20filename%3D%22nvidia_ssdpyt_fp32_190826.pt%22&response-content-type=application%2Foctet-stream&X-Amz-Security-Token=IQoJb3JpZ2luX2VjEFAaCXVzLXdlc3QtMiJHMEUCIA7U%2BXJ6mzT8tb8Mo9lUxGOLx8rNi1nemb0LG%2Fo%2FFtPPAiEAsQ26nRd8H5Pnzn6LSdyWyweZaga8RmUNhQ0hx%2BOJiKIqtAMIeRACGgw3ODkzNjMxMzUwMjciDJq4RK1xUSe5HJDxvCqRA1ZFlF2tnV3TPtE%2BRIJsViqDJwCtFD7HEiyKFCoxLgMBI%2F%2FMQnXWxfJmcepE5tYhgCsSHReB560xcqfXAwdOKCIUZS5ZOdeuJoLWCQZ9Q9WDSZmswkAaqVCuqKpTkqGEzNEQaA6EzU7i8xVmH3PbW4nm8lc

In [12]:
import sagemaker
from sagemaker import get_execution_role
from sagemaker.pytorch.model import PyTorchModel
import tarfile
import torch
from skimage.io import imread
from skimage.transform import resize
import numpy as np

# Session, execution role and bucket that the upload and deploy cells below reuse.
sess = sagemaker.Session()
role = get_execution_role()
bucket = sess.default_bucket()  # the session's default bucket; replace with your own S3 bucket if preferred

In [2]:
# Wrap the downloaded checkpoint in a gzip-compressed tarball for deployment.
with tarfile.open(name='model.tar.gz', mode='w:gz') as archive:
    archive.add(name='nvidia_ssdpyt_fp32_190826.pt')

In [7]:
# Upload the archive to S3; the returned location is passed to PyTorchModel as model_data.
modelpath = sess.upload_data(
    path='model.tar.gz',
    bucket=bucket,
    key_prefix='sagemaker-pytorch/input',
)

In [15]:
%%writefile transform_script.py

import torch
import os
import numpy as np

import torch.nn as nn
from torchvision.models.resnet import resnet18, resnet34, resnet50, resnet101, resnet152

class ResNet(nn.Module):
    """Truncated torchvision ResNet used as a feature extractor.

    Only the first seven child modules of the torchvision network are kept,
    and the first block of the last kept stage has its strides set to
    ``(1, 1)``.

    Args:
        backbone: 'resnet18', 'resnet34', 'resnet50' or 'resnet101'; any
            other value selects resnet152.
        backbone_path: Optional path to a state dict for the backbone. When
            given, torchvision's pretrained weights are not requested and
            this file is loaded instead.

    Attributes:
        out_channels: List of six channel counts associated with the chosen
            backbone; read by ``SSD300`` to size its extra layers and heads.
        feature_extractor: ``nn.Sequential`` holding the kept layers.
    """

    def __init__(self, backbone='resnet50', backbone_path=None):
        super().__init__()
        # Ask torchvision for pretrained weights only when no local state dict is supplied.
        use_pretrained = not backbone_path
        if backbone == 'resnet18':
            backbone = resnet18(pretrained=use_pretrained)
            self.out_channels = [256, 512, 512, 256, 256, 128]
        elif backbone == 'resnet34':
            backbone = resnet34(pretrained=use_pretrained)
            self.out_channels = [256, 512, 512, 256, 256, 256]
        elif backbone == 'resnet50':
            backbone = resnet50(pretrained=use_pretrained)
            self.out_channels = [1024, 512, 512, 256, 256, 256]
        elif backbone == 'resnet101':
            backbone = resnet101(pretrained=use_pretrained)
            self.out_channels = [1024, 512, 512, 256, 256, 256]
        else:  # backbone == 'resnet152' (also the fallback for any unrecognised name)
            backbone = resnet152(pretrained=use_pretrained)
            self.out_channels = [1024, 512, 512, 256, 256, 256]

        if backbone_path:
            # map_location='cpu' lets a state dict that was saved from a GPU be
            # loaded on a host without CUDA; load_state_dict then copies the
            # values into the (CPU) backbone parameters.
            state_dict = torch.load(backbone_path, map_location='cpu')
            backbone.load_state_dict(state_dict)

        # Keep the first seven children of the torchvision model.
        self.feature_extractor = nn.Sequential(*list(backbone.children())[:7])

        # First block of the last kept stage: remove its downsampling by
        # forcing every strided layer in it to stride 1.
        conv4_block1 = self.feature_extractor[-1][0]

        conv4_block1.conv1.stride = (1, 1)
        conv4_block1.conv2.stride = (1, 1)
        conv4_block1.downsample[0].stride = (1, 1)

    def forward(self, x):
        """Return the feature map produced by the truncated backbone for ``x``."""
        return self.feature_extractor(x)

class SSD300(nn.Module):
    """SSD detection network: backbone, five extra feature blocks, and per-scale heads.

    Args:
        backbone: Feature-extractor module exposing an ``out_channels`` list of
            six channel counts. When omitted, ``ResNet('resnet50')`` is built
            at construction time.

    ``forward`` returns ``(locs, confs)`` with shapes ``(batch, 4, N)`` and
    ``(batch, label_num, N)``, where ``N`` is the total number of default boxes
    over all feature maps.
    """

    def __init__(self, backbone=None):
        super().__init__()

        # Build the default backbone here rather than in the signature: a module
        # used as a default argument is instantiated once when the class is
        # defined (so merely importing the script constructs a ResNet) and is
        # then shared by every SSD300 created without an explicit backbone.
        if backbone is None:
            backbone = ResNet('resnet50')
        self.feature_extractor = backbone

        self.label_num = 81  # number of COCO classes
        self._build_additional_features(self.feature_extractor.out_channels)
        # Default boxes per location, one entry per feature map.
        self.num_defaults = [4, 6, 6, 6, 4, 4]
        self.loc = []
        self.conf = []

        # One localisation head (4 values per box) and one confidence head
        # (label_num values per box) for each feature map.
        for nd, oc in zip(self.num_defaults, self.feature_extractor.out_channels):
            self.loc.append(nn.Conv2d(oc, nd * 4, kernel_size=3, padding=1))
            self.conf.append(nn.Conv2d(oc, nd * self.label_num, kernel_size=3, padding=1))

        self.loc = nn.ModuleList(self.loc)
        self.conf = nn.ModuleList(self.conf)
        self._init_weights()

    def _build_additional_features(self, input_size):
        """Create the extra blocks that follow the backbone.

        ``input_size`` is the list of channel counts; block ``i`` maps
        ``input_size[i]`` channels to ``input_size[i + 1]`` channels through a
        1x1 bottleneck. The first three blocks use a stride-2, padded 3x3
        convolution; the last two use an unpadded 3x3 convolution.
        """
        self.additional_blocks = []
        bottleneck_channels = [256, 256, 128, 128, 128]
        for i, (in_ch, out_ch, channels) in enumerate(zip(input_size[:-1], input_size[1:], bottleneck_channels)):
            if i < 3:
                layer = nn.Sequential(
                    nn.Conv2d(in_ch, channels, kernel_size=1, bias=False),
                    nn.BatchNorm2d(channels),
                    nn.ReLU(inplace=True),
                    nn.Conv2d(channels, out_ch, kernel_size=3, padding=1, stride=2, bias=False),
                    nn.BatchNorm2d(out_ch),
                    nn.ReLU(inplace=True),
                )
            else:
                layer = nn.Sequential(
                    nn.Conv2d(in_ch, channels, kernel_size=1, bias=False),
                    nn.BatchNorm2d(channels),
                    nn.ReLU(inplace=True),
                    nn.Conv2d(channels, out_ch, kernel_size=3, bias=False),
                    nn.BatchNorm2d(out_ch),
                    nn.ReLU(inplace=True),
                )

            self.additional_blocks.append(layer)

        self.additional_blocks = nn.ModuleList(self.additional_blocks)

    def _init_weights(self):
        """Xavier-initialise every multi-dimensional parameter outside the backbone."""
        layers = [*self.additional_blocks, *self.loc, *self.conf]
        for layer in layers:
            for param in layer.parameters():
                # Biases and batch-norm parameters are 1-D and keep their defaults.
                if param.dim() > 1:
                    nn.init.xavier_uniform_(param)

    # Shape the classifier to the view of bboxes
    def bbox_view(self, src, loc, conf):
        """Apply each head to its feature map and concatenate over all boxes.

        Every localisation output is viewed as ``(batch, 4, -1)`` and every
        confidence output as ``(batch, label_num, -1)``; the per-map results
        are concatenated along the last dimension.
        """
        ret = []
        for s, l, c in zip(src, loc, conf):
            ret.append((l(s).view(s.size(0), 4, -1), c(s).view(s.size(0), self.label_num, -1)))

        locs, confs = list(zip(*ret))
        locs, confs = torch.cat(locs, 2).contiguous(), torch.cat(confs, 2).contiguous()
        return locs, confs

    def forward(self, x):
        """Run the backbone and extra blocks, then the heads on every feature map."""
        x = self.feature_extractor(x)

        detection_feed = [x]
        for l in self.additional_blocks:
            x = l(x)
            detection_feed.append(x)

        # Feature Map 38x38x4, 19x19x6, 10x10x6, 5x5x6, 3x3x4, 1x1x4
        locs, confs = self.bbox_view(detection_feed, self.loc, self.conf)

        # For SSD 300, shall return nbatch x 8732 x {nlabels, nlocs} results
        return locs, confs

def predict_fn(input_data, model):
    """Run a single forward pass and return both model outputs as NumPy arrays.

    The model is moved to CUDA when it is available (CPU otherwise) and put in
    eval mode before the gradient-free forward pass.

    Returns:
        dict with keys 'pred1' and 'pred2' holding the first and second
        element of the model output, converted to NumPy on the CPU.
    """
    if torch.cuda.is_available():
        target = torch.device("cuda")
    else:
        target = torch.device("cpu")

    model.to(target)
    model.eval()

    with torch.no_grad():
        outputs = model(input_data)

    first, second = outputs[0], outputs[1]
    return {
        'pred1': first.detach().cpu().numpy(),
        'pred2': second.detach().cpu().numpy(),
    }
        
def model_fn(model_dir):
    """Build SSD300 and load the NVIDIA checkpoint weights into it.

    The checkpoint is first looked up in ``model_dir``. If loading it fails for
    any reason, the checkpoint is downloaded with ``wget`` into the current
    working directory and loaded from there.

    Args:
        model_dir: Directory expected to contain 'nvidia_ssdpyt_fp32_190826.pt'.

    Returns:
        The SSD300 model, on the CPU and in eval mode.
    """
    checkpoint_name = 'nvidia_ssdpyt_fp32_190826.pt'

    def _load_weights(net, path):
        # The checkpoint is a dict whose 'model' entry holds the state dict.
        net.load_state_dict(torch.load(path, map_location='cpu')['model'])

    model = SSD300(backbone=ResNet('resnet50'))
    try:
        _load_weights(model, os.path.join(model_dir, checkpoint_name))
    except Exception as exc:
        # Catch Exception rather than everything so interrupts still propagate,
        # and report why the packaged checkpoint could not be used.
        print('using fallback model loading')
        print('packaged checkpoint could not be loaded: {!r}'.format(exc))
        status = os.system('wget https://api.ngc.nvidia.com/v2/models/nvidia/ssdpyt_fp32/versions/2/files/nvidia_ssdpyt_fp32_190826.pt')
        if status != 0:
            # Still attempt the load below: a copy may already be present.
            print('wget exited with status {}'.format(status))
        _load_weights(model, checkpoint_name)
    model.eval()
    return model

def input_fn(request_body, request_content_type):
    """Decode an 'application/x-npy' request into a (1, 3, 300, 300) float32 tensor.

    The body is read as a flat buffer of 64-bit integers. If it does not hold
    exactly 1*3*300*300 values, the first 16 values are dropped before
    reshaping.

    Args:
        request_body: Bytes-like payload, or an array-like of integers.
        request_content_type: Must be 'application/x-npy'.

    Returns:
        torch.Tensor of shape (1, 3, 300, 300) and dtype float32, created on
        CUDA when available and on the CPU otherwise.

    Raises:
        ValueError: If the content type is unsupported, or the payload cannot
            be reshaped to (1, 3, 300, 300) with or without the 16-value prefix.
    """
    if request_content_type != 'application/x-npy':
        # Fail with a clear message instead of reaching the decode step with
        # no data bound.
        raise ValueError('Unsupported content type: {}'.format(request_content_type))

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_shape = (1, 3, 300, 300)

    try:
        values = np.frombuffer(request_body, dtype=np.int64)
    except (AttributeError, TypeError, ValueError):
        # Not usable as a raw buffer; treat the body as an array-like instead.
        values = np.array(request_body, dtype=np.int64)

    if values.size != int(np.prod(input_shape)):
        # NOTE(review): the 16 skipped int64 values (128 bytes) presumably
        # correspond to a .npy header placed in front of the pixel data by the
        # client serializer — confirm against the client.
        values = values[16:]

    # this needs to be a torch tensor
    return torch.tensor(np.reshape(values, input_shape), dtype=torch.float32, device=device)

Overwriting transform_script.py


In [5]:
# Describe the model: uploaded archive, inference script and framework version.
pytorch_model = PyTorchModel(
    model_data=modelpath,
    role=role,
    entry_point='transform_script.py',
    framework_version='1.4.0',
)

# Create the endpoint and block until the deployment has finished.
predictor = pytorch_model.deploy(
    instance_type='ml.p2.xlarge',
    initial_instance_count=1,
    wait=True,
    endpoint_name='torch-ssd-ngc-test',
)

-------------------!

In [13]:
# Fetch a sample image and bring it to the 300x300x3 size used by the endpoint.
img = imread('https://upload.wikimedia.org/wikipedia/commons/2/25/Postmen_Office_Room.jpg')
# preserve_range=True keeps the original pixel value range. Without it, resize
# rescales the image to floats in [0, 1], and the integer cast below would then
# turn almost every pixel into 0.
img = resize(img, (300, 300, 3), preserve_range=True)
# Round before casting so values are not simply truncated.
img = np.rint(img).astype(np.int64)
img.shape

  warn("The default mode, 'constant', will be changed to 'reflect' in "


(300, 300, 3)

In [None]:
# The endpoint's input_fn reshapes the flat buffer to (1, 3, 300, 300), i.e.
# channels first, while img is (300, 300, 3). Send the bytes in channels-first
# order so the pixels are not scrambled by that reshape.
result = predictor.predict(img.transpose(2, 0, 1).tobytes())

In [None]:
# Display the response returned by the endpoint.
result

## You can also download the model from PyTorch Hub with the `torch.hub.load` API

In [12]:
import torch 
# Value forwarded to the hub entrypoint as model_math.
precision = 'fp32'
ssd_model = torch.hub.load(
    'NVIDIA/DeepLearningExamples:torchhub',
    'nvidia_ssd',
    model_math=precision,
)

Using cache found in /home/ec2-user/.cache/torch/hub/NVIDIA_DeepLearningExamples_torchhub
Downloading: "https://download.pytorch.org/models/resnet50-19c8e357.pth" to /home/ec2-user/.cache/torch/checkpoints/resnet50-19c8e357.pth


HBox(children=(FloatProgress(value=0.0, max=102502400.0), HTML(value='')))




Downloading checkpoint from https://api.ngc.nvidia.com/v2/models/nvidia/ssdpyt_fp32/versions/1/files/nvidia_ssdpyt_fp32_20190225.pt


## Or better, download the model from PyTorch Hub on the fly, inside the endpoint's `model_fn`

In [16]:
%%writefile transform_script_hub.py

import torch
import os
from six import BytesIO
import numpy as np

def model_fn(model_dir):
    """Return the NVIDIA SSD model loaded through ``torch.hub``.

    ``model_dir`` is accepted but not used; nothing is read from it.
    """
    hub_repo = 'NVIDIA/DeepLearningExamples:torchhub'
    entrypoint = 'nvidia_ssd'
    # NOTE(review): 'gpu' does not look like a PyTorch device string ('cuda' is
    # the usual spelling), and it is not visible here whether the hub
    # entrypoint uses map_location at all — confirm.
    return torch.hub.load(hub_repo, entrypoint, model_math='fp32', map_location='gpu')
                       
def input_fn(request_body, request_content_type):
    """Deserialize the request bytes with ``torch.load`` and return the result.

    ``request_content_type`` is not inspected.
    """
    # NOTE(review): torch.load unpickles the payload, so this endpoint should
    # only be exposed to trusted clients.
    stream = BytesIO(request_body)
    return torch.load(stream)

Overwriting transform_script_hub.py


In [18]:
# PyTorchModel requires a non-empty model_data file, so package a placeholder
# file as tmp.tar.gz; the inference script loads the real model from the hub.

!echo "tmp content" > tmp
!tar -zcvf ./tmp.tar.gz tmp
# Model backed by the placeholder archive and the hub-loading inference script.
pytorch_model = PyTorchModel(
    model_data='file://tmp.tar.gz',
    role=role,
    entry_point='./transform_script_hub.py',
    framework_version='1.4.0',
)

predictor = pytorch_model.deploy(
    instance_type='ml.p3.2xlarge',
    initial_instance_count=1,
    endpoint_name='nvidia-ssd-pytorch-gpu2',
)


tmp
-----------------!

In [7]:
# Three sample images from the COCO val2017 set, addressed by image id.
uris = [
    'http://images.cocodataset.org/val2017/{}.jpg'.format(image_id)
    for image_id in ('000000397133', '000000037777', '000000252219')
]

In [8]:
# `utils` is not defined by any other cell in this notebook, so load the SSD
# pre/post-processing helpers from the same hub repository first. Without this
# the cell fails with a NameError on a fresh kernel.
utils = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_ssd_processing_utils')

# Prepare every sample image, then batch the prepared images into one tensor.
inputs = [utils.prepare_input(uri) for uri in uris]
tensor = utils.prepare_tensor(inputs)

  warn("The default mode, 'constant', will be changed to 'reflect' in "


In [12]:
# Move channels first and add a leading batch axis: (H, W, C) -> (1, C, H, W).
# The previous reshape took its sizes from the shape *before* the transpose and
# therefore produced (1, H, W, C) with channel-first data inside it.
# NOTE(review): `image` is not assigned in any visible cell, and this cell
# rebinds it, so it must be run exactly once per image — confirm where `image`
# is meant to come from.
image = image.transpose((2, 0, 1))[np.newaxis, ...]

In [None]:
# Send the request through predict(), as the earlier endpoint cell does; the
# predictor object itself is not invoked directly anywhere else in the notebook.
# NOTE(review): the hub script's input_fn decodes the body with torch.load, so
# the payload format sent here should be checked against it.
result = predictor.predict(image)