## 1. <font size='6'>`torchvision.models`</font>

The `torchvision.models` subpackage contains definitions of models for a range of tasks, including image classification, semantic segmentation, object detection, instance segmentation, person keypoint detection, video classification, and optical flow.

In [14]:
import torchvision
from torchvision.models.detection import maskrcnn_resnet50_fpn, MaskRCNN_ResNet50_FPN_Weights

# Pick the COCO-trained checkpoint and the preprocessing pipeline bundled with it
weights = MaskRCNN_ResNet50_FPN_Weights.COCO_V1
preprocess = weights.transforms()

# Instantiate Mask R-CNN with those weights and switch it to eval mode in one
# step (nn.Module.eval() returns the module itself)
maskrcnn_torchvision = maskrcnn_resnet50_fpn(weights=weights).eval()

# Echo the model so the cell still displays its architecture
maskrcnn_torchvision

MaskRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(in

## 2. <font size='5'>Listing and retrieving available models</font>

In [3]:
import torchvision
from torchvision.models import list_models, get_model, get_weight

# Names of every model builder registered with torchvision
all_models = list_models()

# Same listing, restricted to the detection sub-module
detection_models = list_models(module=torchvision.models.detection)

# Build a model from its registered name; 'DEFAULT' selects the default weights
m1 = get_model('maskrcnn_resnet50_fpn', weights='DEFAULT')

## 3. Using models from Hub

Most pre-trained models can be accessed directly via PyTorch Hub without having torchvision installed:

In [6]:
import torch

# Fetch ResNet-50 through PyTorch Hub from the pytorch/vision repository,
# requesting the default pre-trained weights
restnet50_torchhub = torch.hub.load(
    repo_or_dir='pytorch/vision',
    model='resnet50',
    weights='DEFAULT',
)

# Display the loaded architecture
restnet50_torchhub

Using cache found in /Users/yangzn/.cache/torch/hub/pytorch_vision_main
  warn(


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [27]:
from PIL import Image
import matplotlib.pyplot as plt
%matplotlib inline

from torchvision.transforms import transforms

toTensor = transforms.Compose([transforms.ToTensor()])

img = Image.open('./images/demo.jpg')
img = toTensor(img)

# Preprocess the input and add batch dimension
img = preprocess(img).unsqueeze(0)
img.to(device='cpu')
maskrcnn_torchvision.to(device='cpu')
pred = maskrcnn_torchvision(img)[0]

pred

{'boxes': tensor([[169.5343, 197.3131, 542.8247, 675.4254],
         [456.9812,  65.3639, 808.1294, 699.6161],
         [246.7403, 536.5847, 287.3207, 585.9766],
         [706.2566, 428.7965, 829.9130, 553.3541],
         [ 19.6753, 421.2762, 444.1577, 691.0541],
         [228.5135, 112.6390, 725.2979, 689.7244],
         [ 30.5734, 422.8713, 419.9007, 678.9666],
         [ 92.9445, 518.6879, 467.9733, 684.4454],
         [  7.1849, 418.7357, 182.1808, 680.3161],
         [149.5392, 489.5084, 444.7227, 676.3806],
         [704.7598, 430.5707, 828.8578, 551.3541],
         [  9.0287, 415.5792, 178.9615, 684.5433],
         [155.1458, 465.7409, 511.9001, 672.9181],
         [558.3756, 340.1966, 620.1507, 410.4771],
         [559.2289, 331.1625, 616.4686, 431.3912],
         [140.4450, 529.5810, 451.0419, 680.9323],
         [154.4197, 552.6893, 429.1562, 682.6299],
         [ 41.6949, 459.9888, 449.7567, 688.7703],
         [119.6793, 487.0904, 176.1988, 564.4571]], grad_fn=<StackBackwar