In [1]:
import os
import sys; sys.path.append('..')
import json
from pycocotools.coco import COCO
from src.dataloaders.coco import CocoDetection

os.environ['CUDA_VISIBLE_DEVICES'] = '8,9'
device = 'cuda:0'

In [2]:
import torch
from typing import List
from torchvision import transforms
from torch.utils.data import DataLoader

def collate_batch(batch):
    images = [img for img, target in batch]
    targets = [target for img, target in batch]

    return images, targets

def to_xyxy_format(bbox) -> List:
    # Converts bbox from coco format (x, y, width, height)
    # to "x1, y1, x2, y2" format (pascal voc?)
    return [bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]]

def torchvision_target_format(target):
    return {
        'image_id': target[0]['image_id'],
        'labels': torch.Tensor([obj['category_id'] for obj in target]).long().to(device),
        'boxes': torch.Tensor([to_xyxy_format(obj['bbox']) for obj in target]).to(device),
        'masks': torch.Tensor([obj['mask'] for obj in target]).to(device),
        'coco_anns': target,
    }

train_ds = CocoDetection('/home/skorokhodov/densepose/coco/coco_train2014',
                         '/home/skorokhodov/densepose/coco/annotations/instances_train2014.json',
                         transform=transforms.ToTensor(),
                         target_transform=torchvision_target_format)
train_dataloader = DataLoader(train_ds, batch_size=1, collate_fn=collate_batch)

loading annotations into memory...
Done (t=13.73s)
creating index...
index created!


In [58]:
from torchvision.models.detection.mask_rcnn import maskrcnn_resnet50_fpn

model = maskrcnn_resnet50_fpn().to(device).eval()
imgs, targets = next(iter(train_dataloader))

In [34]:
from torchvision.models.detection.transform import GeneralizedRCNNTransform

image_mean = [0.485, 0.456, 0.406]
image_std = [0.229, 0.224, 0.225]
min_size = 800
max_size = 1333
transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std)

In [55]:
model = model.to(device).eval()

In [62]:
import time
import numpy as np
from tqdm import tqdm

times = []

for _ in tqdm(range(50)):
    with torch.no_grad():
        start = time.time()
        _ = model.backbone(transform(imgs)[0].tensors[0].to(device).unsqueeze(0))
        #_ = model([imgs[0].to(device)])
        elapsed = time.time() - start
        times.append(elapsed)
    
np.mean(times), np.std(times)

100%|██████████| 50/50 [00:02<00:00, 25.61it/s]


(0.040575594902038575, 0.011300739142151206)

In [63]:
0.040575594902038575 / 0.06447157859802247

0.629356311484564

In [30]:
from torchsummary import summary

model = model.to('cuda')
summary(model.roi_heads.mask_head, (256, 14, 14))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 256, 14, 14]         590,080
              ReLU-2          [-1, 256, 14, 14]               0
            Conv2d-3          [-1, 256, 14, 14]         590,080
              ReLU-4          [-1, 256, 14, 14]               0
            Conv2d-5          [-1, 256, 14, 14]         590,080
              ReLU-6          [-1, 256, 14, 14]               0
            Conv2d-7          [-1, 256, 14, 14]         590,080
              ReLU-8          [-1, 256, 14, 14]               0
Total params: 2,360,320
Trainable params: 2,360,320
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.19
Forward/backward pass size (MB): 3.06
Params size (MB): 9.00
Estimated Total Size (MB): 12.26
----------------------------------------------------------------


In [None]:
from torchvision.models.detection.mask_rcnn import resnet_fpn_backbone

b = resnet_fpn_backbone('resnet50', True)
