In [1]:
import torch
import torchvision
import numpy as np
import os
from PIL import Image
import cv2
import time
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from my_utils import get_transform, my_dataloader
from engine import train_one_epoch, evaluate
import random

In [2]:
def random_color():
    """Return a random colour as a 3-tuple of ints, each in ``[0, 255]``.

    The components are drawn one after another in blue, green, red order,
    so the result reads as ``(b, g, r)``.
    """
    # One independent draw per channel; the generator is consumed left to
    # right, so the draw order is b, then g, then r.
    return tuple(random.randint(0, 255) for _channel in range(3))

In [3]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
is_cuda = torch.cuda.is_available()
device = torch.device('cuda') if is_cuda else torch.device('cpu')

In [4]:
# Number of labels the box predictor outputs. NOTE(review): torchvision's
# FasterRCNN docs count the background as one of these classes, so 2 would
# mean a single foreground class -- confirm against the training setup.
num_classes = 2

# Use only the convolutional feature extractor of MobileNetV2 as the backbone
# (weights are not loaded here). `out_channels` is set by hand on the
# `.features` module because it is attached as a plain attribute here.
backbone = torchvision.models.mobilenet_v2().features
backbone.out_channels = 1280

# A single feature map, with 5 anchor sizes x 3 aspect ratios.
anchor_generator = AnchorGenerator(
    sizes=((32, 64, 128, 256, 512),),
    aspect_ratios=((0.5, 1.0, 2.0),),
)

# RoI pooling over the single backbone feature map, cropped to 7x7.
# NOTE(review): the feature-map key is the integer 0 here; the current
# torchvision docs example uses the string '0' -- confirm which one the
# installed torchvision version expects before changing it.
roi_pooler = torchvision.ops.MultiScaleRoIAlign(
    featmap_names=[0],
    output_size=7,
    sampling_ratio=2,
)

# Assemble the detector. The two `*_top_n_test` limits cap the number of RPN
# proposals kept before and after NMS at inference time.
model = FasterRCNN(
    backbone,
    num_classes=num_classes,
    rpn_anchor_generator=anchor_generator,
    box_roi_pool=roi_pooler,
    rpn_pre_nms_top_n_test=300,
    rpn_post_nms_top_n_test=10,
)

In [5]:
# Restore the trained weights from disk. `map_location=device` remaps the
# stored tensors onto the device chosen earlier, so a checkpoint that was
# saved from a GPU still loads when this notebook runs on a CPU-only machine.
model.load_state_dict(torch.load('./try.pt', map_location=device))
model = model.to(device)

In [6]:
# Open capture device 0 and fail fast with a clear message if it cannot be
# opened, instead of letting a later read hand back an empty frame.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    raise RuntimeError('Could not open video capture device 0')

In [7]:
# Grab a single frame. `ret` is the success flag returned by `cap.read()`;
# stop here with a clear error when the grab failed, because `frame` would
# not be a usable image for the tensor conversion that follows.
ret, frame = cap.read()
if not ret:
    raise RuntimeError('Failed to read a frame from the capture device')

In [8]:
# Turn the captured frame into a model input: move the last axis to the
# front (permute), add a leading batch axis of size 1, convert to float32,
# and place the tensor on the same device as the model.
# NOTE(review): pixel values are not rescaled and the channel order is left
# exactly as the capture returned it. torchvision detection models document
# a 0-1 input range, and OpenCV frames are normally BGR -- confirm this
# matches the preprocessing used when the checkpoint was trained.
input = (
    torch.from_numpy(frame)
    .permute(2, 0, 1)
    .unsqueeze(0)
    .float()
    .to(device)
)

In [9]:
# Sanity check on the tensor layout: batch axis first, then the axis that was
# last in the frame, then the frame's first two axes.
input.shape

torch.Size([1, 3, 480, 640])

In [10]:
# Time one forward pass over the captured frame.
model.eval()  # switch the model to evaluation mode before inference
time_start = time.time()
with torch.no_grad():  # no gradients are needed for inference
    output = model(input)
# CUDA kernels are launched asynchronously; wait for them to finish so the
# measurement covers the whole forward pass rather than just the launches.
if torch.cuda.is_available():
    torch.cuda.synchronize()
# Keep the result in its own name: assigning it to `time` would replace the
# `time` module with a float and break every later `time.time()` call,
# including a re-run of this very cell.
elapsed = time.time() - time_start
print(elapsed)

1.2316179275512695
