In [1]:
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

from torchvision.transforms import functional as F
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from PIL import Image

import torch
import json
import cv2

In [2]:
# Load a trained model from torchvision
model = fasterrcnn_resnet50_fpn(pretrained=True)
model.eval()



FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [3]:
# Load dataset from test COCO annotations
coco_val = COCO('D:\\present\\cmpe249-hw1\\dataset\\coco-annotations\\annotations_test.json')
img_ids = sorted(coco_val.imgs.keys())

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [4]:
tmp_image_path = 'D:\\present\\cmpe249-hw1\\dataset\\images\\test\\001000.png'
img_width = cv2.imread(tmp_image_path).shape[1]
img_height = cv2.imread(tmp_image_path).shape[0]

In [5]:
# Initialize results list
results = []

In [7]:
# Iterate over images in the validation set
for img_id in img_ids:
		# Load and preprocess the image
		img_info = coco_val.loadImgs(img_id)[0]
		image = Image.open('D:\\present\\cmpe249-hw1\\dataset\\images\\test\\' + img_info['file_name']).convert("RGB")
		image_tensor = F.to_tensor(image).unsqueeze(0)

		# Perform inference
		with torch.no_grad():
				prediction = model(image_tensor)

		# Format the prediction and append to results list
		for pred in zip(prediction[0]['boxes'], prediction[0]['labels'], prediction[0]['scores']):
				box, label, score = pred
				box = box.numpy().tolist()
				score = score.numpy().item()

				x1 = float(box[0])
				y1 = float(box[1])
				x2 = float(box[2])
				y2 = float(box[3])

				intx1 = int(x1)
				inty1 = int(y1)
				intx2 = int(x2)
				inty2 = int(y2)

				bbox_center_x = float( (x1 + (x2 - x1) / 2.0) / img_width)
				bbox_center_y = float( (y1 + (y2 - y1) / 2.0) / img_height)
				bbox_width = float((x2 - x1) / img_width)
				bbox_height = float((y2 - y1) / img_height)

				results.append({
					'image_id': img_id,
					'category_id': label.item(),
					'bbox': [bbox_center_x, bbox_center_y, bbox_width, bbox_height], 
					'score': score
					})

In [8]:
# Save results
with open('D:\\present\\cmpe249-hw1\\inference-fastrcnn\\results\\predictions.json', 'w') as f:
    json.dump(results, f)