In [None]:
from google.colab import drive
import sys
# Mount Google Drive into the Colab filesystem at /content/drive.
drive.mount('/content/drive')
# Project folder, given relative to the "My Drive" root.
FOLDERNAME = 'imgDetection/2022 AI 경진대회'
# Put the project folder on the import path, then make it the working directory
# so that the os.getcwd()-based and './'-relative paths in later cells resolve to it.
sys.path.append('/content/drive/My Drive/{}'.format(FOLDERNAME))
%cd /content/drive/My\ Drive/$FOLDERNAME

Mounted at /content/drive
/content/drive/My Drive/imgDetection/2022 AI 경진대회


In [None]:
import torch
from pathlib import Path
import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset,DataLoader
import torchvision.transforms as T
import sys
import json
from PIL import Image
import cv2
import numpy as np
import matplotlib.pyplot as plt

sys.path.append('./detr/')
from detr.models.matcher import HungarianMatcher
from detr.models.detr import SetCriterion
from detr.datasets import coco



In [None]:
# Data directory of the project; max_objects() reads Train_label.json from here.
root = Path(f'/content/drive/My Drive/{FOLDERNAME}/data')
def max_objects(root):
  '''
  Return the largest number of annotations attached to any single image.

  Annotations are tallied per 'image_id', so the result does not depend on the
  order of the annotation list and the first annotation of every image is
  counted (a running counter that resets to 0 on each new image under-counts
  every image except the first one).

  inputs:
    - root(str or path-like): folder that contains 'Train_label.json'
  returns:
    - max_num(int): maximum number of objects found in one image;
      0 when the file lists no annotations
  '''
  # Local import so the function does not rely on a change to the import cell.
  from collections import Counter

  with open(os.path.join(root, 'Train_label.json')) as j:
    anns = json.load(j)['annotations']

  # image_id -> number of annotations that reference it
  per_image = Counter(ann['image_id'] for ann in anns)
  return max(per_image.values(), default=0)

In [None]:
# Fine-tuning wrapper: pretrained DETR with a replaced classification layer
class DETRModel(nn.Module):
    def __init__(self, num_classes,num_queries):
        '''
        Load the pretrained DETR (ResNet-50) model from torch.hub and adapt it:
        the class_embed layer is replaced by a new Linear layer with
        num_classes outputs, and num_queries is stored on the wrapped model.

        inputs:
          - num_classes(int): output size of the new class_embed layer
          - num_queries(int): value assigned to the wrapped model's num_queries
        '''
        super().__init__()
        self.num_classes, self.num_queries = num_classes, num_queries

        detr = torch.hub.load('facebookresearch/detr', 'detr_resnet50', pretrained=True)

        # Keep the input width of the original head so the new one fits the decoder output.
        self.in_features = detr.class_embed.in_features
        detr.class_embed = nn.Linear(self.in_features, self.num_classes)

        # NOTE(review): this only assigns an attribute; no layer is rebuilt here, so the
        # pretrained query embedding presumably keeps its original size — confirm against
        # the DETR implementation before relying on num_queries.
        detr.num_queries = self.num_queries

        self.model = detr

    def forward(self,images):
        '''
        Run the wrapped model.

        inputs:
          - images: passed unchanged to the wrapped DETR model
        returns:
          - output: whatever the wrapped model returns for these images
        '''
        output = self.model(images)
        return output

In [None]:
# Build the model and load the fine-tuned weights from a saved checkpoint.

# NOTE(review): PATH is not read anywhere in this cell; kept in case another cell uses it.
PATH = os.path.join(os.getcwd(), 'checkpoint/detr_best_.pth')
# 14+1: presumably 14 object categories plus one extra slot — confirm against the label file.
model_copy = DETRModel(num_classes = 14+1, num_queries = max_objects(root)*2)
ckpt_path = os.path.join(os.getcwd(), 'checkpoint/detr0003.pth')
# map_location='cpu' lets a checkpoint that was saved from a GPU load on a CPU-only
# runtime; load_state_dict copies the values into the model's own parameters either way.
ckpt = torch.load(ckpt_path, map_location='cpu')
# Left as the last expression so the key-matching report is displayed as cell output.
model_copy.load_state_dict(ckpt['model_state_dict'])

Downloading: "https://github.com/facebookresearch/detr/archive/main.zip" to /root/.cache/torch/hub/main.zip
Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

Downloading: "https://dl.fbaipublicfiles.com/detr/detr-r50-e632da11.pth" to /root/.cache/torch/hub/checkpoints/detr-r50-e632da11.pth


  0%|          | 0.00/159M [00:00<?, ?B/s]

<All keys matched successfully>

In [None]:
# Preprocessing applied to each PIL image before it is passed to the model
transform = T.Compose([
    T.Resize(size=800),
    T.ToTensor(),
    # per-channel mean / std (presumably the ImageNet statistics — confirm)
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [None]:
# Collate function for the validation set
def collate_fn(batch):
    """Regroup a batch of per-sample tuples into one tuple per field.

    Example: [(img1, tgt1), (img2, tgt2)] -> ((img1, img2), (tgt1, tgt2)).
    An empty batch yields an empty tuple.
    """
    per_field = zip(*batch)
    return tuple(per_field)

In [None]:
# Minimum class probability a query needs in order to be reported as a detection.
threshold = 0.3

with open(os.path.join(os.getcwd(), 'data/Test_images_info.json'), 'r') as j:
  image_info = json.load(j)

# Switch the model to evaluation mode once, instead of on every loop iteration.
model_copy.eval()

# One dict per kept detection, accumulated over all test images.
submission_anno = list()
with torch.no_grad():
  for img_info in image_info['images']:
    file_name = img_info['file_name']
    img_path = os.path.join(os.getcwd(), 'data/test/images/' + file_name)
    # convert() returns a new image, so the file handle can be released right away.
    with Image.open(img_path) as raw_img:
      img = raw_img.convert("RGB")
    W, H = img.size

    im = transform(img)
    outputs = model_copy(im.unsqueeze(0))

    # The last logit column is dropped before scoring (presumably DETR's
    # "no object" class — confirm), so it can never be reported as a category.
    probas = outputs['pred_logits'].softmax(-1)[0, :, :-1]

    # Score and label come from the same tensor so they always agree. Taking the
    # argmax over the full logits could select the dropped column and pair that
    # label with the score of a different class.
    scores, labels = probas.max(-1)
    keep = scores > threshold
    if not keep.any():
      continue
    scores = scores[keep].tolist()
    pred_labels = labels[keep].tolist()

    # Boxes are read as (center x, center y, width, height) and rewritten as
    # (left, top, width, height), then scaled by the original image size.
    x_c, y_c, w, h = outputs['pred_boxes'][0, keep].unbind(1)
    boxes = torch.stack([x_c - 0.5 * w, y_c - 0.5 * h, w, h], dim=1)
    preds_b = (boxes * torch.tensor([W, H, W, H], dtype=torch.float32)).tolist()

    for bbox, label, score in zip(preds_b, pred_labels, scores):
      submission_anno.append({
        'image_id': img_info['id'],
        'bbox': bbox,
        'category_id': label,
        'score': score,
        'segmentation': [],
      })

  dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats)


In [None]:
# Serialise all collected detections into the submission file.
# ensure_ascii=False writes non-ASCII characters as-is instead of \u escapes.
with open('./sample_submission.json','w',encoding='utf-8') as f:
    f.write(json.dumps(submission_anno, ensure_ascii=False))