In [1]:
# Download the model weights with gdown. The file is saved as
# model_final.pth in the working directory, which is the path that
# custom_config assigns to cfg.MODEL.WEIGHTS.
!gdown https://drive.google.com/u/1/uc?id=1OscWY-JOtvUz-cIVk6UmZgFlHG7QoGu3

Downloading...
From: https://drive.google.com/u/1/uc?id=1OscWY-JOtvUz-cIVk6UmZgFlHG7QoGu3
To: /home/dchencgps/HW3/Detectron/model_final.pth
100%|█████████████████████████████████████████| 351M/351M [00:01<00:00, 320MB/s]


In [2]:
import cv2, json
from pycocotools import mask
import matplotlib.pyplot as plt
from numpy import asfortranarray
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

In [3]:
def custom_config(num_classes):
    """Build the Detectron2 Mask R-CNN config used by this notebook.

    Starts from the model-zoo ``mask_rcnn_R_50_FPN_3x`` instance-segmentation
    config and applies the project overrides below.

    Args:
        num_classes: Number of classes assigned to
            ``MODEL.ROI_HEADS.NUM_CLASSES``.

    Returns:
        The configured ``CfgNode``. Weights are read from ``model_final.pth``
        in the working directory and the device is set to CPU.
    """
    cfg = get_cfg()
    cfg.merge_from_file(
        model_zoo.get_config_file(
            "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
    cfg.DATASETS.TRAIN = ("train",)
    cfg.DATASETS.TEST = ("val", )
    cfg.MODEL.WEIGHTS = "model_final.pth"
    # One anchor size per FPN level, smaller than the base config's defaults.
    cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[8], [16], [32], [64], [128]]
    cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.7
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
    cfg.SOLVER.BASE_LR = 1e-4
    cfg.SOLVER.IMS_PER_BATCH = 1
    cfg.SOLVER.MAX_ITER = 1000
    # Allow up to 2000 detections to be kept per image.
    cfg.TEST.DETECTIONS_PER_IMAGE = 2000
    cfg.TEST.EVAL_PERIOD = 100
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256
    # Honour the argument instead of hard-coding a single class.
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = num_classes
    # NOTE(review): COARSEST_STRIDE does not look like a standard Detectron2
    # MODEL.FPN key -- confirm that anything actually reads this value.
    cfg.MODEL.FPN.COARSEST_STRIDE = 256
    cfg.MODEL.DEVICE = 'cpu'

    return cfg

# Build the single-class config that is passed to prediction().
cfg = custom_config(1)

In [5]:
def prediction(cfg, test_json_path='dataset/test_img_ids.json',
               image_dir='dataset/test', output_path='answer.json'):
    """Run instance segmentation on every test image and dump the results.

    Each image is converted to grayscale and replicated to three channels
    before being passed to the predictor. One record is written per detected
    instance with the keys ``image_id``, ``bbox`` (``[x, y, w, h]``),
    ``score``, ``category_id`` (always 1) and ``segmentation`` (pycocotools
    RLE whose ``counts`` field is decoded to an ASCII string so it is
    JSON-serialisable).

    Args:
        cfg: Detectron2 config used to construct the ``DefaultPredictor``.
        test_json_path: JSON file listing the test images; each entry must
            provide ``id`` and ``file_name``.
        image_dir: Directory containing the files named in the listing.
        output_path: Destination of the JSON list of detection records.

    Raises:
        FileNotFoundError: If a listed image is missing or cannot be decoded.

    Side effects:
        Prints the total number of detections and writes ``output_path``.
    """
    predictor = DefaultPredictor(cfg)
    with open(test_json_path, 'r') as f:
        test_json = json.load(f)

    ans = []
    for img in test_json:
        img_id = img['id']
        image_path = f"{image_dir}/{img['file_name']}"

        im = cv2.imread(image_path)
        if im is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise FileNotFoundError(
                f'Test image is missing or unreadable: {image_path}')

        # Drop colour information but keep the 3-channel layout.
        im_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
        im = cv2.cvtColor(im_gray, cv2.COLOR_GRAY2BGR)

        # Move all outputs to CPU once per image instead of once per instance.
        pred = predictor(im)['instances'].to('cpu')
        boxes = pred.pred_boxes.tensor.numpy().tolist()
        scores = pred.scores.tolist()
        masks = pred.pred_masks.numpy()

        for (x1, y1, x2, y2), score, pred_mask in zip(boxes, scores, masks):
            # pycocotools expects a column-major uint8 binary mask.
            RLE = mask.encode(asfortranarray(pred_mask, dtype='uint8'))
            # `counts` is bytes; decode so json.dump can serialise it.
            RLE['counts'] = RLE['counts'].decode('ascii')
            ans.append(dict(
                image_id=img_id,
                # Convert corner format (x1, y1, x2, y2) to (x, y, w, h).
                bbox=[x1, y1, x2 - x1, y2 - y1],
                score=float(score),
                category_id=1,
                segmentation=RLE,
            ))

    print(len(ans))
    with open(output_path, 'w') as f:
        json.dump(ans, f)

In [6]:
# Run inference over the test set; prints the number of detections and
# writes them to answer.json.
prediction(cfg)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


1204
