In [1]:
!gdown https://drive.google.com/u/1/uc?id=1nEJ7NTtHcCHNQqUXaoPk55VH3Uwh4QGG
!unzip -nq dataset.zip

Downloading...
From: https://drive.google.com/u/1/uc?id=1nEJ7NTtHcCHNQqUXaoPk55VH3Uwh4QGG
To: /home/dchencgps/HW3/Detectron/dataset.zip
100%|███████████████████████████████████████| 75.9M/75.9M [00:00<00:00, 269MB/s]


In [2]:
import cv2, json
import numpy as np
from PIL import Image
from tqdm import tqdm
from random import shuffle
from skimage import measure
from pycocotools import mask
import matplotlib.pyplot as plt
from numpy import asfortranarray
from os import listdir, makedirs
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.structures import BoxMode
from detectron2.evaluation import COCOEvaluator
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.utils.visualizer import Visualizer, ColorMode
from detectron2.engine import DefaultTrainer, DefaultPredictor

In [3]:
DATA_DIR = 'dataset'
TRAIN_DIR = 'train'
TEST_DIR = 'test'
IMG_DIR = 'images'
TRAIN_SIZE = 24
VAL_SIZE = 0

files = [file_name for file_name in listdir(f'{DATA_DIR}/{TRAIN_DIR}')]
shuffle(files)
train_files = files[: TRAIN_SIZE]
val_files = files[-VAL_SIZE: ]
makedirs(f'{DATA_DIR}/{IMG_DIR}', exist_ok=True)

In [4]:
train_dataset = []
for i, file_name in enumerate(train_files):
    img = Image.open(
        f'{DATA_DIR}/{TRAIN_DIR}/{file_name}/images/{file_name}.png'
    ).convert('L')
    img.save(f'{DATA_DIR}/{IMG_DIR}/{file_name}.png')
    width, height = img.size
    image = dict(
        # grayscale
        file_name = f'{DATA_DIR}/{IMG_DIR}/{file_name}.png',
        # color
        # file_name = f'{DATA_DIR}/{TRAIN_DIR}/{file_name}/images/{file_name}.png',
        image_id = i + 7,
        width = width,
        height = height,
        annotations = []
    )
    for mask_name in tqdm(listdir(f'{DATA_DIR}/{TRAIN_DIR}/{file_name}/masks')):
        if not mask_name.endswith(".png"):
            continue
        maskfile = asfortranarray(
            Image.open(f'{DATA_DIR}/{TRAIN_DIR}/{file_name}/masks/{mask_name}'))
        RLE = mask.encode(maskfile)
        bbox = mask.toBbox(RLE)
        RLE['counts'] = RLE['counts'].decode('ascii')
        contours = measure.find_contours(maskfile, 0.5)
        annot = dict(
            category_id = 0,
            segmentation = [],
            bbox = bbox.tolist(),
            bbox_mode = BoxMode.XYWH_ABS,
            iscrowd = 0
        )
        for contour in contours:
            contour = np.flip(contour, axis=1)
            seg = contour.ravel().tolist()
            annot["segmentation"].append(seg)
        image['annotations'].append(annot)
    train_dataset.append(image)

100%|██████████████████| 356/356 [00:07<00:00, 47.61it/s]
100%|██████████████████| 328/328 [00:06<00:00, 47.76it/s]
100%|██████████████████| 363/363 [00:07<00:00, 47.44it/s]
100%|██████████████████| 432/432 [00:09<00:00, 47.49it/s]
100%|████████████████| 1165/1165 [00:24<00:00, 47.59it/s]
100%|████████████████| 1391/1391 [00:28<00:00, 48.32it/s]
100%|██████████████████| 472/472 [00:09<00:00, 47.91it/s]
100%|██████████████████| 359/359 [00:07<00:00, 48.04it/s]
100%|████████████████| 1584/1584 [00:32<00:00, 48.08it/s]
100%|██████████████████| 294/294 [00:06<00:00, 48.46it/s]
100%|██████████████████| 342/342 [00:07<00:00, 48.03it/s]
100%|██████████████████| 354/354 [00:07<00:00, 47.85it/s]
100%|██████████████████| 405/405 [00:08<00:00, 47.43it/s]
100%|██████████████████| 357/357 [00:07<00:00, 46.78it/s]
100%|██████████████████| 398/398 [00:08<00:00, 47.97it/s]
100%|██████████████████| 557/557 [00:11<00:00, 47.36it/s]
100%|██████████████████| 481/481 [00:10<00:00, 47.21it/s]
100%|█████████

In [5]:
val_dataset = []
for i, file_name in enumerate(val_files):
    img = Image.open(
        f'{DATA_DIR}/{TRAIN_DIR}/{file_name}/images/{file_name}.png'
    ).convert('L')
    width, height = img.size
    img.save(f'{DATA_DIR}/{IMG_DIR}/{file_name}.png')
    image = dict(
        # grayscale
        file_name = f'{DATA_DIR}/{IMG_DIR}/{file_name}.png',
        # color
        # file_name = f'{DATA_DIR}/{TRAIN_DIR}/{file_name}/images/{file_name}.png',
        image_id = i + 1,
        width = width,
        height = height,
        annotations = []
    )
    for mask_name in tqdm(listdir(f'{DATA_DIR}/{TRAIN_DIR}/{file_name}/masks')):
        if not mask_name.endswith(".png"):
            continue
        maskfile = asfortranarray(
            Image.open(f'{DATA_DIR}/{TRAIN_DIR}/{file_name}/masks/{mask_name}'))
        RLE = mask.encode(maskfile)
        bbox = mask.toBbox(RLE)
        RLE['counts'] = RLE['counts'].decode('ascii')
        contours = measure.find_contours(maskfile, 0.5)
        annot = dict(
            category_id = 0,
            segmentation = [],
            bbox = bbox.tolist(),
            bbox_mode = BoxMode.XYWH_ABS,
            iscrowd = 0
        )
        for contour in contours:
            contour = np.flip(contour, axis=1)
            seg = contour.ravel().tolist()
            annot["segmentation"].append(seg)
        image['annotations'].append(annot)
    val_dataset.append(image)

100%|██████████████████| 356/356 [00:07<00:00, 48.26it/s]
100%|██████████████████| 328/328 [00:06<00:00, 48.71it/s]
100%|██████████████████| 363/363 [00:07<00:00, 48.69it/s]
100%|██████████████████| 432/432 [00:08<00:00, 49.29it/s]
100%|████████████████| 1165/1165 [00:23<00:00, 48.99it/s]
100%|████████████████| 1391/1391 [00:28<00:00, 48.19it/s]
100%|██████████████████| 472/472 [00:09<00:00, 47.99it/s]
100%|██████████████████| 359/359 [00:07<00:00, 48.79it/s]
100%|████████████████| 1584/1584 [00:31<00:00, 49.84it/s]
100%|██████████████████| 294/294 [00:05<00:00, 49.30it/s]
100%|██████████████████| 342/342 [00:06<00:00, 49.68it/s]
100%|██████████████████| 354/354 [00:07<00:00, 48.07it/s]
100%|██████████████████| 405/405 [00:08<00:00, 49.52it/s]
100%|██████████████████| 357/357 [00:07<00:00, 48.45it/s]
100%|██████████████████| 398/398 [00:08<00:00, 49.45it/s]
100%|██████████████████| 557/557 [00:11<00:00, 49.62it/s]
100%|██████████████████| 481/481 [00:09<00:00, 49.22it/s]
100%|█████████

In [6]:
with open('train.json', 'w') as f:
    json.dump(train_dataset, f)
    
with open('val.json', 'w') as f:
    json.dump(val_dataset, f)

In [7]:
def custom_config(num_classes):
    cfg = get_cfg()
    cfg.merge_from_file(
        model_zoo.get_config_file(
            "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
    cfg.DATASETS.TRAIN = ("train",)
    cfg.DATASETS.TEST = ()
    cfg.MODEL.WEIGHTS = "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
    cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[8], [16], [32], [64], [128]]
    cfg.SOLVER.BASE_LR = 2e-2
    cfg.SOLVER.STEPS = (2000, 4000, 8000, 12000, 16000)
    cfg.SOLVER.GAMMA = 0.5
    cfg.MODEL.PIXEL_MEAN = [113.31] * 3 # grayscale
    cfg.MODEL.PIXEL_STD = [1.0] * 3 # grayscale
    cfg.SOLVER.IMS_PER_BATCH = 1
    cfg.SOLVER.MAX_ITER = 50000
    cfg.TEST.DETECTIONS_PER_IMAGE = 2000
    cfg.TEST.EVAL_PERIOD = 1000
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
    cfg.MODEL.FPN.COARSEST_STRIDE = 256
    
    return cfg

In [8]:
class COCOTrainer(DefaultTrainer):

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        if output_folder is None:
            makedirs("coco_eval", exist_ok=True)
            output_folder = "coco_eval"
        return COCOEvaluator(dataset_name, cfg, False, output_folder)

def load_dataset(dataset_type="train"):
    if dataset_type == "train":
        with open("train.json", 'r') as file:
            train = json.load(file)
        return train
    elif dataset_type == "val":
        with open("val.json", 'r') as file:
            val = json.load(file)
        return val

for dt in ["train", "val"]:
    DatasetCatalog.register(dt, lambda dt=dt: load_dataset(dt))
    MetadataCatalog.get(dt).set(thing_classes=["nucleus"])
metadata = MetadataCatalog.get("train")

In [9]:
cfg = custom_config(1)
trainer = COCOTrainer(cfg)
trainer.resume_or_load(resume=False)

[32m[12/16 10:39:12 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (2, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (2,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (80, 256, 1, 1) in the checkpoint but (1, 256, 1, 1) in

In [10]:
print(cfg)

CUDNN_BENCHMARK: False
DATALOADER:
  ASPECT_RATIO_GROUPING: True
  FILTER_EMPTY_ANNOTATIONS: True
  NUM_WORKERS: 4
  REPEAT_THRESHOLD: 0.0
  SAMPLER_TRAIN: TrainingSampler
DATASETS:
  PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
  PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
  PROPOSAL_FILES_TEST: ()
  PROPOSAL_FILES_TRAIN: ()
  TEST: ()
  TRAIN: ('train',)
GLOBAL:
  HACK: 1.0
INPUT:
  CROP:
    ENABLED: False
    SIZE: [0.9, 0.9]
    TYPE: relative_range
  FORMAT: BGR
  MASK_FORMAT: polygon
  MAX_SIZE_TEST: 1333
  MAX_SIZE_TRAIN: 1333
  MIN_SIZE_TEST: 800
  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
  MIN_SIZE_TRAIN_SAMPLING: choice
  RANDOM_FLIP: horizontal
MODEL:
  ANCHOR_GENERATOR:
    ANGLES: [[-90, 0, 90]]
    ASPECT_RATIOS: [[0.5, 1.0, 2.0]]
    NAME: DefaultAnchorGenerator
    OFFSET: 0.0
    SIZES: [[16], [32], [64], [128], [256]]
  BACKBONE:
    FREEZE_AT: 2
    NAME: build_resnet_fpn_backbone
  DEVICE: cuda
  FPN:
    FUSE_TYPE: sum
    IN_FEATURES: ['res2', 'res3', 'res4', 'res5']
   

In [None]:
trainer.train()

[32m[12/16 10:39:13 d2.engine.train_loop]: [0mStarting training from iteration 0


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[32m[12/16 10:39:21 d2.utils.events]: [0m eta: 1:36:57  iter: 19  total_loss: 2.253  loss_cls: 0.534  loss_box_reg: 0.1779  loss_mask: 0.6445  loss_rpn_cls: 0.5365  loss_rpn_loc: 0.3618  time: 0.3071  data_time: 0.0326  lr: 0.00039962  max_mem: 7771M
[32m[12/16 10:39:27 d2.utils.events]: [0m eta: 1:37:49  iter: 39  total_loss: 2.087  loss_cls: 0.4725  loss_box_reg: 0.4436  loss_mask: 0.4227  loss_rpn_cls: 0.3401  loss_rpn_loc: 0.3243  time: 0.3029  data_time: 0.0099  lr: 0.00079922  max_mem: 7771M
[32m[12/16 10:39:33 d2.utils.events]: [0m eta: 1:37:43  iter: 59  total_loss: 1.884  loss_cls: 0.4778  loss_box_reg: 0.4915  loss_mask: 0.3465  loss_rpn_cls: 0.3034  loss_rpn_loc: 0.2931  time: 0.3011  data_time: 0.0094  lr: 0.0011988  max_mem: 7771M
[32m[12/16 10:39:39 d2.utils.events]: [0m eta: 1:36:40  iter: 79  total_loss: 1.866  loss_cls: 0.5234  loss_box_reg: 0.5599  loss_mask: 0.3233  loss_rpn_cls: 0.24  loss_rpn_loc: 0.2649  time: 0.2987  data_time: 0.0094  lr: 0.0015984  max_