# Setup

In [None]:
# Mount Google Drive at /content/drive; root_path (defined further down) points
# into this mount, so this cell has to run before any dataset path is used.
from google.colab import drive
drive.mount("/content/drive")

In [None]:
!pip install pyyaml==5.1

In [None]:
!pip install torch==1.7.1+cu101 torchvision==0.8.2+cu101 torchaudio==0.7.2 -f https://download.pytorch.org/whl/torch_stable.html

In [None]:
import torch

In [None]:
!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.7/index.html

In [None]:
!git clone --branch v0.6 https://github.com/facebookresearch/detectron2.git detectron2_repo
!pip install -e detectron2_repo

In [7]:
import detectron2
from detectron2 import model_zoo
from detectron2.utils.logger import setup_logger
from detectron2.utils.visualizer import Visualizer, ColorMode
from detectron2.engine import DefaultPredictor, DefaultTrainer
from detectron2.config import get_cfg
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.data.datasets import register_coco_instances
from detectron2.projects import point_rend

import random
import os
import pickle
import cv2
from google.colab.patches import cv2_imshow
import matplotlib.pyplot as plt

In [None]:
# Initialise the detectron2 logger with its default arguments.
setup_logger()

In [8]:
# Dataset root on the mounted Google Drive. The trailing slash is required:
# later cells build file paths by appending names directly to this string.
root_path = "/content/drive/MyDrive/NRP/Project/Manga109_COCO/"

In [9]:
# NOTE(review): both names resolve to the same "r101.yaml" file, and neither is
# referenced again in the visible notebook (the Config section hard-codes its own
# config path and weights URL). checkpoint_url looks like a copy-paste slip --
# confirm the intended checkpoint location or drop these variables.
config_file_path = root_path + "r101.yaml"
checkpoint_url = root_path + "r101.yaml"

In [10]:
# output_dir becomes cfg.OUTPUT_DIR and num_classes is written to both the
# ROI-heads and point-head class counts in the Config section.
output_dir = "./output"
num_classes = 2 # Speech, Panel

In [11]:
# Use the GPU when one is available and fall back to the CPU otherwise, so the
# notebook still runs on a runtime without CUDA. The value is assigned to
# cfg.MODEL.DEVICE in the Config section.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [12]:
# Training split: registration name, image folder and COCO annotation file.
train_dataset_name = "train"
train_images_path = f"{root_path}train/"
train_json_annot_path = f"{root_path}train.json"

# Test split, laid out the same way under root_path.
test_dataset_name = "test"
test_images_path = f"{root_path}test/"
test_json_annot_path = f"{root_path}test.json"

In [13]:
# Register train dataset
# Registering an already-registered name raises, so remove any previous
# registration (catalog entry and metadata) first. This keeps the cell safe to
# re-run, e.g. after changing the paths above.
for _catalog in (DatasetCatalog, MetadataCatalog):
  if train_dataset_name in _catalog.list():
    _catalog.remove(train_dataset_name)

register_coco_instances(
    name=train_dataset_name,
    metadata={},
    json_file=train_json_annot_path,
    image_root=train_images_path
)

In [14]:
# Register test dataset
# Registering an already-registered name raises, so remove any previous
# registration (catalog entry and metadata) first. This keeps the cell safe to
# re-run, e.g. after changing the paths above.
for _catalog in (DatasetCatalog, MetadataCatalog):
  if test_dataset_name in _catalog.list():
    _catalog.remove(test_dataset_name)

register_coco_instances(
    name=test_dataset_name,
    metadata={},
    json_file=test_json_annot_path,
    image_root=test_images_path
)

# Visualise Dataset

In [15]:
def plot_sample(dataset_name, n=1):
  """Show up to ``n`` randomly chosen dataset images with their annotations drawn.

  Args:
    dataset_name: Name under which the dataset is registered in ``DatasetCatalog``.
    n: Number of random samples to display. Capped at the dataset size so a
      small dataset does not make ``random.sample`` raise ``ValueError``.

  Raises:
    FileNotFoundError: If a sampled image cannot be read from disk.
  """
  dataset_custom = DatasetCatalog.get(dataset_name)
  dataset_custom_metadata = MetadataCatalog.get(dataset_name)

  sample_size = min(n, len(dataset_custom))
  for s in random.sample(dataset_custom, sample_size):
    img = cv2.imread(s["file_name"])
    if img is None:
      # cv2.imread reports an unreadable/missing file by returning None.
      raise FileNotFoundError(f"Could not read image: {s['file_name']}")
    # Reverse the channel axis (OpenCV's BGR order -> RGB) before drawing.
    v = Visualizer(img[:,:,::-1], metadata=dataset_custom_metadata, scale=0.5)
    v = v.draw_dataset_dict(s)
    plt.figure(figsize=(15,20))
    plt.imshow(v.get_image())
    plt.show()

In [None]:
# Preview one random annotated training image.
plot_sample(train_dataset_name, n=1)

# Config

In [17]:
# File the finished config is pickled to; the Test section reloads it from the same name.
cfg_save_path = "cfg.pickle"

In [18]:
# Start from detectron2's default config. add_pointrend_config is called before
# merge_from_file, presumably so the PointRend-specific keys of the YAML are known
# to cfg -- confirm. The YAML is read from the cloned detectron2_repo checkout.
cfg = get_cfg()
point_rend.add_pointrend_config(cfg)
cfg.merge_from_file("detectron2_repo/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_101_FPN_3x_coco.yaml")

# Initial weights: the PointRend R101-FPN 3x COCO checkpoint named in the URL.
cfg.MODEL.WEIGHTS = "detectron2://PointRend/InstanceSegmentation/pointrend_rcnn_R_101_FPN_3x_coco/28119983/model_final_3f4d2a.pkl"
# Both the ROI heads and the point head get this notebook's class count.
cfg.MODEL.ROI_HEADS.NUM_CLASSES = num_classes
cfg.MODEL.POINT_HEAD.NUM_CLASSES = num_classes
cfg.MODEL.DEVICE = device
# Test-time score threshold; presumably detections scoring below 0.9 are dropped -- confirm.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.9

# Single-element tuples: one train and one test dataset, as registered above.
cfg.DATASETS.TRAIN = (train_dataset_name, )
cfg.DATASETS.TEST = (test_dataset_name, )

cfg.DATALOADER.NUM_WORKERS = 2

# Solver settings for a short fine-tuning run.
# NOTE(review): an empty STEPS list presumably means no learning-rate decay
# milestones -- confirm against the detectron2 solver documentation.
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025
cfg.SOLVER.MAX_ITER = 1000
cfg.SOLVER.STEPS = []

cfg.OUTPUT_DIR = output_dir

In [19]:
# Persist the finished config so the Test section can reload it from disk.
with open(cfg_save_path, mode="wb") as cfg_file:
  pickle.dump(cfg, cfg_file, protocol=pickle.HIGHEST_PROTOCOL)

In [20]:
# Create the output directory up front; exist_ok=True makes the cell safe to re-run.
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

# Train

In [None]:
# Build a DefaultTrainer from cfg and run the training loop.
# NOTE(review): resume=False is passed explicitly; per the detectron2 docs this
# should start from cfg.MODEL.WEIGHTS rather than from a checkpoint previously
# saved in cfg.OUTPUT_DIR -- confirm for the installed version.
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()

# Test

In [22]:
def on_image(dataset_name, predictor, n=1):
  """Run ``predictor`` on up to ``n`` random dataset images and show the predictions.

  Args:
    dataset_name: Name under which the dataset is registered in ``DatasetCatalog``.
    predictor: Callable that takes an image as returned by ``cv2.imread`` and
      returns a dict with an ``"instances"`` entry.
    n: Number of random samples to display. Capped at the dataset size so a
      small dataset does not make ``random.sample`` raise ``ValueError``.

  Raises:
    FileNotFoundError: If a sampled image cannot be read from disk.
  """
  dataset_custom = DatasetCatalog.get(dataset_name)
  # Pass the dataset's metadata (rather than an empty dict) so the Visualizer
  # can label instances with class names when the metadata provides them.
  dataset_custom_metadata = MetadataCatalog.get(dataset_name)

  sample_size = min(n, len(dataset_custom))
  for s in random.sample(dataset_custom, sample_size):
    img = cv2.imread(s["file_name"])
    if img is None:
      # cv2.imread reports an unreadable/missing file by returning None.
      raise FileNotFoundError(f"Could not read image: {s['file_name']}")
    outputs = predictor(img)
    # Reverse the channel axis (OpenCV's BGR order -> RGB) before drawing.
    v = Visualizer(img[:,:,::-1], metadata=dataset_custom_metadata, scale=0.5, instance_mode=ColorMode.SEGMENTATION)
    v = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    plt.figure(figsize=(15,20))
    plt.imshow(v.get_image())
    plt.show()

In [28]:
cfg_save_path = "cfg.pickle"

# Reload the config pickled in the Config section.
# NOTE: pickle.load can execute arbitrary code -- only load a file written by this notebook.
with open(cfg_save_path, "rb") as f:
  cfg = pickle.load(f)

# DefaultTrainer writes its final checkpoint as "model_final.pth" in OUTPUT_DIR;
# "model.pth" is not produced by the training cell, so loading it would fail.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.9

In [None]:
# Build the inference wrapper from the reloaded config (weights path set in the previous cell).
predictor = DefaultPredictor(cfg)
# NOTE(review): image_path is not used by any cell visible in this notebook.
image_path = root_path + "test/001.jpg"

In [None]:
# Visualise predictions on five random images from the test split.
on_image(dataset_name=test_dataset_name, predictor=predictor, n=5)

# Notes

Dataset: Manga109 images manually annotated using Labelme

Classes: Speech, Panel (there is no Face class because I did not have enough time to annotate faces manually)

# Acknowledgement

@InProceedings{kirillov2019pointrend,
  title={{PointRend}: Image Segmentation as Rendering},
  author={Alexander Kirillov and Yuxin Wu and Kaiming He and Ross Girshick},
  journal={ArXiv:1912.08193},
  year={2019}
}