# Initialisation

In [None]:
# Mount Google Drive at /content/drive; later cells read the Manga109 data and
# the model weights from paths below /content/drive/MyDrive.
from google.colab import drive
drive.mount("/content/drive")

In [None]:
!pip3 uninstall keras-nightly
!pip3 uninstall -y tensorflow
!pip3 install keras==2.1.6
!pip3 install tensorflow==1.15.0
!pip3 install h5py==2.10.0
!pip3 install opencv-contrib-python

In [None]:
# Fetch the Mask R-CNN implementation (presumably the source of the `mrcnn`
# package imported further down).
!git clone https://github.com/matterport/Mask_RCNN.git

In [None]:
# Enter the checkout so that setup.py and requirements.txt resolve.
%cd Mask_RCNN

In [None]:
# Install the package from the checkout, then the dependencies it lists.
!python setup.py install
!pip install -r requirements.txt

In [None]:
# Extra packages imported by the next cell.
!pip install elementpath
!pip install manga109api

In [None]:
import elementpath
from xml.etree import ElementTree
import manga109api
from google.colab import files
from os import listdir
from numpy import zeros, asarray, expand_dims, mean
from numpy import asarray
from mrcnn.utils import Dataset, extract_bboxes, compute_ap
from mrcnn.config import Config
from mrcnn.visualize import display_instances
from mrcnn.model import MaskRCNN, load_image_gt, mold_image
import matplotlib.pyplot as pyplot
from matplotlib.patches import Rectangle, Arrow
import math
import cv2

In [None]:
# Root of the Manga109 copy on the mounted Drive. The trailing slash is
# required: other cells build paths by plain string concatenation.
root_dir = "/content/drive/MyDrive/NRP/Project/Manga109/"
# Manga109 parser; the notebook only reads `p.books` (the book titles) from it.
p = manga109api.Parser(root_dir=root_dir)

# Reformat Manga109 annotations

In [None]:
# Leave the cloned repository and work from /content for the annotation export.
%cd /content

In [None]:
# Split every book-level Manga109 annotation file into one XML file per page,
# written to /content/<book>/new_<book><page index>.xml.
import os

for book in p.books:
  book_root = ElementTree.parse(root_dir + "annotations/" + book + ".xml").getroot()

  # Absolute output folder created with exist_ok=True: the cell can be re-run
  # and does not depend on (or change) the current working directory.
  out_dir = os.path.join("/content", book)
  os.makedirs(out_dir, exist_ok=True)

  for page in book_root.findall(".//page"):
    page_file = os.path.join(out_dir, "new_" + book + str(page.attrib["index"]) + ".xml")
    # tostring() returns bytes, hence the binary mode.
    with open(page_file, "wb") as f:
      f.write(ElementTree.tostring(page))

In [None]:
# Archive each exported /content/<book> folder with the `zip` command line tool.
# NOTE(review): MangaDataset later reads the per-page files from
# <root_dir>annotations/<book>/, so these archives presumably get unpacked
# there by hand -- confirm.
for book in p.books:
  !zip -r /content/$book /content/$book

In [None]:
# Show the book titles, one per line.
print(*p.books, sep="\n")

# Prepare Dataset

In [None]:
class MangaDataset(Dataset):
  """Manga109 pages with face, text and frame boxes as a Mask R-CNN dataset.

  Reads the notebook-level ``p`` (manga109api parser) and ``root_dir``. The
  per-page annotation files are expected at
  ``<root_dir>annotations/<book>/new_<book><page index>.xml``.
  """

  # Annotation tags; registered, in this order, as class ids 1, 2 and 3.
  _LABELS = ("face", "text", "frame")

  # Pages whose index is below this value form the training split, the
  # remaining pages form the test/validation split.
  _SPLIT_INDEX = 50

  def load_dataset(self, is_train=True):
    """Register the classes and every usable page of the requested split.

    A page is used only when its annotation contains at least one face, one
    text and one frame element.

    Args:
      is_train: truthy selects pages with index < 50 of every book, falsy
        selects the pages with index >= 50.
    """
    for class_id, label in enumerate(self._LABELS, start=1):
      self.add_class("dataset", class_id, label)

    # Offset added to the page index so that ids do not collide across books.
    last_image_id = 0

    for book in sorted(p.books):
      images_dir = root_dir + "images/" + book + "/"
      annotations_dir = root_dir + "annotations/" + book + "/"
      # Stays at the current offset when the folder holds no page images, so
      # an empty book can neither raise nor reuse a stale id.
      next_offset = last_image_id

      for img in sorted(listdir(images_dir)):
        # Page images are named <page index>.<ext>; skip anything else
        # (hidden files, thumbnails databases, ...).
        stem = img.rpartition(".")[0]
        if not stem.isdecimal():
          continue

        og_image_id = int(stem)
        image_id = og_image_id + last_image_id
        next_offset = max(next_offset, image_id + 1)

        # Decide the split first: pages of the other split are never parsed.
        if bool(is_train) != (og_image_id < self._SPLIT_INDEX):
          continue

        ann_path = annotations_dir + "new_" + book + str(og_image_id) + ".xml"
        page = ElementTree.parse(ann_path).getroot()

        # Keep only pages that contain all three kinds of annotation.
        if any(page.find(".//" + label) is None for label in self._LABELS):
          continue

        self.add_image("dataset", image_id=image_id, path=images_dir + img,
                       annotation=ann_path, class_ids=[0, 1, 2, 3])

      last_image_id = next_offset

  def extract_boxes(self, filename):
    """Read the boxes and the page size from one per-page annotation file.

    Args:
      filename: path of the page XML file.

    Returns:
      ``(boxes, width, height)``. ``boxes`` lists all faces, then all texts,
      then all frames as ``[xmin, ymin, xmax, ymax, label]``, with the
      coordinates exactly as stored in the XML attributes (strings).
      ``width`` and ``height`` are the page attributes converted to int.
    """
    root = ElementTree.parse(filename).getroot()
    boxes = []

    for label in self._LABELS:
      for box in root.findall(".//" + label):
        att = box.attrib
        boxes.append([att["xmin"], att["ymin"], att["xmax"], att["ymax"], label])

    page_att = root.attrib
    width = int(page_att["width"])
    height = int(page_att["height"])

    return boxes, width, height

  def load_mask(self, image_id):
    """Build one rectangular mask per annotated box of an image.

    Args:
      image_id: index into ``self.image_info``.

    Returns:
      ``(masks, class_ids)``: a uint8 array of shape [height, width, boxes]
      holding 1 inside each box and 0 elsewhere, and an int32 array with the
      class id of every mask.
    """
    info = self.image_info[image_id]
    boxes, w, h = self.extract_boxes(info["annotation"])

    masks = zeros([h, w, len(boxes)], dtype="uint8")
    class_ids = []

    for i, (xmin, ymin, xmax, ymax, label) in enumerate(boxes):
      # Always 1: masks are binary per instance and the class is carried by
      # class_ids. Writing the class id into the pixels (2, 3) makes code
      # that tests `mask == 1` ignore text and frame instances.
      masks[int(ymin):int(ymax), int(xmin):int(xmax), i] = 1
      class_ids.append(self.class_names.index(label))

    return masks, asarray(class_ids, dtype="int32")

  def image_reference(self, image_id):
    """Return the image file path stored for `image_id`."""
    info = self.image_info[image_id]
    return info["path"]

In [None]:
# train set
train_set = MangaDataset()
train_set.load_dataset(is_train=True)
train_set.prepare()
print("Train: %d" % len(train_set.image_ids))

In [None]:
# test/val set
test_set = MangaDataset()
test_set.load_dataset(is_train=False)
test_set.prepare()
print("Test: %d" % len(test_set.image_ids))

In [None]:
# load an image and mask
image_id = 1
image = test_set.load_image(image_id)
print(image.shape)

mask, class_ids = test_set.load_mask(image_id)
print(mask.shape)

In [None]:
# display image with masks and bounding boxes
bbox = extract_bboxes(mask)
display_instances(image, bbox, mask, class_ids, test_set.class_names)

# Train Model

In [None]:
class MangaConfig(Config):
  """Training configuration for the face/text/frame detector."""

  NAME = "manga_cfg"

  # Background plus the three annotated classes.
  NUM_CLASSES = 1 + 3

  # Batch size = GPU_COUNT * IMAGES_PER_GPU. Spelled out (same values as the
  # matterport Config defaults -- TODO confirm for the installed version) so
  # the steps computation below no longer depends on a bare magic number.
  GPU_COUNT = 1
  IMAGES_PER_GPU = 2

  # Roughly one pass over the training images per epoch; never 0, which a
  # training set smaller than one batch would otherwise produce.
  STEPS_PER_EPOCH = max(1, len(train_set.image_ids) // (GPU_COUNT * IMAGES_PER_GPU))

In [None]:
# Instantiate the training configuration defined above.
config = MangaConfig()

In [None]:
# Build the network in training mode; model_dir is passed as /content.
model = MaskRCNN(mode="training", model_dir="/content", config=config)

# Start from a previously saved checkpoint on Drive. The layers listed in
# `exclude` are the class/box/mask output layers.
# NOTE(review): excluding them presumably re-initialises the heads on every
# run, even though model_3.h5 looks like one of this project's own
# checkpoints -- confirm this is intended.
model.load_weights("/content/drive/MyDrive/NRP/Project/Working/Mask R-CNN (3)/model_3.h5",
                   by_name=True,
                   exclude=["mrcnn_class_logits", "mrcnn_bbox_fc",  "mrcnn_bbox", "mrcnn_mask"])

# One epoch over train_set with test_set as validation data, using the
# configuration's learning rate and layers="all".
model.train(train_set, test_set, learning_rate=config.LEARNING_RATE, epochs=1, layers="all")

# config.LEARNING_RATE = 0.001

# Evaluate Model

In [None]:
class PredictionConfig(Config):
  """Inference configuration: same name and classes, one image per batch."""

  # A single GPU handling a single image, i.e. a batch of one per detect() call.
  GPU_COUNT = 1
  IMAGES_PER_GPU = 1

  # Background + face + text + frame.
  NUM_CLASSES = 1 + 3

  NAME = "manga_cfg"

In [None]:
# Rebuild the network in inference mode; this rebinds `model`, replacing the
# training-mode instance created earlier.
cfg = PredictionConfig()
model = MaskRCNN(mode="inference", model_dir="/content", config=cfg)

In [None]:
# Load the checkpoint to evaluate, matching layers by name.
model.load_weights("/content/drive/MyDrive/NRP/Project/Working/Mask R-CNN (3)/model_4.h5", by_name=True)

In [None]:
# evaluate model using Manga109 dataset
def evaluate_model(dataset, model, cfg):
  APs = []
  for image_id in dataset.image_ids:
    image, image_meta, gt_class_id, gt_bbox, gt_mask = load_image_gt(dataset, cfg, image_id, use_mini_mask=False)
    scaled_image = mold_image(image, cfg)
    sample = expand_dims(scaled_image, 0)
    yhat = model.detect(sample, verbose=0)
    r = yhat[0]

    # change IoU threshold
    AP, _, _, _ = compute_ap(gt_bbox, gt_class_id, gt_mask, r["rois"], r["class_ids"], r["scores"], r["masks"], iou_threshold=0.5)
    APs.append(AP)

  mAP = mean(APs)
  return mAP

In [None]:
# evaluate model on training dataset
train_mAP = evaluate_model(train_set, model, cfg)
print("Train mAP: %.3f" % train_mAP)

In [None]:
# evaluate model on test dataset
test_mAP = evaluate_model(test_set, model, cfg)
print("Test mAP: %.3f" % test_mAP)