# Mask R-CNN - Train on Shapes Dataset


This notebook shows how to train Mask R-CNN on your own dataset. To keep things simple we use a synthetic dataset of shapes (squares, triangles, and circles), which enables fast training. You'd still need a GPU, though, because the network backbone is a ResNet101, which would be too slow to train on a CPU. On a GPU, you can start to get okay-ish results in a few minutes, and good results in less than an hour.

The code of the *Shapes* dataset is included below. It generates images on the fly, so it doesn't require downloading any data. And it can generate images of any size, so we pick a small image size to train faster. 

In [1]:
import os
import json
import sys
import random
import math
import re
import time
import numpy as np
import cv2
import matplotlib
import matplotlib.pyplot as plt
from IPython.display import clear_output


# Put the amodalAPI checkout's `pycocotools` directory on the import path.
# Two candidate locations are appended, one per user profile.
# NOTE(review): both are absolute, per-user Windows paths -- on any other
# machine neither will exist and the `import mask` below will presumably
# fail; confirm and consider deriving them from a configurable base directory.
sys.path.append('C:/Users/yliu60/Documents/GitHub/amodalAPI/PythonAPI/pycocotools')
sys.path.append('C:/Users/Yanfeng Liu/Documents/GitHub/amodalAPI/PythonAPI/pycocotools')

# Imported by bare module name, so it is presumably resolved through one of
# the sys.path entries appended just above.
import mask as Mask

# Root directory of the project: two levels above the current working
# directory at the time this cell runs.
ROOT_DIR = os.path.abspath("../../")

# Import Mask RCNN
sys.path.append(ROOT_DIR)  # To find local version of the library
from mrcnn.config import Config
from mrcnn.model import log
from mrcnn import utils
from mrcnn import visualize
from PIL import Image, ImageDraw
import mrcnn_shapes as shapes_lib
import mrcnn.model as modellib
%matplotlib inline 

import metrics_hist
import params as params_lib
import batchEval
from experiment import get_mrcnn_result_list

# Training logs and checkpoints are written below the project root.
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

# COCO-pretrained weights are expected next to the project root; they are
# downloaded from Releases the first time the file is missing.
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
if not os.path.exists(COCO_MODEL_PATH):
    utils.download_trained_weights(COCO_MODEL_PATH)

# Re-import the local helper modules so that on-disk edits are picked up
# without restarting the kernel.
from importlib import reload
for module in (shapes_lib, visualize, modellib, metrics_hist):
    _ = reload(module)

# Enumerate GPUs in PCI bus order and expose only device id 2 to this process.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "2",
})

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Passed to ShapesDataset(...) when the train/validation datasets are built,
# and also reused as the NumPy random seed before building the validation set.
num_instances_per_class = 6

In [3]:
class ShapesConfig(Config):
    """Configuration for training on the toy shapes dataset.
    Derives from the base Config class and overrides values specific
    to the toy shapes dataset.
    """
    # Give the configuration a recognizable name
    NAME = "shapes"

    # Train on 1 GPU with 2 images per GPU, i.e. a batch size of 2
    # (GPUs * images/GPU). The images are small, so more than one fits per GPU.
    GPU_COUNT = 1
    IMAGES_PER_GPU = 2

    # Number of classes (including background)
    NUM_CLASSES = 1 + 3  # background + 3 shapes

    # Use small images for faster training. The small-side and large-side
    # limits are both 256.
    IMAGE_MIN_DIM = 256
    IMAGE_MAX_DIM = 256
    
    # NOTE(review): "none" presumably tells the base Config not to resize or
    # pad input images -- confirm against mrcnn.config.Config.
    IMAGE_RESIZE_MODE = "none"

    # Use smaller anchors because our image and objects are small
    RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128)  # anchor side in pixels

    # Reduce training ROIs per image because the images are small and have
    # few objects. Aim to allow ROI sampling to pick 33% positive ROIs.
    TRAIN_ROIS_PER_IMAGE = 64

    # Training steps per epoch
    STEPS_PER_EPOCH = 1000

    # Validation steps, kept an order of magnitude below STEPS_PER_EPOCH
    VALIDATION_STEPS = 100
    
    LEARNING_RATE = 1e-4
    
    # custom properties
    # NOTE(review): the attributes below are not explained anywhere in this
    # notebook; their meaning is presumably defined by the project's own
    # helper modules -- confirm there before changing them.
    FULL_GT = True
    SMALL = True
    RANDOM = False
    DATASET_TYPE = 'val'
    DATASET_NAME = 'shapes'
    # Directory the notebook writes the detection JSON into; also handed to
    # metrics_hist.MetricsHist and to the evaluation args as `dt_dir`.
    # NOTE(review): absolute, per-user path -- not portable across machines.
    DT_DIR = 'C:/Users/yliu60/Documents/GitHub/Pixel-Embedding/results/mrcnn/6-1_2_3/'
    # Ground-truth JSON handed to the evaluation args as `gt_dir`. The
    # evaluation cell overwrites this attribute on the `config` instance.
    # NOTE(review): absolute, per-user path -- not portable across machines.
    GT_JSON_DIR = 'C:/Users/yliu60/Documents/GitHub/Pixel-Embedding/gt_json/mrcnn/6/shapes_val_small_6.json'
    DOWNSAMPLE_FACTOR = 4
    DOWNSAMPLE_RESOLUTION = 64
    
# Instantiate the training configuration and print every setting.
config = ShapesConfig()
config.display()


Configurations:
BACKBONE                       resnet101
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     2
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
COMPUTE_BACKBONE_SHAPE         None
DATASET_NAME                   shapes
DATASET_TYPE                   val
DETECTION_MAX_INSTANCES        100
DETECTION_MIN_CONFIDENCE       0.7
DETECTION_NMS_THRESHOLD        0.3
DOWNSAMPLE_FACTOR              4
DOWNSAMPLE_RESOLUTION          64
DT_DIR                         C:/Users/yliu60/Documents/GitHub/Pixel-Embedding/results/mrcnn/6-1_2_3/
FPN_CLASSIF_FC_LAYERS_SIZE     1024
FULL_GT                        True
GPU_COUNT                      1
GRADIENT_CLIP_NORM             5.0
GT_JSON_DIR                    C:/Users/yliu60/Documents/GitHub/Pixel-Embedding/gt_json/mrcnn/6/shapes_val_small_6.json
IMAGES_PER_GPU                 2
IMAGE_CHANNEL_COUNT            3
IMAGE_MAX_DIM                  256
IMAGE_META_SIZE                16
IMAGE_MIN_DIM                  

In [6]:
# Validation dataset. NumPy is seeded first, using the instance count as the
# seed value.
np.random.seed(num_instances_per_class)
dataset_val = shapes_lib.ShapesDataset(num_instances_per_class)
# Request 1000 entries; the two size arguments are the first two elements of
# the configured image shape.
dataset_val.load_shapes(1000, *config.IMAGE_SHAPE[:2])
dataset_val.prepare()

# Attach the dataset to the config object (presumably so helpers that only
# receive `config` can reach it -- confirm against get_mrcnn_result_list).
config.MRCNN_DATASET = dataset_val

In [7]:
def get_ax(rows=1, cols=1, size=8):
    """Create a ``rows`` x ``cols`` grid of subplots and return its axes.

    Visualizations in the notebook obtain their axes here, so figure sizing
    is controlled from one place: the figure is ``size * cols`` wide and
    ``size * rows`` tall. Change the default ``size`` to change how large
    rendered images are.

    Returns the axes object produced by ``plt.subplots`` (the figure itself
    is discarded).
    """
    figure_size = (size * cols, size * rows)
    axes = plt.subplots(rows, cols, figsize=figure_size)[1]
    return axes

In [8]:
class InferenceConfig(ShapesConfig):
    """ShapesConfig variant with one GPU and one image per GPU.

    Used in this notebook when building the model in "inference" mode.
    """
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

# Single shared instance; the evaluation cell edits its
# DETECTION_NMS_THRESHOLD in place before each model build.
inference_config = InferenceConfig()

In [9]:
# Create model in training mode
model = modellib.MaskRCNN(mode="training", config=config,
                          model_dir=MODEL_DIR)

# Which weights to start with?
init_with = "coco"  # imagenet, coco, or last

if init_with == "imagenet":
    model.load_weights(model.get_imagenet_weights(), by_name=True)
elif init_with == "coco":
    # Load weights trained on MS COCO, but skip layers that
    # are different due to the different number of classes
    # See README for instructions to download the COCO weights
    model.load_weights(COCO_MODEL_PATH, by_name=True,
                       exclude=["mrcnn_class_logits", "mrcnn_bbox_fc",
                                "mrcnn_bbox", "mrcnn_mask"])
elif init_with == "last":
    # Load the last model you trained and continue training
    model.load_weights(model.find_last(), by_name=True)
else:
    # A typo in `init_with` used to fall through every branch, leaving the
    # model with no weights loaded and no warning. Fail loudly instead.
    raise ValueError(
        "init_with must be 'imagenet', 'coco' or 'last', got {!r}".format(init_with))

In [10]:
# Epoch count handed to model.train().
EPOCHS = 100
# Metrics helper, constructed with the detection-results directory.
hist = metrics_hist.MetricsHist(config.DT_DIR)

# Training dataset, built with the same instance count and image size as the
# validation set.
# NOTE(review): the previous comment here said a new training dataset is
# obtained at every epoch, but only this single load_shapes(1, ...) call is
# visible -- confirm that ShapesDataset regenerates images internally.
dataset_train = shapes_lib.ShapesDataset(num_instances_per_class)
dataset_train.load_shapes(1, *config.IMAGE_SHAPE[:2])
dataset_train.prepare()

In [None]:
# Reset the model's epoch counter so this run starts counting from epoch 0.
model.epoch = 0
# Train with the configured learning rate for EPOCHS epochs, validating on
# dataset_val. layers="all" is the layer selector passed to mrcnn
# (presumably "train every layer" -- confirm against mrcnn.model).
model.train(dataset_train, dataset_val, 
            learning_rate=config.LEARNING_RATE,
            epochs=EPOCHS,
            layers="all")


Starting at epoch 0. LR=0.0001

Checkpoint Path: C:\Users\yliu60\Documents\GitHub\Pixel-Embedding\logs\shapes20190302T1108\mask_rcnn_shapes_0.h5
Selecting layers to train
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
 127/1000 [==>...........................] - ETA: 9:34 - loss: 0.5485 - rpn_class_loss: 0.0030 - rpn_bbox_los

In [None]:
# Save the trained weights under MODEL_DIR (ROOT_DIR/logs) instead of a
# hard-coded per-user absolute path, so the cell works from any checkout.
# NOTE(review): the file name says "12_instances" while
# num_instances_per_class is 6 in this notebook -- confirm the name is still
# the intended one.
model.keras_model.save_weights(
    os.path.join(MODEL_DIR, 'shapes_12_instances_100_epoch.h5'))

# Evaluation

In [None]:
# Sweep the detection NMS threshold over 0.0, 0.1, ..., 1.0. For each value:
# build an inference model, load the trained weights, dump the detections to
# JSON, run the batch evaluation and print one LaTeX table row of metrics.

# Loop-invariant inputs, built once. Paths are derived from MODEL_DIR and
# ROOT_DIR rather than spelled out as per-user absolute paths.
model_path = os.path.join(MODEL_DIR, '3_shapes_100.h5')
gt_json_path = os.path.join(ROOT_DIR, 'gt_json', 'deeplabv3', '6',
                            'shapes_val_small_6_[1, 2, 3].json')
dt_filename = 'shapes.json'
string_format = "{:.4f} & {:.4f} & {:.4f} & {:.4f} & {:.4f} & {:.4f} & {:.4f} \\\\ \n \\hline"

for i in np.linspace(0, 1, 11):
    # A fresh inference model is built for every threshold (presumably
    # because the threshold is read from the config at construction time --
    # confirm against mrcnn.model).
    inference_config.DETECTION_NMS_THRESHOLD = np.float32(i)
    model = modellib.MaskRCNN(mode="inference",
                              config=inference_config,
                              model_dir=MODEL_DIR)

    # Load trained weights
    print("Loading weights from ", model_path)
    model.load_weights(model_path, by_name=True)

    # NOTE(review): the training `config` (not `inference_config`) is what
    # gets passed here, as in the original cell -- confirm this is intended.
    result_list = get_mrcnn_result_list(model, config)

    # The detections file is overwritten on every iteration; it is consumed
    # by the evaluation call further down in the same iteration.
    with open(os.path.join(config.DT_DIR, dt_filename), 'w') as outfile:
        json.dump(result_list, outfile)

    # Kept at its original position (after the detections are produced) so
    # that what get_mrcnn_result_list sees on `config` is unchanged.
    config.GT_JSON_DIR = gt_json_path

    args = params_lib.Args()

    args.num_shape_per_class = None
    args.dt_dir              = config.DT_DIR
    args.gt_dir              = config.GT_JSON_DIR
    args.maxProp             = int(1000)
    args.outputFile          = 'output'

    metrics = batchEval.main(args)

    # One decimal place: np.linspace yields values such as
    # 0.30000000000000004, which previously leaked into the printed label.
    print("NMS threshold: {:.1f}".format(i))
    ap            = metrics['both'].ap
    ap50          = metrics['both'].ap_05
    ap75          = metrics['both'].ap_075
    ar100         = metrics['both'].ar100
    ar_none       = metrics['both'].ar_none
    ar_partial    = metrics['both'].ar_partial
    ar_heavy      = metrics['both'].ar_heavy
    print(string_format.format(
        ap, ap50, ap75, ar100, ar_none, ar_partial, ar_heavy))

# NOTE(review): after the sweep, `model` and `inference_config` are left at
# the last threshold (1.0); any later cell that calls `model.detect` uses
# that setting.

# Examples

In [None]:
# For four randomly chosen validation images, show the ground truth followed
# by the detections returned by the current `model`.
for _ in range(4):
    image_id = random.choice(dataset_val.image_ids)
    print('image_id = {}'.format(image_id))

    (original_image, image_meta,
     gt_class_id, gt_bbox, gt_mask) = modellib.load_image_gt(
        dataset_val, inference_config, image_id, use_mini_mask=False)

    # Pass each loaded item to mrcnn's `log` helper under its own name.
    for label, value in (("original_image", original_image),
                         ("image_meta", image_meta),
                         ("gt_class_id", gt_class_id),
                         ("gt_bbox", gt_bbox),
                         ("gt_mask", gt_mask)):
        log(label, value)

    # Ground-truth boxes, masks and class ids.
    visualize.display_instances(original_image, gt_bbox, gt_mask, gt_class_id,
                                dataset_val.class_names, figsize=(8, 8))

    # Detections for the same image, drawn on a fresh axes from get_ax().
    results = model.detect([original_image], verbose=0)
    r = results[0]
    visualize.display_instances(original_image, r['rois'], r['masks'], r['class_ids'],
                                dataset_val.class_names, r['scores'], ax=get_ax())

