# Mobile-Mask-RCNN: validation on Coco Dataset
This notebook provides a fast way to evaluate a trained Mobile Mask R-CNN model on the COCO validation set (segmentation and bounding-box metrics).

In [2]:
"""
Based on the work of Waleed Abdulla (Matterport)
written by github.com/wozhouh
"""

import os
import sys

# Root directory of the project.
# NOTE: "../" is resolved against the current working directory, so the
# notebook has to be run from a directory directly below the project root.
ROOT_DIR = os.path.abspath("../")

# Import Mask RCNN
# The path is extended first so that the `mrcnn` import below can be resolved.
sys.path.append(ROOT_DIR)  # To find local version of the library
from mrcnn import model as modellib

# Import COCO config
# NOTE(review): sys.path.append puts the directory at the END of the search
# path, so a `coco` module found earlier on sys.path would take precedence.
sys.path.append(os.path.join(ROOT_DIR, "samples", "coco"))  # To find local version
import coco as myCoco

# Folder that holds the weight files; it is also handed to MaskRCNN as model_dir
WEIGHTS_DIR = os.path.join(ROOT_DIR, "weights")

# Weights file evaluated by this notebook (MobileNet-v1 checkpoint).
# Alternative checkpoints, kept for reference:
#   os.path.join(WEIGHTS_DIR, "ResNet-101", "mask_rcnn_coco.h5")
#   os.path.join(ROOT_DIR, "logs", "Light-Head", "training20181203T1502", "mask_rcnn_training_0210.h5")
COCO_MODEL_PATH = os.path.join(
    WEIGHTS_DIR,
    "MobileNet-v1",
    "GustavZ",
    "GustavZ_mobile_mask_rcnn_coco.h5",
)

# Local COCO dataset settings.
# os.path.expanduser("~") is used instead of os.getenv('HOME'): HOME can be
# unset (e.g. on Windows), in which case getenv returns None and the
# os.path.join below would fail with a TypeError.
HOME_DIR = os.path.expanduser("~")
DEFAULT_COCO_PATH = os.path.join(HOME_DIR, "data", "Coco")
DEFAULT_DATASET_YEAR = "2017"
# Passed as `limit` to evaluate_coco in the evaluation cells
DEFAULT_VAL_NUM = 500

# Select the GPU used for the evaluation run.
# NOTE(review): this is set after `mrcnn` has already been imported; it has to
# take effect before the GPU is first initialised - confirm this ordering is OK.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

Using TensorFlow backend.


## Configurations

In [3]:
class InferenceConfig(myCoco.CocoConfig):
    """Configuration overrides used to evaluate the MobileNet-v1 model.

    Only the attributes listed here are overridden; everything else is
    inherited from ``myCoco.CocoConfig``.
    """

    NAME = "inference"

    # --- Batching -----------------------------------------------------------
    # One image on one GPU per step: batch size = GPU_COUNT * IMAGES_PER_GPU = 1
    IMAGES_PER_GPU = 1
    GPU_COUNT = 1

    # --- Backbone -----------------------------------------------------------
    # Alternative backbone: "resnet101"
    BACKBONE = "mobilenetv1"
    BACKBONE_STRIDES = [4, 8, 16, 32, 64]

    # --- Input size and anchors ---------------------------------------------
    # Larger alternative setting: IMAGE_MIN_DIM = 800, IMAGE_MAX_DIM = 1024,
    # RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)
    IMAGE_MAX_DIM = 512
    IMAGE_MIN_DIM = 400
    RPN_ANCHOR_SCALES = (16, 32, 64, 128, 256)

    # --- Heads --------------------------------------------------------------
    TOP_DOWN_PYRAMID_SIZE = 256
    # Alternative size: 2048
    FPN_CLASSIF_FC_LAYERS_SIZE = 1024
    # Alternative detection head: "light-head"
    DETECTION_HEAD = "original"
    # Alternative mask head: "original"
    MASK_HEAD = "mobile"

    # --- Inference ----------------------------------------------------------
    # Confidence threshold of 0 (presumably so that no detection is dropped by
    # score before the COCO evaluation - confirm against mrcnn)
    DETECTION_MIN_CONFIDENCE = 0


# Instantiate the configuration and print all of its values
config = InferenceConfig()
config.display()


Configurations:
BACKBONE                       mobilenetv1
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     1
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
COMPUTE_BACKBONE_SHAPE         None
DETECTION_HEAD                 original
DETECTION_MAX_INSTANCES        100
DETECTION_MIN_CONFIDENCE       0
DETECTION_NMS_THRESHOLD        0.3
FPN_CLASSIF_FC_LAYERS_SIZE     1024
GPU_COUNT                      1
GRADIENT_CLIP_NORM             5.0
IMAGES_PER_GPU                 1
IMAGE_CHANNEL_COUNT            3
IMAGE_MAX_DIM                  512
IMAGE_META_SIZE                93
IMAGE_MIN_DIM                  400
IMAGE_MIN_SCALE                0
IMAGE_RESIZE_MODE              square
IMAGE_SHAPE                    [512 512   3]
LARGE_SEPARABLE_CHANNELS_MID   256
LARGE_SEPARABLE_CHANNELS_OUT   490
LARGE_SEPARABLE_KERNEL_SIZE    15
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  0.001
LOSS_WEIGHTS                   {'rpn_class_loss': 1.0, 'rpn_bbo

## Load the model and the dataset

In [4]:
# Build the Mask R-CNN model in inference mode; WEIGHTS_DIR is handed over as
# the model directory.
model = modellib.MaskRCNN(
    mode="inference",
    config=config,
    model_dir=WEIGHTS_DIR,
)

# Restore the weights trained on MS-COCO from COCO_MODEL_PATH (by_name=True)
model.load_weights(COCO_MODEL_PATH, by_name=True)

In [5]:
# Print the layer-by-layer summary of the underlying Keras model
model.keras_model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_image (InputLayer)        (None, None, None, 3 0                                            
__________________________________________________________________________________________________
conv0 (Conv2D)                  (None, None, None, 3 864         input_image[0][0]                
__________________________________________________________________________________________________
conv0_bn (BatchNorm)            (None, None, None, 3 128         conv0[0][0]                      
__________________________________________________________________________________________________
conv0_relu (Activation)         (None, None, None, 3 0           conv0_bn[0][0]                   
__________________________________________________________________________________________________
conv_dw_1 

In [6]:
# Set up the "val" subset of the dataset found under DEFAULT_COCO_PATH.
# With return_coco=True the value returned by load_coco is kept as coco_obj,
# which the evaluation cells pass on to evaluate_coco.
val_dataset = myCoco.CocoDataset()
coco_obj = val_dataset.load_coco(
    DEFAULT_COCO_PATH,
    "val",
    year=DEFAULT_DATASET_YEAR,
    auto_download=False,
    return_coco=True,
)
val_dataset.prepare()

loading annotations into memory...
Done (t=0.74s)
creating index...
index created!


## Evaluation

In [7]:
# Segmentation ("segm") evaluation; limit=DEFAULT_VAL_NUM presumably caps the
# number of validation images that are evaluated - confirm in samples/coco.
myCoco.evaluate_coco(
    model,
    val_dataset,
    coco_obj,
    "segm",
    limit=DEFAULT_VAL_NUM,
)

Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *segm*
DONE (t=2.70s).
Accumulating evaluation results...
DONE (t=0.72s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.082
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.155
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.080
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.017
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.110
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.145
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.100
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.119
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.119
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

In [8]:
# Bounding-box ("bbox") evaluation; limit=DEFAULT_VAL_NUM presumably caps the
# number of validation images that are evaluated - confirm in samples/coco.
myCoco.evaluate_coco(
    model,
    val_dataset,
    coco_obj,
    "bbox",
    limit=DEFAULT_VAL_NUM,
)

Loading and preparing results...
DONE (t=0.02s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=2.18s).
Accumulating evaluation results...
DONE (t=0.68s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.089
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.169
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.082
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.021
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.114
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.154
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.106
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.129
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.129
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10