# Validation on the COCO Dataset with the MobileNet-Based Mask R-CNN

In [2]:
"""
Based on the work of Waleed Abdulla (Matterport)
written by github.com/wozhouh
"""

import os
import sys

# Root directory of the project
ROOT_DIR = os.path.abspath("../../../")

# Import Mask RCNN
sys.path.append(ROOT_DIR)  # To find local version of the library
from mrcnn import model as modellib

# Import COCO config
sys.path.append(os.path.join(ROOT_DIR, "samples", "coco"))  # To find local version
import coco as myCoco

# Folder for weight files; it is also handed to MaskRCNN as model_dir further down
WEIGHTS_DIR = os.path.join(ROOT_DIR, "weights")

# Checkpoint evaluated by this notebook.
# Alternative weights file kept under WEIGHTS_DIR (disabled):
# COCO_MODEL_PATH = os.path.join(WEIGHTS_DIR, "MobileNet-v1", "GustavZ", "GustavZ_mobile_mask_rcnn_coco.h5")
COCO_MODEL_PATH = os.path.join(
    ROOT_DIR,
    "logs",
    "MobileNet-v1",
    "my-training-1",
    "training20181203T1453",
    "mask_rcnn_training_0169.h5",
)

# Local COCO dataset settings
# os.getenv('HOME') yields None when HOME is not defined (e.g. on Windows), and
# os.path.join(None, ...) then raises TypeError. expanduser("~") resolves the
# user's home directory portably and returns the same path when HOME is set.
HOME_DIR = os.path.expanduser("~")
# Dataset root: <home>/data/Coco
DEFAULT_COCO_PATH = os.path.join(HOME_DIR, "data", "Coco")
# Passed as `year` to load_coco when the validation set is loaded
DEFAULT_DATASET_YEAR = "2017"
# Passed as `limit` to evaluate_coco in the evaluation cells
DEFAULT_VAL_NUM = 500

# Expose only GPU "0" to this process (this notebook runs inference/validation).
# NOTE(review): this runs after mrcnn has been imported; presumably it must take
# effect before the backend first initializes the GPU — consider moving it above
# the imports. TODO confirm.
os.environ.update(CUDA_VISIBLE_DEVICES="0")

Using TensorFlow backend.


## Configurations

In [2]:
class InferenceConfig(myCoco.CocoConfig):
    """Settings for evaluating the MobileNet-v1 Mask R-CNN on COCO.

    Only the attributes listed here are overridden; everything else is
    inherited from ``myCoco.CocoConfig``.
    """

    NAME = "inference"

    # --- Hardware / batching ---
    # Batch size = GPU_COUNT * IMAGES_PER_GPU. Both are 1 so that inference
    # processes a single image at a time.
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

    # --- Input resolution and anchor scales ---
    IMAGE_MIN_DIM = 800
    IMAGE_MAX_DIM = 1024
    RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)
    # Half-resolution alternative (disabled):
    #     IMAGE_MIN_DIM = 400
    #     IMAGE_MAX_DIM = 512
    #     RPN_ANCHOR_SCALES = (16, 32, 64, 128, 256)

    # --- Backbone ---
    BACKBONE = "mobilenetv1"
    BACKBONE_STRIDES = [4, 8, 16, 32, 64]

    # --- Heads ---
    TOP_DOWN_PYRAMID_SIZE = 256
    FPN_CLASSIF_FC_LAYERS_SIZE = 1024
    DETECTION_HEAD = "original"
    MASK_HEAD = "original"  # alternative (disabled): "mobile"

    # --- Inference behaviour ---
    # presumably 0 keeps every detection regardless of score — confirm in mrcnn
    DETECTION_MIN_CONFIDENCE = 0
    TRAIN_BN = False

# Instantiate the inference settings and print every resolved value.
# NOTE(review): the saved output of this cell lists BACKBONE resnet101,
# DETECTION_HEAD light-head and FPN_CLASSIF_FC_LAYERS_SIZE 2048, which does not
# match the InferenceConfig class in this cell — re-run on a fresh kernel before
# trusting the recorded output.
config = InferenceConfig()
config.display()


Configurations:
BACKBONE                       resnet101
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     1
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
COMPUTE_BACKBONE_SHAPE         None
DETECTION_HEAD                 light-head
DETECTION_MAX_INSTANCES        100
DETECTION_MIN_CONFIDENCE       0
DETECTION_NMS_THRESHOLD        0.3
FPN_CLASSIF_FC_LAYERS_SIZE     2048
GPU_COUNT                      1
GRADIENT_CLIP_NORM             5.0
IMAGES_PER_GPU                 1
IMAGE_CHANNEL_COUNT            3
IMAGE_MAX_DIM                  1024
IMAGE_META_SIZE                93
IMAGE_MIN_DIM                  800
IMAGE_MIN_SCALE                0
IMAGE_RESIZE_MODE              square
IMAGE_SHAPE                    [1024 1024    3]
LARGE_SEPARABLE_CHANNELS_MID   256
LARGE_SEPARABLE_CHANNELS_OUT   490
LARGE_SEPARABLE_KERNEL_SIZE    15
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  0.001
LOSS_WEIGHTS                   {'rpn_class_loss': 1.0, 'rpn

## Load the model and the dataset

In [3]:
# Build the Mask R-CNN graph in inference mode, using the settings in `config`
# and WEIGHTS_DIR as the model directory.
model = modellib.MaskRCNN(
    mode="inference",
    config=config,
    model_dir=WEIGHTS_DIR,
)

# Restore the checkpoint at COCO_MODEL_PATH, matching layers by name.
model.load_weights(COCO_MODEL_PATH, by_name=True)

Re-starting from epoch 37


In [4]:
# Print the layer-by-layer summary of the underlying Keras model.
# NOTE(review): the saved output starts with zero_padding2d_1 / conv1 / bn_conv1,
# which looks like a ResNet stem rather than MobileNet — re-run on a fresh kernel
# to confirm which backbone was actually built.
model.keras_model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_image (InputLayer)        (None, None, None, 3 0                                            
__________________________________________________________________________________________________
zero_padding2d_1 (ZeroPadding2D (None, None, None, 3 0           input_image[0][0]                
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, None, None, 6 9472        zero_padding2d_1[0][0]           
__________________________________________________________________________________________________
bn_conv1 (BatchNorm)            (None, None, None, 6 256         conv1[0][0]                      
__________________________________________________________________________________________________
activation

In [4]:
# Build the COCO validation dataset from the local copy (no automatic download).
# return_coco=True makes load_coco also hand back an object that the
# evaluation cells pass on to evaluate_coco.
val_dataset = myCoco.CocoDataset()
coco_obj = val_dataset.load_coco(
    DEFAULT_COCO_PATH,
    "val",
    year=DEFAULT_DATASET_YEAR,
    return_coco=True,
    auto_download=False,
)
val_dataset.prepare()

loading annotations into memory...
Done (t=0.69s)
creating index...
index created!


## Evaluation

In [5]:
# Evaluate instance segmentation ("segm") on the validation set.
# limit=DEFAULT_VAL_NUM presumably caps how many images are evaluated — confirm in coco.evaluate_coco.
myCoco.evaluate_coco(model, val_dataset, coco_obj, "segm", limit=DEFAULT_VAL_NUM)

Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *segm*
DONE (t=3.07s).
Accumulating evaluation results...
DONE (t=0.71s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.227
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.415
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.219
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.104
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.282
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.351
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.207
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.287
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.297
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

In [6]:
# Evaluate bounding-box detection ("bbox") on the validation set.
# limit=DEFAULT_VAL_NUM presumably caps how many images are evaluated — confirm in coco.evaluate_coco.
myCoco.evaluate_coco(model, val_dataset, coco_obj, "bbox", limit=DEFAULT_VAL_NUM)

Loading and preparing results...
DONE (t=0.02s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=2.69s).
Accumulating evaluation results...
DONE (t=0.71s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.186
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.422
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.122
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.093
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.235
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.275
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.175
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.241
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.249
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10