# Train Light-Head Mask R-CNN on the COCO Dataset
This notebook provides a quick way to start training a Light-Head Mask R-CNN model on the COCO dataset.

In [1]:
"""
Based on the work of Waleed Abdulla (Matterport)
written by wozhouh
"""

# Import Python packages (standard library first, then third-party)
import os
import sys

import imgaug

# Root directory of the project, resolved against the current working directory
ROOT_DIR = os.path.abspath("../../../")

# Make the project root importable. The membership check keeps sys.path free
# of duplicate entries when this cell is executed more than once.
if ROOT_DIR not in sys.path:
    sys.path.append(ROOT_DIR)

# Import Mask RCNN
from mrcnn import model as modellib

# Import COCO config
_coco_sample_dir = os.path.join(ROOT_DIR, "samples", "coco")
if _coco_sample_dir not in sys.path:
    sys.path.append(_coco_sample_dir)  # To find local version
import coco

# # assign the GPU for training
# os.environ["CUDA_VISIBLE_DEVICES"] = "4"

Using TensorFlow backend.


In [2]:
# Default setting
# os.path.expanduser("~") is used instead of os.getenv('HOME'): the latter
# returns None when HOME is not set, which would make every os.path.join()
# on HOME_DIR below raise TypeError.
HOME_DIR = os.path.expanduser("~")
DEFAULT_WEIGHTS_DIR = os.path.join(ROOT_DIR, "weights")
DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs")
DEFAULT_DATASET_YEAR = "2017"
DEFAULT_COCO_PATH = os.path.join(HOME_DIR, "data", "Coco")

# weights to load
# MODEL_PATH_UNDER_HOME = os.path.join(DEFAULT_WEIGHTS_DIR, "ResNet-101", "mask_rcnn_coco.h5")
MODEL_PATH_UNDER_HOME = os.path.join(DEFAULT_LOGS_DIR, "Light-Head", "training20181211T1752", "mask_rcnn_training_0243.h5")
# NOTE: despite its name, MODEL_PATH_UNDER_HOME is absolute here (it is built
# from ROOT_DIR, which comes from os.path.abspath). os.path.join() discards
# earlier components when a later one is absolute, so INIT_MODEL_PATH ends up
# equal to MODEL_PATH_UNDER_HOME. HOME_DIR only takes effect if a relative
# path is assigned above.
INIT_MODEL_PATH = os.path.join(HOME_DIR, MODEL_PATH_UNDER_HOME)
LOG_DIR = os.path.join(DEFAULT_LOGS_DIR, "Light-Head")

In [6]:
# training config
class TrainingConfig(coco.CocoConfig):
    """Configuration for this Light-Head training run.

    Subclasses coco.CocoConfig and sets the attributes below explicitly;
    anything not listed here is inherited from the parent class.
    """
    NAME = "training"
    
    # GPU
    # Effective batch size = GPU_COUNT * IMAGES_PER_GPU, i.e. 2 * 2 = 4 for
    # this training run.
    GPU_COUNT = 2
    IMAGES_PER_GPU = 2
    
    # data
    IMAGE_MIN_DIM = 800
    IMAGE_MAX_DIM = 1024
    RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)
    
    # model
    BACKBONE = "resnet101"
    BACKBONE_STRIDES = [4, 8, 16, 32, 64]
    
    # heads
    TOP_DOWN_PYRAMID_SIZE = 256
    DETECTION_HEAD = "light-head" 
    MASK_HEAD = "original"
    FPN_CLASSIF_FC_LAYERS_SIZE = 2048
    RPN_TRAIN_ANCHORS_PER_IMAGE = 256
    LARGE_SEPARABLE_KERNEL_SIZE = 15
    LARGE_SEPARABLE_CHANNELS_MID = 256
    LARGE_SEPARABLE_CHANNELS_OUT = 490
    
    # training
    LEARNING_RATE = 0.001
    WEIGHT_DECAY = 0.0001
    # Alternative with every loss weighted equally (kept for reference):
#     LOSS_WEIGHTS = {'rpn_class_loss': 1.0, 'rpn_bbox_loss': 1.0, 'mrcnn_class_loss': 1.0, 'mrcnn_bbox_loss': 1.0, 'mrcnn_mask_loss': 1.0}
    # Active setting: RPN and mask losses are weighted 0.0, class loss 1.0 and
    # bbox loss 1.5 -- presumably so only the detection-head losses drive the
    # update; confirm against how mrcnn.model applies LOSS_WEIGHTS.
    LOSS_WEIGHTS = {'rpn_class_loss': 0.0, 'rpn_bbox_loss': 0.0, 'mrcnn_class_loss': 1.0, 'mrcnn_bbox_loss': 1.5, 'mrcnn_mask_loss': 0.0}
    STEPS_PER_EPOCH = 1000
    VALIDATION_STEPS = 50
    # NOTE(review): presumably keeps batch-norm layers frozen during training -- confirm in mrcnn.model.
    TRAIN_BN = False
    TRAIN_ROIS_PER_IMAGE = 200
    ROI_POSITIVE_RATIO = 0.33
    
# Instantiate the configuration and print all of its settings
config = TrainingConfig()
config.display()
# Build the network in training mode; LOG_DIR is passed as the model directory
# (the checkpoint path printed later by model.train() lies under it).
model = modellib.MaskRCNN(mode="training", model_dir=LOG_DIR, config=config)


Configurations:
BACKBONE                       resnet101
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     4
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
COMPUTE_BACKBONE_SHAPE         None
DETECTION_HEAD                 light-head
DETECTION_MAX_INSTANCES        100
DETECTION_MIN_CONFIDENCE       0.7
DETECTION_NMS_THRESHOLD        0.3
FPN_CLASSIF_FC_LAYERS_SIZE     2048
GPU_COUNT                      2
GRADIENT_CLIP_NORM             5.0
IMAGES_PER_GPU                 2
IMAGE_CHANNEL_COUNT            3
IMAGE_MAX_DIM                  1024
IMAGE_META_SIZE                93
IMAGE_MIN_DIM                  800
IMAGE_MIN_SCALE                0
IMAGE_RESIZE_MODE              square
IMAGE_SHAPE                    [1024 1024    3]
LARGE_SEPARABLE_CHANNELS_MID   256
LARGE_SEPARABLE_CHANNELS_OUT   490
LARGE_SEPARABLE_KERNEL_SIZE    15
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  0.001
LOSS_WEIGHTS                   {'rpn_class_loss': 0.0, 'r

In [4]:
# Print the summary of the underlying Keras model: one row per layer with its
# type, output shape, parameter count and the layers it is connected to.
model.keras_model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_image (InputLayer)        (None, None, None, 3 0                                            
__________________________________________________________________________________________________
input_image_meta (InputLayer)   (None, 93)           0                                            
__________________________________________________________________________________________________
input_rpn_match (InputLayer)    (None, None, 1)      0                                            
__________________________________________________________________________________________________
input_rpn_bbox (InputLayer)     (None, None, 4)      0                                            
__________________________________________________________________________________________________
input_gt_c

In [7]:
# Load the initial weights from INIT_MODEL_PATH (an epoch-243 checkpoint; the
# model reports "Re-starting from epoch 243" after loading it).
# NOTE(review): by_name=True presumably matches weights to layers by layer
# name -- confirm against mrcnn.model.MaskRCNN.load_weights.
model.load_weights(INIT_MODEL_PATH, by_name=True)

Re-starting from epoch 243


In [8]:
# Load the dataset
def _load_coco_split(subset):
    """Create a CocoDataset, load the given subset into it and prepare it.

    `subset` is forwarded unchanged to CocoDataset.load_coco together with
    DEFAULT_COCO_PATH and DEFAULT_DATASET_YEAR; the prepared dataset is returned.
    """
    dataset = coco.CocoDataset()
    dataset.load_coco(DEFAULT_COCO_PATH, subset, year=DEFAULT_DATASET_YEAR)
    dataset.prepare()
    return dataset


# Training split first, then validation split (same order as the log output)
train_dataset = _load_coco_split("train")
val_dataset = _load_coco_split("val")

# Image Augmentation
# Horizontal (left/right) flip applied with probability 0.5
augmentation = imgaug.augmenters.Fliplr(0.5)

loading annotations into memory...
Done (t=14.16s)
creating index...
index created!
loading annotations into memory...
Done (t=4.43s)
creating index...
index created!


In [9]:
# Layer presets noted for this project: "heads", "5+", "4+", "3+", "all", "light-head".
# This run uses 'light-head-detection'; the log below lists the layers it selects.
fine_tune_lr = config.LEARNING_RATE / 20.0   # 0.001 / 20 -> the LR=5e-05 shown in the log
final_epoch = 391                            # epoch index to stop at (log counts "Epoch N/391")
trainable_layers = 'light-head-detection'

model.train(
    train_dataset=train_dataset,
    val_dataset=val_dataset,
    learning_rate=fine_tune_lr,
    epochs=final_epoch,
    layers=trainable_layers,
    augmentation=augmentation
)


Starting at epoch 243. LR=5e-05

Checkpoint Path: /home/processyuan/code/HumanMask/my-Mobile-Mask-RCNN/logs/Light-Head/training20181211T1752/mask_rcnn_training_{epoch:04d}.h5
Selecting layers to train
In model:  rpn_model
In model:  large_separable_conv
    light_head_large_separable_conv_0a   (Conv2D)
    light_head_large_separable_conv_1a   (Conv2D)
    light_head_large_separable_conv_0b   (Conv2D)
    light_head_large_separable_conv_1b   (Conv2D)
    light_head_large_separable_bn   (BatchNorm)
light_head_class_conv   (TimeDistributed)
light_head_class_bn    (TimeDistributed)
light_head_bbox_fc     (TimeDistributed)
light_head_class_logits   (TimeDistributed)


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 244/391
Epoch 245/391
Epoch 246/391
Epoch 247/391
Epoch 248/391
Epoch 249/391
Epoch 250/391
Epoch 251/391
Epoch 252/391
Epoch 253/391
Epoch 254/391
Epoch 255/391
Epoch 256/391
Epoch 257/391
Epoch 258/391
Epoch 259/391
Epoch 260/391
Epoch 261/391
Epoch 262/391
Epoch 263/391
Epoch 264/391


Epoch 265/391
Epoch 266/391
Epoch 267/391
Epoch 268/391
Epoch 269/391
Epoch 270/391
Epoch 271/391
Epoch 272/391
Epoch 273/391
Epoch 274/391
Epoch 275/391
Epoch 276/391
Epoch 277/391
Epoch 278/391
Epoch 279/391
Epoch 280/391
Epoch 281/391
Epoch 282/391
Epoch 283/391
Epoch 284/391


Epoch 285/391
Epoch 286/391
Epoch 287/391
Epoch 288/391
Epoch 289/391
Epoch 290/391
Epoch 291/391
Epoch 292/391
Epoch 293/391
Epoch 294/391
Epoch 295/391
Epoch 296/391
Epoch 297/391
Epoch 298/391
Epoch 299/391
Epoch 300/391
Epoch 301/391
Epoch 302/391
Epoch 303/391
Epoch 304/391


Epoch 305/391
Epoch 306/391
Epoch 307/391
Epoch 308/391
Epoch 309/391
Epoch 310/391
Epoch 311/391
Epoch 312/391
Epoch 313/391
Epoch 314/391
Epoch 315/391
Epoch 316/391
Epoch 317/391
Epoch 318/391
Epoch 319/391
Epoch 320/391
Epoch 321/391
Epoch 322/391
Epoch 323/391
Epoch 324/391


Epoch 325/391
Epoch 326/391
Epoch 327/391
Epoch 328/391
Epoch 329/391
Epoch 330/391
Epoch 331/391
Epoch 332/391
Epoch 333/391
Epoch 334/391
Epoch 335/391
Epoch 336/391
Epoch 337/391
Epoch 338/391
Epoch 339/391
Epoch 340/391
Epoch 341/391
Epoch 342/391
Epoch 343/391
Epoch 344/391


Epoch 345/391
Epoch 346/391
Epoch 347/391
Epoch 348/391
Epoch 349/391
Epoch 350/391
Epoch 351/391
Epoch 352/391
Epoch 353/391
Epoch 354/391
Epoch 355/391
Epoch 356/391
Epoch 357/391
Epoch 358/391
Epoch 359/391
Epoch 360/391
Epoch 361/391
Epoch 362/391
Epoch 363/391
Epoch 364/391


Epoch 365/391
Epoch 366/391
Epoch 367/391
Epoch 368/391
Epoch 369/391
Epoch 370/391
Epoch 371/391
Epoch 372/391
Epoch 373/391
Epoch 374/391
Epoch 375/391
Epoch 376/391
Epoch 377/391
Epoch 378/391
Epoch 379/391
Epoch 380/391
Epoch 381/391
Epoch 382/391
Epoch 383/391
Epoch 384/391


Epoch 385/391
Epoch 386/391
Epoch 387/391
Epoch 388/391
Epoch 389/391
Epoch 390/391
Epoch 391/391


In [None]:
# For reference, with the trained ResNet-101-based Mask-RCNN the losses should be roughly:
# Epoch 1/30
#  28/1000 [..............................] - ETA: 1:37:39 
# - loss: 0.6337 
# - rpn_class_loss: 0.0112 
# - rpn_bbox_loss: 0.1044 
# - mrcnn_class_loss: 0.2009 
# - mrcnn_bbox_loss: 0.0904 
# - mrcnn_mask_loss: 0.2268