# Mobile-Mask-RCNN: Training on the COCO Dataset

In [1]:
"""
Based on the work of Waleed Abdulla (Matterport)
written by wozhouh
"""

# Standard-library and third-party packages
import os
import sys

import imgaug

# Absolute path of the project root (parent of the current working directory)
ROOT_DIR = os.path.abspath("../")

# Expose the project root on sys.path so the bundled Mask R-CNN package resolves
sys.path.append(ROOT_DIR)
from mrcnn import model as modellib

# Expose the local COCO sample directory so its `coco` module is the one imported
sys.path.append(os.path.join(ROOT_DIR, "samples", "coco"))
import coco

# # assign the GPU for training
# os.environ["CUDA_VISIBLE_DEVICES"] = "4"

Using TensorFlow backend.


In [2]:
# Default settings
# expanduser("~") resolves the home directory even when $HOME is unset;
# os.getenv('HOME') would return None there and break the os.path.join below.
HOME_DIR = os.path.expanduser("~")
DEFAULT_WEIGHTS_DIR = os.path.join(ROOT_DIR, "weights")
DEFAULT_DATASET_YEAR = "2017"
DEFAULT_COCO_PATH = os.path.join(HOME_DIR, "data", "Coco")

# Weights to load (uncomment exactly one; it should match TrainingConfig.BACKBONE)
# MODEL_PATH_UNDER_HOME = os.path.join(DEFAULT_WEIGHTS_DIR, "init", "mobilenet_1_0_224_tf_no_top.h5")
# MODEL_PATH_UNDER_HOME = os.path.join(DEFAULT_WEIGHTS_DIR, "init", "xception_weights_tf_dim_ordering_tf_kernels.h5")
MODEL_PATH_UNDER_HOME = os.path.join(DEFAULT_WEIGHTS_DIR, "init", "NASNet-mobile-no-top.h5")
# MODEL_PATH_UNDER_HOME = os.path.join(DEFAULT_WEIGHTS_DIR, "ResNet-101", "mask_rcnn_coco.h5")

# Despite its name, MODEL_PATH_UNDER_HOME is built from ROOT_DIR and is therefore
# already absolute. Joining it onto HOME_DIR only worked because os.path.join
# discards everything before an absolute component, so use the path directly.
INIT_MODEL_PATH = MODEL_PATH_UNDER_HOME

In [3]:
# Training configuration
class TrainingConfig(coco.CocoConfig):
    """Overrides applied on top of coco.CocoConfig for this training run.

    BACKBONE is set to "mnasnet" so that it agrees with the NASNet-mobile
    initial weights selected for INIT_MODEL_PATH and with the configuration
    and layer names recorded in this notebook's outputs.
    """
    NAME = "training"

    # GPU
    # Effective batch size = GPU_COUNT * IMAGES_PER_GPU (2 * 2 = 4 for this run)
    GPU_COUNT = 2
    IMAGES_PER_GPU = 2

    # data
    IMAGE_MIN_DIM = 800
    IMAGE_MAX_DIM = 1024
    RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)

    # model
    # Must match the architecture of the weights file that is loaded; the previous
    # value "mobilenetv1" did not match the NASNet-mobile weights.
    BACKBONE = "mnasnet"
    BACKBONE_STRIDES = [4, 8, 16, 32, 64]
#     BACKBONE_STRIDES = [2, 4, 8, 16, 32]

    # heads
    FPN_CLASSIF_FC_LAYERS_SIZE = 1024
    TOP_DOWN_PYRAMID_SIZE = 256

    # training
    LEARNING_RATE = 0.001
    WEIGHT_DECAY = 0.0001
    # All five loss terms are weighted equally
    LOSS_WEIGHTS = {'rpn_class_loss': 1.0, 'rpn_bbox_loss': 1.0, 'mrcnn_class_loss': 1.0, 'mrcnn_bbox_loss': 1.0, 'mrcnn_mask_loss': 1.0}
    STEPS_PER_EPOCH = 1000
    VALIDATION_STEPS = 50
    TRAIN_BN = False
    TRAIN_ROIS_PER_IMAGE = 200
    ROI_POSITIVE_RATIO = 0.33
    
# Instantiate the training configuration and print its settings
config = TrainingConfig()
config.display()

# Build the Mask R-CNN model in training mode, using DEFAULT_WEIGHTS_DIR as model_dir
model = modellib.MaskRCNN(
    mode="training",
    model_dir=DEFAULT_WEIGHTS_DIR,
    config=config
)


Configurations:
BACKBONE                       mnasnet
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     4
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
COMPUTE_BACKBONE_SHAPE         None
DETECTION_MAX_INSTANCES        100
DETECTION_MIN_CONFIDENCE       0.7
DETECTION_NMS_THRESHOLD        0.3
FPN_CLASSIF_FC_LAYERS_SIZE     1024
GPU_COUNT                      2
GRADIENT_CLIP_NORM             5.0
IMAGES_PER_GPU                 2
IMAGE_CHANNEL_COUNT            3
IMAGE_MAX_DIM                  1024
IMAGE_META_SIZE                93
IMAGE_MIN_DIM                  800
IMAGE_MIN_SCALE                0
IMAGE_RESIZE_MODE              square
IMAGE_SHAPE                    [1024 1024    3]
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  0.001
LOSS_WEIGHTS                   {'rpn_class_loss': 1.0, 'rpn_bbox_loss': 1.0, 'mrcnn_class_loss': 1.0, 'mrcnn_bbox_loss': 1.0, 'mrcnn_mask_loss': 1.0}
MASK_POOL_SIZE                 14
MASK_SHAPE           

In [4]:
# Print the Keras model summary (layer names/types, output shapes, parameter counts, connections)
model.keras_model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 1024, 1024, 3 0                                            
__________________________________________________________________________________________________
input_image_meta (InputLayer)   (None, 93)           0                                            
__________________________________________________________________________________________________
input_rpn_match (InputLayer)    (None, None, 1)      0                                            
__________________________________________________________________________________________________
input_rpn_bbox (InputLayer)     (None, None, 4)      0                                            
__________________________________________________________________________________________________
input_gt_c

activation_146 (Activation)     (None, 32, 32, 176)  0           adjust_bn_9[0][0]                
__________________________________________________________________________________________________
activation_148 (Activation)     (None, 32, 32, 176)  0           adjust_bn_9[0][0]                
__________________________________________________________________________________________________
activation_150 (Activation)     (None, 32, 32, 176)  0           normal_bn_1_9[0][0]              
__________________________________________________________________________________________________
separable_conv_1_normal_left1_9 (None, 32, 32, 176)  35376       activation_142[0][0]             
__________________________________________________________________________________________________
separable_conv_1_normal_right1_ (None, 32, 32, 176)  32560       activation_144[0][0]             
__________________________________________________________________________________________________
separable_

In [5]:
# Load the initial weights from INIT_MODEL_PATH.
# NOTE(review): by_name=True presumably matches weights to layers by layer name, so the
# weights file should come from the same architecture as config.BACKBONE — confirm in modellib.
model.load_weights(INIT_MODEL_PATH, by_name=True)

In [6]:
# Load the COCO training and validation splits
def _load_coco_split(subset):
    """Create a CocoDataset for `subset`, load its annotations and prepare it."""
    dataset = coco.CocoDataset()
    dataset.load_coco(DEFAULT_COCO_PATH, subset, year=DEFAULT_DATASET_YEAR)
    dataset.prepare()
    return dataset


train_dataset = _load_coco_split("train")
val_dataset = _load_coco_split("val")

loading annotations into memory...
Done (t=13.61s)
creating index...
index created!
loading annotations into memory...
Done (t=0.39s)
creating index...
index created!


In [7]:
# Image augmentation
# Flip images left/right 50% of the time
augmentation = imgaug.augmenters.Fliplr(0.5)

# Disabled: derive the number of epochs from the dataset size
# epoch = train_dataset.dataset_size // (config.STEPS_PER_EPOCH * config.BATCH_SIZE)
# For reference: 'train2017' has 118287 images and 'val2017' has 5000 images

In [8]:
# Stage 0: warm-up
#   learning rate: config.LEARNING_RATE / 100 (0.00001)
#   batch size:    4
#   layers:        'all'
#   dataset passes: ~1.014 (30 epochs x 1000 steps x batch 4 = 120000 images
#                   versus 118287 images in 'train2017')
#   the run was actually stopped at epoch 30
stage0_learning_rate = config.LEARNING_RATE / 100.0
model.train(
    train_dataset=train_dataset,
    val_dataset=val_dataset,
    learning_rate=stage0_learning_rate,
    epochs=30,
    layers='all',
    augmentation=augmentation
)


Starting at epoch 0. LR=1e-05

Checkpoint Path: /home/processyuan/code/HumanMask/my-Mobile-Mask-RCNN/weights/training20181127T1657/mask_rcnn_training_{epoch:04d}.h5
Selecting layers to train
stem_conv1             (Conv2D)
stem_bn1               (BatchNormalization)
reduction_conv_1_stem_1   (Conv2D)
reduction_bn_1_stem_1   (BatchNormalization)
separable_conv_1_reduction_left1_stem_1   (SeparableConv2D)
separable_conv_1_reduction_right1_stem_1   (SeparableConv2D)
separable_conv_1_bn_reduction_left1_stem_1   (BatchNormalization)
separable_conv_1_bn_reduction_right1_stem_1   (BatchNormalization)
separable_conv_2_reduction_left1_stem_1   (SeparableConv2D)
separable_conv_2_reduction_right1_stem_1   (SeparableConv2D)
separable_conv_2_bn_reduction_left1_stem_1   (BatchNormalization)
separable_conv_2_bn_reduction_right1_stem_1   (BatchNormalization)
separable_conv_1_reduction_right2_stem_1   (SeparableConv2D)
separable_conv_1_bn_reduction_right2_stem_1   (BatchNormalization)
separable_conv_1

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/30


ValueError: Error when checking model input: the list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 6 array(s), but instead got the following list of 7 arrays: [array([[[[-1., -1., -1.],
         [-1., -1., -1.],
         [-1., -1., -1.],
         ...,
         [-1., -1., -1.],
         [-1., -1., -1.],
         [-1., -1., -1.]],

        [[-1., -1., -1.],
 ...