# Mobile-Mask-RCNN: training on the COCO dataset

In [1]:
"""
Based on the work of Waleed Abdulla (Matterport)
written by wozhouh
"""

# Standard-library imports
import os
import sys

# Third-party imports
import imgaug

# Root directory of the project: the parent of the current working directory,
# resolved to an absolute path.
ROOT_DIR = os.path.abspath("../")

# Import Mask RCNN.
# The membership check keeps sys.path free of duplicate entries when this
# cell is executed more than once in the same kernel.
if ROOT_DIR not in sys.path:
    sys.path.append(ROOT_DIR)
from mrcnn import model as modellib

# Import COCO config from <ROOT_DIR>/samples/coco
_COCO_SAMPLE_DIR = os.path.join(ROOT_DIR, "samples", "coco")
if _COCO_SAMPLE_DIR not in sys.path:
    sys.path.append(_COCO_SAMPLE_DIR)  # To find local version
import coco

# Disabled: pin training to a specific GPU
# os.environ["CUDA_VISIBLE_DEVICES"] = "4"

Using TensorFlow backend.


In [2]:
# Default settings
# os.path.expanduser("~") is used instead of os.getenv('HOME'): the latter
# returns None when HOME is unset, which would make the os.path.join calls
# below raise TypeError.
HOME_DIR = os.path.expanduser("~")
DEFAULT_WEIGHTS_DIR = os.path.join(ROOT_DIR, "weights")
DEFAULT_DATASET_YEAR = "2017"
DEFAULT_COCO_PATH = os.path.join(HOME_DIR, "data", "Coco")

# Weights to load (alternative initialisation kept for reference):
# MODEL_PATH_UNDER_HOME = os.path.join(DEFAULT_WEIGHTS_DIR, "init", "mobilenet_1_0_224_tf_no_top.h5")
MODEL_PATH_UNDER_HOME = os.path.join(DEFAULT_WEIGHTS_DIR, "ResNet-101", "mask_rcnn_coco.h5")

# NOTE: os.path.join discards earlier components when a later one is absolute.
# MODEL_PATH_UNDER_HOME is rooted at ROOT_DIR (built with os.path.abspath), so
# despite its name the resulting path lives under ROOT_DIR, not HOME_DIR.
INIT_MODEL_PATH = os.path.join(HOME_DIR, MODEL_PATH_UNDER_HOME)

# Directory handed to the model as its log/checkpoint directory
LOG_DIR = os.path.join(ROOT_DIR, "logs", "Light-Head")

In [3]:
# Training configuration
class TrainingConfig(coco.CocoConfig):
    """Overrides of ``coco.CocoConfig`` for the light-head training run."""

    NAME = "training"

    # ---- Hardware --------------------------------------------------------
    # Batch size = GPU_COUNT * IMAGES_PER_GPU (2 * 2 = 4 here)
    GPU_COUNT = 2
    IMAGES_PER_GPU = 2

    # ---- Input data ------------------------------------------------------
    IMAGE_MIN_DIM = 800
    IMAGE_MAX_DIM = 1024
    RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)

    # ---- Backbone --------------------------------------------------------
    BACKBONE = "resnet101"
    BACKBONE_STRIDES = [4, 8, 16, 32, 64]

    # ---- Heads -----------------------------------------------------------
    TOP_DOWN_PYRAMID_SIZE = 256
    # Alternative detection head, currently disabled: "original"
    DETECTION_HEAD = "light-head"
    MASK_HEAD = "original"
    FPN_CLASSIF_FC_LAYERS_SIZE = 2048
    RPN_TRAIN_ANCHORS_PER_IMAGE = 256

    # ---- Optimisation ----------------------------------------------------
    LEARNING_RATE = 0.001
    WEIGHT_DECAY = 0.0001
    # The RPN and mask loss weights are set to 0.0; only the mrcnn class and
    # bbox losses keep weight 1.0. (The disabled alternative used 1.0 for all
    # five losses.)
    LOSS_WEIGHTS = {
        'rpn_class_loss': 0.0,
        'rpn_bbox_loss': 0.0,
        'mrcnn_class_loss': 1.0,
        'mrcnn_bbox_loss': 1.0,
        'mrcnn_mask_loss': 0.0,
    }
    STEPS_PER_EPOCH = 1000
    VALIDATION_STEPS = 50
    TRAIN_BN = False
    TRAIN_ROIS_PER_IMAGE = 200
    ROI_POSITIVE_RATIO = 0.33
    
# Instantiate the training configuration and print its values
config = TrainingConfig()
config.display()

# Build the Mask R-CNN model in training mode; LOG_DIR is passed as model_dir
model = modellib.MaskRCNN(
    mode="training",
    model_dir=LOG_DIR,
    config=config,
)


Configurations:
BACKBONE                       resnet101
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     4
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
COMPUTE_BACKBONE_SHAPE         None
DETECTION_HEAD                 light-head
DETECTION_MAX_INSTANCES        100
DETECTION_MIN_CONFIDENCE       0.7
DETECTION_NMS_THRESHOLD        0.3
FPN_CLASSIF_FC_LAYERS_SIZE     2048
GPU_COUNT                      2
GRADIENT_CLIP_NORM             5.0
IMAGES_PER_GPU                 2
IMAGE_CHANNEL_COUNT            3
IMAGE_MAX_DIM                  1024
IMAGE_META_SIZE                93
IMAGE_MIN_DIM                  800
IMAGE_MIN_SCALE                0
IMAGE_RESIZE_MODE              square
IMAGE_SHAPE                    [1024 1024    3]
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  0.001
LOSS_WEIGHTS                   {'rpn_class_loss': 0.0, 'rpn_bbox_loss': 0.0, 'mrcnn_class_loss': 1.0, 'mrcnn_bbox_loss': 1.0, 'mrcnn_mask_loss': 0.0}
MASK_HEAD  

Tensor("roi_align_classifier/concat_23:0", shape=(?, 4), dtype=float32)
Tensor("roi_align_classifier/StopGradient_1:0", shape=(?,), dtype=int32)
Tensor("roi_align_classifier/split_2:23", shape=(?, ?, ?, 10), dtype=float32)
Tensor("roi_align_classifier/CropAndResize_23:0", shape=(?, 1, 1, 10), dtype=float32)
Tensor("roi_align_classifier/concat_24:0", shape=(?, 4), dtype=float32)
Tensor("roi_align_classifier/StopGradient_1:0", shape=(?,), dtype=int32)
Tensor("roi_align_classifier/split_2:24", shape=(?, ?, ?, 10), dtype=float32)
Tensor("roi_align_classifier/CropAndResize_24:0", shape=(?, 1, 1, 10), dtype=float32)
Tensor("roi_align_classifier/concat_25:0", shape=(?, 4), dtype=float32)
Tensor("roi_align_classifier/StopGradient_1:0", shape=(?,), dtype=int32)
Tensor("roi_align_classifier/split_2:25", shape=(?, ?, ?, 10), dtype=float32)
Tensor("roi_align_classifier/CropAndResize_25:0", shape=(?, 1, 1, 10), dtype=float32)
Tensor("roi_align_classifier/concat_26:0", shape=(?, 4), dtype=float32)
T

Tensor("roi_align_classifier/concat_56:0", shape=(?, 4), dtype=float32)
Tensor("roi_align_classifier/StopGradient_3:0", shape=(?,), dtype=int32)
Tensor("roi_align_classifier/split_4:6", shape=(?, ?, ?, 10), dtype=float32)
Tensor("roi_align_classifier/CropAndResize_55:0", shape=(?, 1, 1, 10), dtype=float32)
Tensor("roi_align_classifier/concat_57:0", shape=(?, 4), dtype=float32)
Tensor("roi_align_classifier/StopGradient_3:0", shape=(?,), dtype=int32)
Tensor("roi_align_classifier/split_4:7", shape=(?, ?, ?, 10), dtype=float32)
Tensor("roi_align_classifier/CropAndResize_56:0", shape=(?, 1, 1, 10), dtype=float32)
Tensor("roi_align_classifier/concat_58:0", shape=(?, 4), dtype=float32)
Tensor("roi_align_classifier/StopGradient_3:0", shape=(?,), dtype=int32)
Tensor("roi_align_classifier/split_4:8", shape=(?, ?, ?, 10), dtype=float32)
Tensor("roi_align_classifier/CropAndResize_57:0", shape=(?, 1, 1, 10), dtype=float32)
Tensor("roi_align_classifier/concat_59:0", shape=(?, 4), dtype=float32)
Tens

Tensor("roi_align_classifier/concat_90:0", shape=(?, 4), dtype=float32)
Tensor("roi_align_classifier/StopGradient_3:0", shape=(?,), dtype=int32)
Tensor("roi_align_classifier/split_4:40", shape=(?, ?, ?, 10), dtype=float32)
Tensor("roi_align_classifier/CropAndResize_89:0", shape=(?, 1, 1, 10), dtype=float32)
Tensor("roi_align_classifier/concat_91:0", shape=(?, 4), dtype=float32)
Tensor("roi_align_classifier/StopGradient_3:0", shape=(?,), dtype=int32)
Tensor("roi_align_classifier/split_4:41", shape=(?, ?, ?, 10), dtype=float32)
Tensor("roi_align_classifier/CropAndResize_90:0", shape=(?, 1, 1, 10), dtype=float32)
Tensor("roi_align_classifier/concat_92:0", shape=(?, 4), dtype=float32)
Tensor("roi_align_classifier/StopGradient_3:0", shape=(?,), dtype=int32)
Tensor("roi_align_classifier/split_4:42", shape=(?, ?, ?, 10), dtype=float32)
Tensor("roi_align_classifier/CropAndResize_91:0", shape=(?, 1, 1, 10), dtype=float32)
Tensor("roi_align_classifier/concat_93:0", shape=(?, 4), dtype=float32)
T

Tensor("roi_align_classifier/concat_123:0", shape=(?, 4), dtype=float32)
Tensor("roi_align_classifier/StopGradient_5:0", shape=(?,), dtype=int32)
Tensor("roi_align_classifier/split_6:23", shape=(?, ?, ?, 10), dtype=float32)
Tensor("roi_align_classifier/CropAndResize_121:0", shape=(?, 1, 1, 10), dtype=float32)
Tensor("roi_align_classifier/concat_124:0", shape=(?, 4), dtype=float32)
Tensor("roi_align_classifier/StopGradient_5:0", shape=(?,), dtype=int32)
Tensor("roi_align_classifier/split_6:24", shape=(?, ?, ?, 10), dtype=float32)
Tensor("roi_align_classifier/CropAndResize_122:0", shape=(?, 1, 1, 10), dtype=float32)
Tensor("roi_align_classifier/concat_125:0", shape=(?, 4), dtype=float32)
Tensor("roi_align_classifier/StopGradient_5:0", shape=(?,), dtype=int32)
Tensor("roi_align_classifier/split_6:25", shape=(?, ?, ?, 10), dtype=float32)
Tensor("roi_align_classifier/CropAndResize_123:0", shape=(?, 1, 1, 10), dtype=float32)
Tensor("roi_align_classifier/concat_126:0", shape=(?, 4), dtype=flo

Tensor("roi_align_classifier/concat_167:0", shape=(?, 4), dtype=float32)
Tensor("roi_align_classifier/StopGradient_7:0", shape=(?,), dtype=int32)
Tensor("roi_align_classifier/split_8:17", shape=(?, ?, ?, 10), dtype=float32)
Tensor("roi_align_classifier/CropAndResize_164:0", shape=(?, 1, 1, 10), dtype=float32)
Tensor("roi_align_classifier/concat_168:0", shape=(?, 4), dtype=float32)
Tensor("roi_align_classifier/StopGradient_7:0", shape=(?,), dtype=int32)
Tensor("roi_align_classifier/split_8:18", shape=(?, ?, ?, 10), dtype=float32)
Tensor("roi_align_classifier/CropAndResize_165:0", shape=(?, 1, 1, 10), dtype=float32)
Tensor("roi_align_classifier/concat_169:0", shape=(?, 4), dtype=float32)
Tensor("roi_align_classifier/StopGradient_7:0", shape=(?,), dtype=int32)
Tensor("roi_align_classifier/split_8:19", shape=(?, ?, ?, 10), dtype=float32)
Tensor("roi_align_classifier/CropAndResize_166:0", shape=(?, 1, 1, 10), dtype=float32)
Tensor("roi_align_classifier/concat_170:0", shape=(?, 4), dtype=flo

Tensor("tower_0/mask_rcnn/roi_align_classifier/concat:0", shape=(?, 4), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/StopGradient_1:0", shape=(?,), dtype=int32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/split_2:0", shape=(?, ?, ?, 10), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/CropAndResize:0", shape=(?, 1, 1, 10), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/concat_1:0", shape=(?, 4), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/StopGradient_1:0", shape=(?,), dtype=int32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/split_2:1", shape=(?, ?, ?, 10), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/CropAndResize_1:0", shape=(?, 1, 1, 10), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/concat_2:0", shape=(?, 4)

Tensor("tower_0/mask_rcnn/roi_align_classifier/concat_32:0", shape=(?, 4), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/StopGradient_1:0", shape=(?,), dtype=int32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/split_2:32", shape=(?, ?, ?, 10), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/CropAndResize_32:0", shape=(?, 1, 1, 10), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/concat_33:0", shape=(?, 4), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/StopGradient_1:0", shape=(?,), dtype=int32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/split_2:33", shape=(?, ?, ?, 10), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/CropAndResize_33:0", shape=(?, 1, 1, 10), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/concat_34:0", s

Tensor("tower_0/mask_rcnn/roi_align_classifier/concat_63:0", shape=(?, 4), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/StopGradient_3:0", shape=(?,), dtype=int32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/split_4:13", shape=(?, ?, ?, 10), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/CropAndResize_62:0", shape=(?, 1, 1, 10), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/concat_64:0", shape=(?, 4), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/StopGradient_3:0", shape=(?,), dtype=int32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/split_4:14", shape=(?, ?, ?, 10), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/CropAndResize_63:0", shape=(?, 1, 1, 10), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/concat_65:0", s

Tensor("tower_0/mask_rcnn/roi_align_classifier/concat_95:0", shape=(?, 4), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/StopGradient_3:0", shape=(?,), dtype=int32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/split_4:45", shape=(?, ?, ?, 10), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/CropAndResize_94:0", shape=(?, 1, 1, 10), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/concat_96:0", shape=(?, 4), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/StopGradient_3:0", shape=(?,), dtype=int32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/split_4:46", shape=(?, ?, ?, 10), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/CropAndResize_95:0", shape=(?, 1, 1, 10), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/concat_97:0", s

Tensor("tower_0/mask_rcnn/roi_align_classifier/concat_127:0", shape=(?, 4), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/StopGradient_5:0", shape=(?,), dtype=int32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/split_6:27", shape=(?, ?, ?, 10), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/CropAndResize_125:0", shape=(?, 1, 1, 10), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/concat_128:0", shape=(?, 4), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/StopGradient_5:0", shape=(?,), dtype=int32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/split_6:28", shape=(?, ?, ?, 10), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/CropAndResize_126:0", shape=(?, 1, 1, 10), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/concat_129:

Tensor("tower_0/mask_rcnn/roi_align_classifier/concat_157:0", shape=(?, 4), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/StopGradient_7:0", shape=(?,), dtype=int32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/split_8:7", shape=(?, ?, ?, 10), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/CropAndResize_154:0", shape=(?, 1, 1, 10), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/concat_158:0", shape=(?, 4), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/StopGradient_7:0", shape=(?,), dtype=int32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/split_8:8", shape=(?, ?, ?, 10), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/CropAndResize_155:0", shape=(?, 1, 1, 10), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/concat_159:0"

Tensor("tower_0/mask_rcnn/roi_align_classifier/concat_189:0", shape=(?, 4), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/StopGradient_7:0", shape=(?,), dtype=int32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/split_8:39", shape=(?, ?, ?, 10), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/CropAndResize_186:0", shape=(?, 1, 1, 10), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/concat_190:0", shape=(?, 4), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/StopGradient_7:0", shape=(?,), dtype=int32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/split_8:40", shape=(?, ?, ?, 10), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/CropAndResize_187:0", shape=(?, 1, 1, 10), dtype=float32, device=/device:GPU:0)
Tensor("tower_0/mask_rcnn/roi_align_classifier/concat_191:

Tensor("tower_1/mask_rcnn/roi_align_classifier/concat_16:0", shape=(?, 4), dtype=float32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/StopGradient_1:0", shape=(?,), dtype=int32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/split_2:16", shape=(?, ?, ?, 10), dtype=float32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/CropAndResize_16:0", shape=(?, 1, 1, 10), dtype=float32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/concat_17:0", shape=(?, 4), dtype=float32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/StopGradient_1:0", shape=(?,), dtype=int32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/split_2:17", shape=(?, ?, ?, 10), dtype=float32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/CropAndResize_17:0", shape=(?, 1, 1, 10), dtype=float32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/concat_18:0", s

Tensor("tower_1/mask_rcnn/roi_align_classifier/CropAndResize_47:0", shape=(?, 1, 1, 10), dtype=float32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/concat_48:0", shape=(?, 4), dtype=float32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/StopGradient_1:0", shape=(?,), dtype=int32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/split_2:48", shape=(?, ?, ?, 10), dtype=float32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/CropAndResize_48:0", shape=(?, 1, 1, 10), dtype=float32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/concat_50:0", shape=(?, 4), dtype=float32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/StopGradient_3:0", shape=(?,), dtype=int32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/split_4:0", shape=(?, ?, ?, 10), dtype=float32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/CropAndResize_49

Tensor("tower_1/mask_rcnn/roi_align_classifier/concat_77:0", shape=(?, 4), dtype=float32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/StopGradient_3:0", shape=(?,), dtype=int32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/split_4:27", shape=(?, ?, ?, 10), dtype=float32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/CropAndResize_76:0", shape=(?, 1, 1, 10), dtype=float32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/concat_78:0", shape=(?, 4), dtype=float32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/StopGradient_3:0", shape=(?,), dtype=int32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/split_4:28", shape=(?, ?, ?, 10), dtype=float32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/CropAndResize_77:0", shape=(?, 1, 1, 10), dtype=float32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/concat_79:0", s

Tensor("tower_1/mask_rcnn/roi_align_classifier/concat_107:0", shape=(?, 4), dtype=float32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/StopGradient_5:0", shape=(?,), dtype=int32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/split_6:7", shape=(?, ?, ?, 10), dtype=float32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/CropAndResize_105:0", shape=(?, 1, 1, 10), dtype=float32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/concat_108:0", shape=(?, 4), dtype=float32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/StopGradient_5:0", shape=(?,), dtype=int32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/split_6:8", shape=(?, ?, ?, 10), dtype=float32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/CropAndResize_106:0", shape=(?, 1, 1, 10), dtype=float32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/concat_109:0"

Tensor("tower_1/mask_rcnn/roi_align_classifier/concat_137:0", shape=(?, 4), dtype=float32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/StopGradient_5:0", shape=(?,), dtype=int32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/split_6:37", shape=(?, ?, ?, 10), dtype=float32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/CropAndResize_135:0", shape=(?, 1, 1, 10), dtype=float32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/concat_138:0", shape=(?, 4), dtype=float32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/StopGradient_5:0", shape=(?,), dtype=int32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/split_6:38", shape=(?, ?, ?, 10), dtype=float32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/CropAndResize_136:0", shape=(?, 1, 1, 10), dtype=float32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/concat_139:

Tensor("tower_1/mask_rcnn/roi_align_classifier/concat_168:0", shape=(?, 4), dtype=float32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/StopGradient_7:0", shape=(?,), dtype=int32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/split_8:18", shape=(?, ?, ?, 10), dtype=float32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/CropAndResize_165:0", shape=(?, 1, 1, 10), dtype=float32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/concat_169:0", shape=(?, 4), dtype=float32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/StopGradient_7:0", shape=(?,), dtype=int32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/split_8:19", shape=(?, ?, ?, 10), dtype=float32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/CropAndResize_166:0", shape=(?, 1, 1, 10), dtype=float32, device=/device:GPU:1)
Tensor("tower_1/mask_rcnn/roi_align_classifier/concat_170:

In [4]:
# Print the layer-by-layer summary of the underlying Keras model
model.keras_model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_image (InputLayer)        (None, None, None, 3 0                                            
__________________________________________________________________________________________________
input_image_meta (InputLayer)   (None, 93)           0                                            
__________________________________________________________________________________________________
input_rpn_match (InputLayer)    (None, None, 1)      0                                            
__________________________________________________________________________________________________
input_rpn_bbox (InputLayer)     (None, None, 4)      0                                            
__________________________________________________________________________________________________
input_gt_c

In [5]:
# Load the initial weights from INIT_MODEL_PATH.
# NOTE(review): by_name=True presumably matches weights to layers by layer
# name, so layers absent from the weight file keep their initial values —
# confirm against mrcnn.model.
model.load_weights(INIT_MODEL_PATH, by_name=True)

In [6]:
# Load the dataset
def _load_coco_split(subset):
    """Create a ``coco.CocoDataset`` for ``subset``, load it and prepare it.

    ``subset`` is forwarded to ``load_coco`` together with DEFAULT_COCO_PATH
    and DEFAULT_DATASET_YEAR; the prepared dataset object is returned.
    """
    dataset = coco.CocoDataset()
    dataset.load_coco(DEFAULT_COCO_PATH, subset, year=DEFAULT_DATASET_YEAR)
    dataset.prepare()
    return dataset


# Training split first, then validation split (same order as the log output)
train_dataset = _load_coco_split("train")
val_dataset = _load_coco_split("val")

loading annotations into memory...
Done (t=13.75s)
creating index...
index created!
loading annotations into memory...
Done (t=0.37s)
creating index...
index created!


In [7]:
# Image augmentation handed to model.train() in the training cell:
# left/right flip applied to 50% of the images.
augmentation = imgaug.augmenters.Fliplr(0.5)

# Disabled: derive the number of epochs from the dataset size
# epoch = train_dataset.dataset_size // (config.STEPS_PER_EPOCH * config.BATCH_SIZE)
# For reference: 'train2017' has 118287 images and 'val2017' has 5000.

In [8]:
# Stage 0: train the light-head layers only.
#   layer selectors: "heads", "5+", "4+", "3+", "all", "light-head"
#   lr:     config.LEARNING_RATE / 5.0  (0.001 / 5 = 0.0002)
#   batch:  4  (GPU_COUNT * IMAGES_PER_GPU)
#   epochs: 30 Keras epochs of STEPS_PER_EPOCH = 1000 steps, i.e. about
#           1.014 passes over train2017 (30 * 1000 * 4 / 118287)
#   layers: 'light-head'
# Original author's note: "actually stopped at epoch 30".
# NOTE(review): the output saved with this cell ends in an InvalidArgumentError
# raised in roi_align_classifier/Reshape during epoch 1 — confirm which run
# the note above refers to.
stage0_learning_rate = config.LEARNING_RATE / 5.0
model.train(
    train_dataset=train_dataset,
    val_dataset=val_dataset,
    learning_rate=stage0_learning_rate,
    epochs=30,
    layers='light-head',
    augmentation=augmentation,
)


Starting at epoch 0. LR=0.0002

Checkpoint Path: /home/processyuan/code/HumanMask/my-Mobile-Mask-RCNN/logs/Light-Head/training20181202T2202/mask_rcnn_training_{epoch:04d}.h5
Selecting layers to train
In model:  rpn_model
In model:  large_separable_conv
    light_head_large_separable_conv_0a   (Conv2D)
    light_head_large_separable_conv_1a   (Conv2D)
    light_head_large_separable_conv_0b   (Conv2D)
    light_head_large_separable_conv_1b   (Conv2D)
light_head_class_conv   (TimeDistributed)
light_head_class_bn    (TimeDistributed)
light_head_bbox_fc     (TimeDistributed)
light_head_class_logits   (TimeDistributed)


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/30


InvalidArgumentError: Input to reshape is a tensor with 2800 values, but the requested shape has 20
	 [[Node: tower_0/mask_rcnn/roi_align_classifier/Reshape = Reshape[T=DT_FLOAT, Tshape=DT_INT32, _class=["loc:@train...ad/Reshape"], _device="/job:localhost/replica:0/task:0/device:GPU:0"](tower_0/mask_rcnn/roi_align_classifier/CropAndResize, tower_0/mask_rcnn/roi_align_classifier/Reshape/shape)]]
	 [[Node: tower_0/mask_rcnn/roi_align_classifier/transpose_3/_5929 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_15748_tower_0/mask_rcnn/roi_align_classifier/transpose_3", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]