In [1]:
import os
import sys
import random
import math
import re
import time
import numpy as np
import cv2
import skimage.io
from imgaug import augmenters as iaa
import matplotlib
import matplotlib.pyplot as plt
from pathlib import Path

# Root directory of the project
ROOT_DIR = os.path.abspath("../../")

# Import Mask RCNN
sys.path.append(ROOT_DIR)  # To find local version of the library
from mrcnn.config import Config
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize
from mrcnn.model import log

%matplotlib inline 

# Directory to save logs and trained model
ROOT_DIR = r"c:\Data\MRCNN"

DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs")

RESULTS_DIR = os.path.join(ROOT_DIR, "results")

VAL_IMAGE_LEN = 2

# Local path to trained weights file
# COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
# Download COCO trained weights from Releases if needed
# if not os.path.exists(COCO_MODEL_PATH):
    # utils.download_trained_weights(COCO_MODEL_PATH)

In [2]:
class BreastTumourConfig(Config):
    """Configuration for training on dataset"""
    
    NAME = "BreastTumours"

    # Train on 1 GPU and 8 images per GPU. We can put multiple images on each
    # GPU because the images are small. Batch size is 8 (GPUs * images/GPU).
    GPU_COUNT = 1
    IMAGES_PER_GPU = 8

    # Number of classes (including background)
    NUM_CLASSES = 1 + 1  # background + tumour

    # Use small images for faster training. Set the limits of the small side
    # the large side, and that determines the image shape.
    IMAGE_MIN_DIM = 128
    IMAGE_MAX_DIM = 128

    # Use smaller anchors because our image and objects are small
    RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128)  # anchor side in pixels

    # Reduce training ROIs per image because the images are small and have
    # few objects. Aim to allow ROI sampling to pick 33% positive ROIs.
    TRAIN_ROIS_PER_IMAGE = 32

    # Number of training and validation steps per epoch
    STEPS_PER_EPOCH = (657 - VAL_IMAGE_LEN) // IMAGES_PER_GPU
    # VALIDATION_STEPS = max(1, VAL_IMAGE_LEN) // IMAGES_PER_GPU
    VALIDATION_STEPS = 5
    
    DETECTION_MIN_CONFIDENCE = 0
    BACKBONE = "resnet50"
    
config = BreastTumourConfig()
config.display()


Configurations:
BACKBONE                       resnet50
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     8
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
COMPUTE_BACKBONE_SHAPE         None
DETECTION_MAX_INSTANCES        100
DETECTION_MIN_CONFIDENCE       0
DETECTION_NMS_THRESHOLD        0.3
FPN_CLASSIF_FC_LAYERS_SIZE     1024
GPU_COUNT                      1
GRADIENT_CLIP_NORM             5.0
IMAGES_PER_GPU                 8
IMAGE_CHANNEL_COUNT            3
IMAGE_MAX_DIM                  128
IMAGE_META_SIZE                14
IMAGE_MIN_DIM                  128
IMAGE_MIN_SCALE                0
IMAGE_RESIZE_MODE              square
IMAGE_SHAPE                    [128 128   3]
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  0.001
LOSS_WEIGHTS                   {'rpn_class_loss': 1.0, 'rpn_bbox_loss': 1.0, 'mrcnn_class_loss': 1.0, 'mrcnn_bbox_loss': 1.0, 'mrcnn_mask_loss': 1.0}
MASK_POOL_SIZE                 14
MASK_SHAPE                

In [3]:
class BreastTumourInferenceConfig(BreastTumourConfig):
    # Set batch size to 1 to run one image at a time
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1
    # Don't resize imager for inferencing
    IMAGE_RESIZE_MODE = "pad64"
    # Non-max suppression threshold to filter RPN proposals.
    # You can increase this during training to generate more propsals.
    RPN_NMS_THRESHOLD = 0.7

In [4]:


dataset_dir = "c:\Data\MRCNN\data"
dataset_dir = os.path.join(dataset_dir, "train")
print(dataset_dir)
image_ids = []
for image in os.listdir(dataset_dir):
    image_ids.append(re.findall('\d+', image)[0])
    print(os.path.join(dataset_dir, "malignant({}).png".format('11')))
print(image_ids)


c:\Data\MRCNN\data\train
c:\Data\MRCNN\data\train\malignant(11).png
c:\Data\MRCNN\data\train\malignant(11).png
c:\Data\MRCNN\data\train\malignant(11).png
c:\Data\MRCNN\data\train\malignant(11).png
c:\Data\MRCNN\data\train\malignant(11).png
c:\Data\MRCNN\data\train\malignant(11).png
c:\Data\MRCNN\data\train\malignant(11).png
c:\Data\MRCNN\data\train\malignant(11).png
c:\Data\MRCNN\data\train\malignant(11).png
c:\Data\MRCNN\data\train\malignant(11).png
['1', '10', '2', '3', '4', '5', '6', '7', '8', '9']


In [5]:
# Setting up dataset 

class BreastTumourDataset(utils.Dataset):

    def load_tumour (self, dataset_dir, subset):
        """Load a subset of the breast ultrasound dataset.
        dataset_dir: Root directory of the dataset
        subset: Subset to load. Either the name of the sub-directory,
                such as stage1_train, stage1_test, ...etc. or, one of:
                * train: stage1_train excluding validation images
                * val: validation images from VAL_IMAGE_IDS
        """
        # Add classes. We have one class.
        # Naming the dataset tumour, and the class tumour
        self.add_class("tumour", 1, "tumour")

        # Which subset?
        # "val": use hard-coded list above
        # "train": use data from stage1_train minus the hard-coded list above
        # else: use the data from the specified sub-directory
        assert subset in ["train", "val"]
        dataset_dir = os.path.join(dataset_dir, subset)

        # Get image ids from directory names
        image_ids = []
        for image in os.listdir(dataset_dir):
            image_ids.append(re.findall('\d+', image)[0])

        # Add images
        for image_id in image_ids:
            self.add_image(
                "tumour",
                image_id=image_id,
                path=os.path.join(os.path.join(dataset_dir, "malignant ({}).png".format(image_id))))
        
        
    def load_mask(self, image_id):
        """Generate instance masks for an image.
       Returns:
        masks: A bool array of shape [height, width, instance count] with
            one mask per instance.
        class_ids: a 1D array of class IDs of the instance masks.
        """
        info = self.image_info[image_id]
        mask_id = info.get('id')
        
        dataset_dir = "c:\Data\MRCNN\data\mask"
        mask_dir = os.path.join(dataset_dir, "malignant ({})_mask.png".format(mask_id))

        # Read mask files from .png image
        mask = []
        m = skimage.io.imread(mask_dir).astype(np.bool)
        mask.append(m)
        mask = np.stack(mask, axis=-1)
        # Return mask, and array of class IDs of each instance. Since we have
        # one class ID, we return an array of ones
        return mask, np.ones([mask.shape[-1]], dtype=np.int32)

    def image_reference(self, image_id):
        """Return the path of the image."""
        info = self.image_info[image_id]
        if info["source"] == "tumour":
            return info["id"]
        else:
            super(self.__class__, self).image_reference(image_id)

In [6]:
"""
dataset_train = BreastTumourDataset()
dataset_train.load_tumour(dataset_dir, "train")
dataset_train.prepare()

x = dataset_train.image_info[0]
x = x.get("id")
print(x)
"""

'\ndataset_train = BreastTumourDataset()\ndataset_train.load_tumour(dataset_dir, "train")\ndataset_train.prepare()\n\nx = dataset_train.image_info[0]\nx = x.get("id")\nprint(x)\n'

In [7]:
# Training

def train(model, dataset_dir, subset):
    """Train the model."""
    # Training dataset.
    dataset_train = BreastTumourDataset()
    dataset_train.load_tumour(dataset_dir, "train")
    dataset_train.prepare()

    # Validation dataset
    dataset_val = BreastTumourDataset()
    dataset_val.load_tumour(dataset_dir, "val")
    dataset_val.prepare()

    # Image augmentation
    # http://imgaug.readthedocs.io/en/latest/source/augmenters.html
    augmentation = iaa.SomeOf((0, 2), [
        iaa.Fliplr(0.5),
        iaa.Flipud(0.5),
        iaa.OneOf([iaa.Affine(rotate=90),
                   iaa.Affine(rotate=180),
                   iaa.Affine(rotate=270)]),
        iaa.Multiply((0.8, 1.5)),
        iaa.GaussianBlur(sigma=(0.0, 5.0))
    ])

    # *** This training schedule is an example. Update to your needs ***

    # If starting from imagenet, train heads only for a bit
    # since they have random weights
    print("Train network heads")
    model.train(dataset_train, dataset_val,
                learning_rate=config.LEARNING_RATE,
                epochs=1,
                augmentation=augmentation,
                layers='heads')

    print("Train all layers")
    model.train(dataset_train, dataset_val,
                learning_rate=config.LEARNING_RATE,
                epochs=1,
                augmentation=augmentation,
                layers='all')

In [8]:
# RLE Encoding

def rle_encode(mask):
    """Encodes a mask in Run Length Encoding (RLE).
    Returns a string of space-separated values.
    """
    assert mask.ndim == 2, "Mask must be of shape [Height, Width]"
    # Flatten it column wise
    m = mask.T.flatten()
    # Compute gradient. Equals 1 or -1 at transition points
    g = np.diff(np.concatenate([[0], m, [0]]), n=1)
    # 1-based indicies of transition points (where gradient != 0)
    rle = np.where(g != 0)[0].reshape([-1, 2]) + 1
    # Convert second index in each pair to lenth
    rle[:, 1] = rle[:, 1] - rle[:, 0]
    return " ".join(map(str, rle.flatten()))


def rle_decode(rle, shape):
    """Decodes an RLE encoded list of space separated
    numbers and returns a binary mask."""
    rle = list(map(int, rle.split()))
    rle = np.array(rle, dtype=np.int32).reshape([-1, 2])
    rle[:, 1] += rle[:, 0]
    rle -= 1
    mask = np.zeros([shape[0] * shape[1]], np.bool)
    for s, e in rle:
        assert 0 <= s < mask.shape[0]
        assert 1 <= e <= mask.shape[0], "shape: {}  s {}  e {}".format(shape, s, e)
        mask[s:e] = 1
    # Reshape and transpose
    mask = mask.reshape([shape[1], shape[0]]).T
    return mask


def mask_to_rle(image_id, mask, scores):
    "Encodes instance masks to submission format."
    assert mask.ndim == 3, "Mask must be [H, W, count]"
    # If mask is empty, return line with image ID only
    if mask.shape[-1] == 0:
        return "{},".format(image_id)
    # Remove mask overlaps
    # Multiply each instance mask by its score order
    # then take the maximum across the last dimension
    order = np.argsort(scores)[::-1] + 1  # 1-based descending
    mask = np.max(mask * np.reshape(order, [1, 1, -1]), -1)
    # Loop over instance masks
    lines = []
    for o in order:
        m = np.where(mask == o, 1, 0)
        # Skip if empty
        if m.sum() == 0.0:
            continue
        rle = rle_encode(m)
        lines.append("{}, {}".format(image_id, rle))
    return "\n".join(lines)

In [9]:
# Detection

def detect(model, dataset_dir, subset):
    """Run detection on images in the given directory."""
    print("Running on {}".format(dataset_dir))

    # Create directory
    if not os.path.exists(RESULTS_DIR):
        os.makedirs(RESULTS_DIR)
    submit_dir = "submit_{:%Y%m%dT%H%M%S}".format(datetime.datetime.now())
    submit_dir = os.path.join(RESULTS_DIR, submit_dir)
    os.makedirs(submit_dir)

    # Read dataset
    dataset = BreastTumourDataset()
    dataset.load_tumour(dataset_dir, subset)
    dataset.prepare()
    # Load over images
    submission = []
    for image_id in dataset.image_ids:
        # Load image and run detection
        image = dataset.load_image(image_id)
        # Detect objects
        r = model.detect([image], verbose=0)[0]
        # Encode image to RLE. Returns a string of multiple lines
        source_id = dataset.image_info[image_id]["id"]
        rle = mask_to_rle(source_id, r["masks"], r["scores"])
        submission.append(rle)
        # Save image with masks
        visualize.display_instances(
            image, r['rois'], r['masks'], r['class_ids'],
            dataset.class_names, r['scores'],
            show_bbox=False, show_mask=False,
            title="Predictions")
        plt.savefig("{}/{}.png".format(submit_dir, dataset.image_info[image_id]["id"]))

    # Save to csv file
    submission = "ImageId,EncodedPixels\n" + "\n".join(submission)
    file_path = os.path.join(submit_dir, "submit.csv")
    with open(file_path, "w") as f:
        f.write(submission)
    print("Saved to ", submit_dir)


In [10]:
dataset_dir = r"c:\Data\MRCNN\data"
config = BreastTumourConfig()

model = modellib.MaskRCNN(mode="training", config=config, model_dir=DEFAULT_LOGS_DIR)

weights_path = model.get_imagenet_weights()
utils.download_trained_weights(weights_path)
model.load_weights(weights_path, by_name=True, exclude=[ "mrcnn_class_logits", "mrcnn_bbox_fc", "mrcnn_bbox", "mrcnn_mask"])

train(model, dataset_dir, "train")

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
A local file was found, but it seems to be incomplete or outdated because the md5 file hash does not match the original value of a268eb855778b3df3c7506639542a6af so we will re-download the data.
Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
Downloading pretrained model to C:\Users\perkl\.keras\models\resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5 ...
... done downloading pretrained model!
Train network heads

Starting at epoch 0. LR=0.001

Checkpoint Path: c:\Data\MRCNN\logs\breasttumours20200709T2258\mask_rcnn_breasttumours_{epoch:04d}.h5
Selecting layers to train
fpn_c5p5               (Conv2D)
fpn_c4p4               (Conv2D)
fpn_c3p3               (Conv2D)
fpn_c2p2               (Conv2D)
fpn_p5                 (Conv2D)
fpn_p2                 (Conv2D)
fpn_p3                 (Conv2D)
fpn_p4          

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


ValueError: An operation has `None` for gradient. Please make sure that all of your ops have a gradient defined (i.e. are differentiable). Common ops without gradient: K.argmax, K.round, K.eval.