# 2D Nuclear Segmentation with RetinaMask

In [1]:
import os
import errno

import numpy as np

import deepcell

In [7]:
# Configuration: directories used by this set of experiments.
experiment_folder = "retina_mask/"
# Directory holding the model weights and the saved benchmark metrics.
MODEL_DIR = os.path.join("/data/analyses/", experiment_folder)
# Directory holding the train/test .npz files loaded by the evaluation loop.
NPZ_DIR = "/data/npz_data/20201018_freeze/"
LOG_DIR = '/data/logs'

# exist_ok=True makes the cell idempotent and removes the check-then-create
# race of a separate os.path.isdir() test.
os.makedirs(MODEL_DIR, exist_ok=True)

In [14]:
from timeit import default_timer

In [17]:
# Quick timing check: allocate a (1000, 1000, 10) zero array, take its mean,
# and report the wall-clock seconds spent on both steps.
start = default_timer()
xx = np.zeros(shape=(1000, 1000, 10))
xx.mean()
end = default_timer()
elapsed = end - start
print(elapsed)

0.026142030954360962


In [26]:
from tensorflow.keras.optimizers import SGD, Adam
from deepcell.utils.train_utils import rate_scheduler
from deepcell.utils.retinanet_anchor_utils import get_anchor_parameters
from deepcell.training import train_model_retinanet
from deepcell import model_zoo
from deepcell_toolbox.multiplex_utils import multiplex_preprocess
from deepcell_toolbox.retinanet import retinamask_postprocess
from timeit import default_timer

# Evaluate one pre-trained RetinaMask model per data split and collect the
# per-tissue / per-platform benchmark statistics for each split.
model_splits = ['1', '2', '3']
metrics = {}
# The loop variable is `split` (not `model`) so that it is not overwritten by
# the Keras model built below; results are keyed by the split name.
for split in model_splits:
    print('loading data')
    test_name = "20201018_multiplex_seed_{}_test_256x256.npz".format(split)
    test_dict = np.load(os.path.join(NPZ_DIR, test_name))
    train_name = "20201018_multiplex_seed_{}_train_256x256.npz".format(split)
    train_dict = np.load(os.path.join(NPZ_DIR, train_name))
    # only the training labels are read; they drive the anchor parameters
    y_train = train_dict['y']

    X_test = test_dict['X']
    X_test = multiplex_preprocess(X_test)

    y_test = test_dict['y']

    model_name = 'retina_mask_split_{}.h5'.format(split)
    backbone = 'resnet50'  # vgg16, vgg19, resnet50, densenet121, densenet169, densenet201

    # start timing
    time_start = default_timer()
    print('creating backbone levels')
    # Generate backbone information from the data
    backbone_levels, pyramid_levels, anchor_params = get_anchor_parameters(y_train.astype('int'))

    print('creating model')
    model = model_zoo.RetinaMask(
        backbone=backbone,
        input_shape=[256, 256, 2],
        class_specific_filter=False,
        num_classes=1,
        backbone_levels=backbone_levels,
        pyramid_levels=pyramid_levels,
        anchor_params=anchor_params
    )

    # os.path.join keeps this correct even if MODEL_DIR loses its trailing slash
    model.load_weights(os.path.join(MODEL_DIR, model_name))
    print('predicting')
    # the last four model outputs are unpacked as boxes, scores, labels, masks
    boxes, scores, labels, masks = model.predict(X_test)[-4:]
    print('postprocessing')
    masks = retinamask_postprocess((boxes, scores, labels, masks))

    # end time
    time_end = default_timer()
    print("elapsed time is {}".format(time_end - time_start))

    # calculating accuracy
    # NOTE: DatasetBenchmarker is defined in a later cell of this notebook;
    # that cell must have been executed before this one.
    print("calculating accuracy")
    db = DatasetBenchmarker(y_true=y_test,
                            y_pred=np.expand_dims(masks, axis=-1),
                            tissue_list=test_dict['tissue_list'],
                            platform_list=test_dict['platform_list'],
                            model_name='default_model')
    tissue_stats, platform_stats = db.benchmark()

    metrics[split] = {'tissue_stats': tissue_stats, 'platform_stats': platform_stats}

    

loading data
creating backbone levels
creating model
predicting
postprocessing
elapsed time is 496.20219662506133
calculating accuracy

____________Object-based statistics____________

Number of true cells:		 139873
Number of predicted cells:	 102717

Correct detections:  84126	Recall: 60.1446%
Incorrect detections: 18591	Precision: 81.9008%

Gained detections: 11398	Perc Error: 17.6527%
Missed detections: 48870	Perc Error: 75.6876%
Merges: 1744		Perc Error: 2.701%
Splits: 2004		Perc Error: 3.1037%
Catastrophes: 552		Perc Error: 0.8549%

Gained detections from splits: 2159
Missed detections from merges: 1886
True detections involved in catastrophes: 786
Predicted detections involved in catastrophes: 807 

Average Pixel IOU (Jaccard Index): 0.7033 

uid is breast
uid is gi
uid is immune
uid is lung
uid is pancreas
uid is skin
uid is codex
uid is cycif
uid is imc
uid is mibi
uid is mxif
uid is vectra
uid is all
loading data
creating backbone levels
creating model
predicting
postprocessin

  segments = random_walker(foreground, markers)


elapsed time is 516.7985231330385
calculating accuracy

____________Object-based statistics____________

Number of true cells:		 146194
Number of predicted cells:	 104502

Correct detections:  86800	Recall: 59.3732%
Incorrect detections: 17702	Precision: 83.0606%

Gained detections: 10840	Perc Error: 16.0931%
Missed detections: 52219	Perc Error: 77.5246%
Merges: 2005		Perc Error: 2.9766%
Splits: 1759		Perc Error: 2.6114%
Catastrophes: 535		Perc Error: 0.7943%

Gained detections from splits: 1880
Missed detections from merges: 2200
True detections involved in catastrophes: 787
Predicted detections involved in catastrophes: 807 

Average Pixel IOU (Jaccard Index): 0.7019 

uid is breast
uid is gi
uid is immune
uid is lung
uid is pancreas
uid is skin
uid is codex
uid is cycif
uid is imc
uid is mibi
uid is mxif
uid is vectra
uid is all
loading data
creating backbone levels
creating model
predicting
postprocessing
elapsed time is 478.66113150992896
calculating accuracy

____________Object-b

In [37]:
# Write the benchmark results to a compressed .npz archive inside MODEL_DIR.
# NOTE(review): `new_metrics` is not defined in any cell visible here;
# presumably it was built from `metrics` in a cell that is no longer present.
# Confirm before relying on Restart & Run All. The ** expansion requires the
# mapping's keys to be strings.
metrics_path = os.path.join(MODEL_DIR, 'retinamask_metrics.npz')
np.savez_compressed(metrics_path, **new_metrics)

In [20]:
# Copyright 2016-2020 The Van Valen Lab at the California Institute of
# Technology (Caltech), with support from the Paul Allen Family Foundation,
# Google, & National Institutes of Health (NIH) under Grant U24CA224309-01.
# All rights reserved.
#
# Licensed under a modified Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.github.com/vanvalenlab/caliban-toolbox/LICENSE
#
# The Work provided may be used for non-commercial academic purposes only.
# For any other use of the Work, including commercial use, please contact:
# vanvalenlab@gmail.com
#
# Neither the name of Caltech nor the names of its contributors may be used
# to endorse or promote products derived from this software without specific
# prior written permission.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import numpy as np

from deepcell_toolbox.metrics import Metrics, stats_pixelbased
from scipy.stats import hmean


class DatasetBenchmarker(object):
    """Class to perform benchmarking across different tissue and platform types

    Args:
        y_true: true labels
        y_pred: predicted labels
        tissue_list: list of tissue names for each image
        platform_list: list of platform names for each image
        model_name: name of the model used to generate the predictions
        metrics_kwargs: optional dict of keyword arguments passed to the
            Metrics constructor. None (the default) passes no extra arguments.

    Raises:
        ValueError: if y_true and y_pred have different shapes
        ValueError: if y_true and y_pred are not 4D
        ValueError: if tissue_list or platform_list is not same length as labels
    """
    def __init__(self,
                 y_true,
                 y_pred,
                 tissue_list,
                 platform_list,
                 model_name,
                 metrics_kwargs=None):
        if y_true.shape != y_pred.shape:
            raise ValueError('Shape mismatch: y_true has shape {}, '
                             'y_pred has shape {}. Labels must have the same '
                             'shape.'.format(y_true.shape, y_pred.shape))
        if len(y_true.shape) != 4:
            raise ValueError('Data must be 4D, supplied data is {}'.format(y_true.shape))

        self.y_true = y_true
        self.y_pred = y_pred

        # the number of images and both annotation lists must all agree
        if len({y_true.shape[0], len(tissue_list), len(platform_list)}) != 1:
            raise ValueError('Tissue_list and platform_list must have same length as labels')

        # a None default avoids sharing one mutable dict between all instances
        if metrics_kwargs is None:
            metrics_kwargs = {}

        self.tissue_list = tissue_list
        self.platform_list = platform_list
        self.model_name = model_name
        self.metrics = Metrics(model_name, **metrics_kwargs)

    def _benchmark_category(self, category_ids):
        """Compute benchmark stats over the different categories in supplied list

        Reads self.metrics.stats, so benchmark() (which calls
        calc_object_stats first) is the intended entry point.

        Args:
            category_ids: list specifying which category each image belongs to

        Returns:
            stats_dict: dictionary keyed by category id. Each value is a dict
                holding the summed entries of self.metrics.stats for that
                category plus 'recall', 'precision', 'f1' and 'jaccard'.
        """

        unique_ids = np.unique(category_ids)

        # create dict to hold stats across each category
        stats_dict = {}
        for uid in unique_ids:
            print("uid is {}".format(uid))
            stats_dict[uid] = {}
            # boolean mask selecting the images that belong to this category
            category_idx = np.isin(category_ids, uid)

            # sum metrics across individual images
            # NOTE(review): assumes self.metrics.stats can be indexed by key
            # and then by a boolean mask over images - confirm against the
            # Metrics implementation.
            for key in self.metrics.stats:
                stats_dict[uid][key] = self.metrics.stats[key][category_idx].sum()

            # compute additional metrics not produced by Metrics class
            # NOTE(review): a category with zero true or zero predicted cells
            # makes these divisions by zero - confirm whether that can occur.
            stats_dict[uid]['recall'] = \
                stats_dict[uid]['correct_detections'] / stats_dict[uid]['n_true']

            stats_dict[uid]['precision'] = \
                stats_dict[uid]['correct_detections'] / stats_dict[uid]['n_pred']

            # f1 is the harmonic mean of recall and precision
            stats_dict[uid]['f1'] = \
                hmean([stats_dict[uid]['recall'], stats_dict[uid]['precision']])

            # pixel-level stats are computed on foreground/background masks
            pixel_stats = stats_pixelbased(self.y_true[category_idx] != 0,
                                           self.y_pred[category_idx] != 0)
            stats_dict[uid]['jaccard'] = pixel_stats['jaccard']

        return stats_dict

    def benchmark(self):
        """Compute object stats and aggregate them by tissue and by platform

        Runs calc_object_stats on the full y_true / y_pred arrays, then
        aggregates per tissue, per platform and over every image. The
        all-image result is stored under the key 'all' in both returned
        dictionaries (replacing any category that is itself named 'all').

        Returns:
            tuple: (tissue_stats, platform_stats) dictionaries as produced by
                _benchmark_category, each with the extra 'all' entry.
        """
        self.metrics.calc_object_stats(self.y_true, self.y_pred)
        tissue_stats = self._benchmark_category(category_ids=self.tissue_list)
        platform_stats = self._benchmark_category(category_ids=self.platform_list)
        # a constant label puts every image into one category
        all_stats = self._benchmark_category(category_ids=['all'] * len(self.tissue_list))
        tissue_stats['all'] = all_stats['all']
        platform_stats['all'] = all_stats['all']

        return tissue_stats, platform_stats
