In [1]:
# This notebook benchmarks the output of Mesmer models trained on increasing fractions of the TissueNet dataset
import os
import errno
import numpy as np 
import deepcell
from deepcell_toolbox.multiplex_utils import multiplex_preprocess

In [2]:
# Paths for this set of experiments. Nothing is created on disk here: the
# directories are expected to exist already.
#   NPZ_DIR   - location of the frozen dataset archives
#   LOG_DIR   - location for log files
#   MODEL_DIR - per-experiment folder holding model weights and saved metrics
NPZ_DIR = "/data/npz_data/20201018_freeze/"
LOG_DIR = '/data/logs'

experiment_folder = "size_benchmarking"
MODEL_DIR = os.path.join("/data/analyses", experiment_folder)

In [14]:
from deepcell.utils.data_utils import get_data
from skimage.segmentation import relabel_sequential

# Seed-specific prefix shared by the dataset archive and the model weight files
npz_name = "20201018_multiplex_seed_3_"

# Load the test split. The context manager closes the underlying archive as
# soon as the requested arrays have been read, instead of leaving the file
# handle open for the lifetime of the kernel.
with np.load(os.path.join(NPZ_DIR, npz_name + "test_256x256.npz")) as test_dict:
    X_test, y_test = test_dict['X'], test_dict['y']
    # per-image category names, used later to group the benchmarking results
    tissue_list, platform_list = test_dict['tissue_list'], test_dict['platform_list']

In [15]:
# Replace X_test with its preprocessed version.
# NOTE: input and output share the same name, so re-running this cell would
# preprocess data that has already been preprocessed. Re-run the loading cell
# first if this cell has to be executed again.
X_test = multiplex_preprocess(X_test)

In [10]:
# Identifiers of the models to benchmark. They are kept as strings because
# they are spliced into the weight file names and used as result-dict keys.
model_ids = [str(subset_id) for subset_id in (1, 3, 10, 33, 100, 333, 1000, 2665)]

In [20]:
from deepcell.model_zoo.panopticnet import PanopticNet
from deepcell_toolbox.deep_watershed import deep_watershed_mibi

# NOTE: DatasetBenchmarker is defined in a cell further down in this notebook;
# that cell has to be executed before this one.

# Every checkpoint shares the same architecture, so the network is built once
# and only the weights are swapped inside the loop (building it per iteration
# is loop-invariant work that also accumulates graph state in the session).
model = PanopticNet(
    backbone='resnet50',
    input_shape=(256, 256, 2),
    norm_method=None,
    num_semantic_heads=2,
    num_semantic_classes=[1, 3],  # inner distance, pixelwise
    location=True,  # should always be true
    include_top=True,
    use_imagenet=False)

# Maps each model id to its per-tissue and per-platform benchmarking results
metrics = dict()
for current_id in model_ids:
    print("analyzing model {}".format(current_id))
    model_name = npz_name + '_subset_' + current_id + '.h5'
    weights_path = os.path.join(MODEL_DIR, model_name)

    print('Loading model')
    # overwrite the weights of the shared network with this checkpoint
    model.load_weights(weights_path)

    print("creating predictions")
    inner_distance, pixelwise = model.predict(X_test)

    print('postprocessing')
    # only channel index 1 of the pixelwise head is passed on as the interior
    # prediction; the 1:2 slice keeps the trailing channel axis
    labeled_images = deep_watershed_mibi(
        {'inner-distance': inner_distance,
         'pixelwise-interior': pixelwise[:, :, :, 1:2]},
        maxima_threshold=0.1, maxima_model_smooth=0,
        interior_threshold=0.3, interior_model_smooth=2,
        radius=3,
        small_objects_threshold=10,
        fill_holes_threshold=10)

    print("calculating accuracy")
    db = DatasetBenchmarker(y_true=y_test,
                            y_pred=labeled_images,
                            tissue_list=tissue_list,
                            platform_list=platform_list,
                            model_name='default_model')
    tissue_stats, platform_stats = db.benchmark()

    metrics[current_id] = {'tissue_stats': tissue_stats, 'platform_stats': platform_stats}

analyzing model 1
Loading model
creating predictions
postprocessing
calculating accuracy

____________Object-based statistics____________

Number of true cells:		 139149
Number of predicted cells:	 128567

Correct detections:  79602	Recall: 57.2063%
Incorrect detections: 48965	Precision: 61.9148%

Gained detections: 27296	Perc Error: 38.2126%
Missed detections: 28863	Perc Error: 40.4063%
Merges: 10099		Perc Error: 14.1379%
Splits: 4213		Perc Error: 5.8979%
Catastrophes: 961		Perc Error: 1.3453%

Gained detections from splits: 5210
Missed detections from merges: 13886
True detections involved in catastrophes: 1286
Predicted detections involved in catastrophes: 1122 

Average Pixel IOU (Jaccard Index): 0.7062 

uid is breast
uid is gi
uid is immune
uid is lung
uid is pancreas
uid is skin
uid is codex
uid is cycif
uid is imc
uid is mibi
uid is mxif
uid is vectra
uid is all
analyzing model 3
Loading model
creating predictions
postprocessing
calculating accuracy

____________Object-based st

In [24]:
# F1 score across all test images (the 'all' category) for model id '2665'
metrics['2665']['tissue_stats']['all']['f1']

0.8222660212747457

In [25]:
# Persist the results in the experiment folder, one archive entry per model id.
# NOTE(review): each value is a nested dict rather than an array, so numpy
# presumably stores it as a pickled object; reading it back likely needs
# np.load(..., allow_pickle=True) followed by .item() -- confirm.
np.savez_compressed(os.path.join(MODEL_DIR, 'size_subset_metrics_seed_3.npz'), **metrics)

In [11]:
# Copyright 2016-2020 The Van Valen Lab at the California Institute of
# Technology (Caltech), with support from the Paul Allen Family Foundation,
# Google, & National Institutes of Health (NIH) under Grant U24CA224309-01.
# All rights reserved.
#
# Licensed under a modified Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.github.com/vanvalenlab/caliban-toolbox/LICENSE
#
# The Work provided may be used for non-commercial academic purposes only.
# For any other use of the Work, including commercial use, please contact:
# vanvalenlab@gmail.com
#
# Neither the name of Caltech nor the names of its contributors may be used
# to endorse or promote products derived from this software without specific
# prior written permission.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import numpy as np

from deepcell_toolbox.metrics import Metrics, stats_pixelbased
from scipy.stats import hmean


class DatasetBenchmarker(object):
    """Class to perform benchmarking across different tissue and platform types

    Args:
        y_true: true labels
        y_pred: predicted labels
        tissue_list: list of tissue names for each image
        platform_list: list of platform names for each image
        model_name: name of the model used to generate the predictions
        metrics_kwargs: optional dict of arguments to be passed to the
            Metrics class. Defaults to no additional arguments.

    Raises:
        ValueError: if y_true and y_pred have different shapes
        ValueError: if y_true and y_pred are not 4D
        ValueError: if tissue_list or platform_list is not same length as labels
    """
    def __init__(self,
                 y_true,
                 y_pred,
                 tissue_list,
                 platform_list,
                 model_name,
                 metrics_kwargs=None):
        # avoid a mutable default argument shared between instances
        if metrics_kwargs is None:
            metrics_kwargs = {}

        if y_true.shape != y_pred.shape:
            raise ValueError('Shape mismatch: y_true has shape {}, '
                             'y_pred has shape {}. Labels must have the same '
                             'shape.'.format(y_true.shape, y_pred.shape))
        if len(y_true.shape) != 4:
            raise ValueError('Data must be 4D, supplied data is {}'.format(y_true.shape))

        self.y_true = y_true
        self.y_pred = y_pred

        # the number of images and both category lists must all agree in length
        if len({y_true.shape[0], len(tissue_list), len(platform_list)}) != 1:
            raise ValueError('Tissue_list and platform_list must have same length as labels')

        self.tissue_list = tissue_list
        self.platform_list = platform_list
        self.model_name = model_name
        self.metrics = Metrics(model_name, **metrics_kwargs)

    def _benchmark_category(self, category_ids):
        """Compute benchmark stats over the different categories in supplied list

        Expects self.metrics.stats to be populated already; benchmark() calls
        calc_object_stats before invoking this method.

        Args:
            category_ids: list specifying which category each image belongs to

        Returns:
            stats_dict: dictionary of benchmarking results, keyed by category.
                Each entry holds the summed per-image stats plus 'recall',
                'precision', 'f1' and 'jaccard'.
        """

        unique_ids = np.unique(category_ids)

        # create dict to hold stats across each category
        stats_dict = {}
        for uid in unique_ids:
            print("uid is {}".format(uid))
            stats_dict[uid] = {}
            # boolean mask selecting the images that belong to this category
            category_idx = np.isin(category_ids, uid)

            # sum metrics across individual images
            for key in self.metrics.stats:
                stats_dict[uid][key] = self.metrics.stats[key][category_idx].sum()

            # compute additional metrics not produced by Metrics class
            # NOTE(review): a category with no true or no predicted cells makes
            # n_true or n_pred zero, so these divisions are not guarded
            stats_dict[uid]['recall'] = \
                stats_dict[uid]['correct_detections'] / stats_dict[uid]['n_true']

            stats_dict[uid]['precision'] = \
                stats_dict[uid]['correct_detections'] / stats_dict[uid]['n_pred']

            # F1 is the harmonic mean of recall and precision
            stats_dict[uid]['f1'] = \
                hmean([stats_dict[uid]['recall'], stats_dict[uid]['precision']])

            # pixel-level overlap is computed on binarized (foreground) masks
            pixel_stats = stats_pixelbased(self.y_true[category_idx] != 0,
                                           self.y_pred[category_idx] != 0)
            stats_dict[uid]['jaccard'] = pixel_stats['jaccard']

        return stats_dict

    def benchmark(self):
        """Benchmark the predictions per tissue type, per platform and overall

        Returns:
            tuple: (tissue_stats, platform_stats), two dictionaries keyed by
                category name. Both also contain an 'all' key with the stats
                computed across every image; that entry is the same dict
                object in both dictionaries.
        """
        self.metrics.calc_object_stats(self.y_true, self.y_pred)
        tissue_stats = self._benchmark_category(category_ids=self.tissue_list)
        platform_stats = self._benchmark_category(category_ids=self.platform_list)
        # assigning every image to a single category yields the overall stats
        all_stats = self._benchmark_category(category_ids=['all'] * len(self.tissue_list))
        tissue_stats['all'] = all_stats['all']
        platform_stats['all'] = all_stats['all']

        return tissue_stats, platform_stats
