In [1]:
# This notebook benchmarks the model output from versions of Mesmer trained on specific tissue types or microscope platforms
import os
import errno
import numpy as np 
import deepcell
from deepcell_toolbox.multiplex_utils import multiplex_preprocess

In [2]:
# Locations used by this set of experiments. Nothing is created on disk here;
# these are plain path strings, and the trailing slashes are part of the values.
LOG_DIR = '/data/logs'
NPZ_DIR = '/data/npz_data/20201018_freeze/'

# Model weights for this experiment live in a sub-folder of the analyses directory.
experiment_folder = "specialist_benchmarking/"
MODEL_DIR = os.path.join("/data/analyses", experiment_folder)

In [3]:
# Select which family of specialist models is benchmarked:
# tissue-specific models when True, platform-specific models otherwise.
tissue_benchmark = False

model_ids = (
    ['breast', 'gi', 'immune', 'pancreas', 'all']
    if tissue_benchmark
    else ['codex', 'cycif', 'mibi', 'vectra', 'all']
)

# Seeds are kept as strings: '1', '2', '3'.
seeds = [str(seed_number) for seed_number in range(1, 4)]

In [6]:
from deepcell.model_zoo.panopticnet import PanopticNet
from deepcell_toolbox.deep_watershed import deep_watershed_mibi

# NOTE(review): DatasetBenchmarker is not imported; it is defined in another
# cell of this notebook (the one carrying the license header). That cell has to
# be executed before this one, otherwise the loop fails with a NameError.

# Results are collected as metrics[seed][model_id] -> per-category stats dict.
metrics = dict()

# Every seed / specialist model shares the same architecture, so the network is
# built once and only its weights are swapped inside the loops. Rebuilding it
# for each of the len(seeds) * len(model_ids) combinations is wasted work.
model = PanopticNet(
    backbone='resnet50',
    input_shape=(256, 256, 2),
    norm_method=None,
    num_semantic_heads=2,
    num_semantic_classes=[1, 3],  # two heads: inner distance (1), pixelwise (3)
    location=True,  # should always be true
    include_top=True,
    use_imagenet=False)

for current_seed in seeds:
    print('Analyzing seed {}'.format(current_seed))
    npz_name = "20201018_multiplex_seed_{}".format(current_seed)
    npz_path = os.path.join(NPZ_DIR, npz_name + "_test_256x256.npz")

    # np.load on an .npz returns a lazily-read archive that holds the file open.
    # Pull out the arrays that are needed and close it before the slow work.
    with np.load(npz_path) as test_dict:
        X_test, y_test = test_dict['X'], test_dict['y']
        tissue_list = test_dict['tissue_list']
        platform_list = test_dict['platform_list']

    print("preprocessing")
    X_test = multiplex_preprocess(X_test)

    print('Size of test is {}'.format(X_test.shape))

    metrics[current_seed] = {}
    for current_id in model_ids:
        print("analyzing model {}".format(current_id))
        # file name of the trained weights for this seed / specialist model
        model_name = npz_name + '_subset_100_' + current_id + '.h5'
        weights_path = os.path.join(MODEL_DIR, model_name)

        print('Loading model')
        model.load_weights(weights_path)

        print("creating predictions")
        inner_distance, pixelwise = model.predict(X_test)

        print('postprocessing')
        # Only channel 1 of the 3-channel pixelwise head is passed on
        # (presumably the cell-interior class -- TODO confirm channel order).
        labeled_images = deep_watershed_mibi(
            {'inner-distance': inner_distance,
             'pixelwise-interior': pixelwise[:, :, :, 1:2]},
            maxima_threshold=0.1,
            maxima_model_smooth=0,
            interior_threshold=0.3,
            radius=3,
            small_objects_threshold=10,
            fill_holes_threshold=10)

        print("calculating accuracy")
        db = DatasetBenchmarker(y_true=y_test,
                                y_pred=labeled_images,
                                tissue_list=tissue_list,
                                platform_list=platform_list,
                                model_name='default_model')
        tissue_stats, platform_stats = db.benchmark()

        # keep only the breakdown that matches the benchmark being run
        if tissue_benchmark:
            metrics[current_seed][current_id] = tissue_stats
        else:
            metrics[current_seed][current_id] = platform_stats

Analyzing seed 1
preprocessing
Size of test is (1286, 256, 256, 2)
analyzing model codex
Loading model


W1024 16:09:29.046036 139906648811328 deprecation.py:506] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


creating predictions
postprocessing
calculating accuracy

____________Object-based statistics____________

Number of true cells:		 139873
Number of predicted cells:	 127224

Correct detections:  100699	Recall: 71.9932%
Incorrect detections: 26525	Precision: 79.1509%

Gained detections: 17327	Perc Error: 34.5297%
Missed detections: 25989	Perc Error: 51.7916%
Merges: 4763		Perc Error: 9.4918%
Splits: 1471		Perc Error: 2.9314%
Catastrophes: 630		Perc Error: 1.2555%

Gained detections from splits: 1581
Missed detections from merges: 5407
True detections involved in catastrophes: 841
Predicted detections involved in catastrophes: 777 

Average Pixel IOU (Jaccard Index): 0.7328 

uid is breast
uid is gi
uid is immune
uid is lung
uid is pancreas
uid is skin
uid is codex
uid is cycif
uid is imc
uid is mibi
uid is mxif
uid is vectra
uid is all
analyzing model cycif
Loading model
creating predictions
postprocessing
calculating accuracy

____________Object-based statistics____________

Number of 

uid is immune
uid is lung
uid is pancreas
uid is skin
uid is codex
uid is cycif
uid is imc
uid is mibi
uid is mxif
uid is vectra
uid is all
Analyzing seed 3
preprocessing
Size of test is (1256, 256, 256, 2)
analyzing model codex
Loading model
creating predictions
postprocessing
calculating accuracy

____________Object-based statistics____________

Number of true cells:		 139149
Number of predicted cells:	 132562

Correct detections:  102633	Recall: 73.7576%
Incorrect detections: 29929	Precision: 77.4226%

Gained detections: 17945	Perc Error: 37.1825%
Missed detections: 22362	Perc Error: 46.3346%
Merges: 4495		Perc Error: 9.3137%
Splits: 2644		Perc Error: 5.4784%
Catastrophes: 816		Perc Error: 1.6908%

Gained detections from splits: 3002
Missed detections from merges: 5104
True detections involved in catastrophes: 984
Predicted detections involved in catastrophes: 959 

Average Pixel IOU (Jaccard Index): 0.753 

uid is breast
uid is gi
uid is immune
uid is lung
uid is pancreas
uid is sk

In [8]:
# Save the collected stats next to the model weights. The file name records
# which benchmark was run, so tissue results are never written to a file
# labelled "platform" (the name used to be hard-coded).
benchmark_kind = 'tissue' if tissue_benchmark else 'platform'
accuracy_path = os.path.join(
    MODEL_DIR, '20201018_{}_accuracy_100.npz'.format(benchmark_kind))

# metrics maps seed -> nested dict of stats, so every entry is stored as a
# pickled object array; reading the file back with np.load needs
# allow_pickle=True on recent NumPy versions.
np.savez_compressed(accuracy_path, **metrics)

In [5]:
# Copyright 2016-2020 The Van Valen Lab at the California Institute of
# Technology (Caltech), with support from the Paul Allen Family Foundation,
# Google, & National Institutes of Health (NIH) under Grant U24CA224309-01.
# All rights reserved.
#
# Licensed under a modified Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.github.com/vanvalenlab/caliban-toolbox/LICENSE
#
# The Work provided may be used for non-commercial academic purposes only.
# For any other use of the Work, including commercial use, please contact:
# vanvalenlab@gmail.com
#
# Neither the name of Caltech nor the names of its contributors may be used
# to endorse or promote products derived from this software without specific
# prior written permission.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import numpy as np

from deepcell_toolbox.metrics import Metrics, stats_pixelbased
from scipy.stats import hmean


class DatasetBenchmarker(object):
    """Class to perform benchmarking across different tissue and platform types

    Args:
        y_true: true labels
        y_pred: predicted labels
        tissue_list: list of tissue names for each image
        platform_list: list of platform names for each image
        model_name: name of the model used to generate the predictions
        metrics_kwargs: optional dict of keyword arguments passed to the
            Metrics constructor. Defaults to None (no extra arguments).

    Raises:
        ValueError: if y_true and y_pred have different shapes
        ValueError: if y_true and y_pred are not 4D
        ValueError: if tissue_list or platform_list is not same length as labels
    """
    def __init__(self,
                 y_true,
                 y_pred,
                 tissue_list,
                 platform_list,
                 model_name,
                 metrics_kwargs=None):
        if y_true.shape != y_pred.shape:
            raise ValueError('Shape mismatch: y_true has shape {}, '
                             'y_pred has shape {}. Labels must have the same '
                             'shape.'.format(y_true.shape, y_pred.shape))
        if len(y_true.shape) != 4:
            raise ValueError('Data must be 4D, supplied data is {}'.format(y_true.shape))

        self.y_true = y_true
        self.y_pred = y_pred

        # all three lengths collapse to a single value only when they agree
        if len({y_true.shape[0], len(tissue_list), len(platform_list)}) != 1:
            raise ValueError('Tissue_list and platform_list must have same length as labels')

        self.tissue_list = tissue_list
        self.platform_list = platform_list
        self.model_name = model_name

        # a None default avoids sharing one mutable dict between instances
        if metrics_kwargs is None:
            metrics_kwargs = {}
        self.metrics = Metrics(model_name, **metrics_kwargs)

    def _benchmark_category(self, category_ids):
        """Compute benchmark stats over the different categories in supplied list

        Relies on self.metrics.stats, so the object stats must have been
        calculated (see benchmark) before this is called.

        Args:
            category_ids: list specifying which category each image belongs to

        Returns:
            stats_dict: dictionary of benchmarking results, keyed by category.
                Each value holds the summed entries of self.metrics.stats plus
                'recall', 'precision', 'f1' and 'jaccard'.
        """

        unique_ids = np.unique(category_ids)

        # create dict to hold stats across each category
        stats_dict = {}
        for uid in unique_ids:
            print("uid is {}".format(uid))
            stats_dict[uid] = {}
            # boolean mask selecting the images that belong to this category
            category_idx = np.isin(category_ids, uid)

            # sum metrics across individual images
            for key in self.metrics.stats:
                stats_dict[uid][key] = self.metrics.stats[key][category_idx].sum()

            # compute additional metrics not produced by Metrics class
            # NOTE(review): these divisions are unguarded; a category without
            # any true or predicted cells would divide by zero -- confirm that
            # this cannot happen for the datasets being benchmarked.
            stats_dict[uid]['recall'] = \
                stats_dict[uid]['correct_detections'] / stats_dict[uid]['n_true']

            stats_dict[uid]['precision'] = \
                stats_dict[uid]['correct_detections'] / stats_dict[uid]['n_pred']

            # F1 is the harmonic mean of recall and precision
            stats_dict[uid]['f1'] = \
                hmean([stats_dict[uid]['recall'], stats_dict[uid]['precision']])

            # pixel-level overlap is computed on foreground/background masks
            pixel_stats = stats_pixelbased(self.y_true[category_idx] != 0,
                                           self.y_pred[category_idx] != 0)
            stats_dict[uid]['jaccard'] = pixel_stats['jaccard']

        return stats_dict

    def benchmark(self):
        """Calculate object stats once, then aggregate them per category

        Returns:
            tuple: (tissue_stats, platform_stats). Each is a dictionary keyed
                by category name, with an additional 'all' entry holding the
                stats aggregated over every image.
        """
        self.metrics.calc_object_stats(self.y_true, self.y_pred)
        tissue_stats = self._benchmark_category(category_ids=self.tissue_list)
        platform_stats = self._benchmark_category(category_ids=self.platform_list)

        # a single dummy category covering every image gives the overall stats
        all_stats = self._benchmark_category(category_ids=['all'] * len(self.tissue_list))
        tissue_stats['all'] = all_stats['all']
        platform_stats['all'] = all_stats['all']

        return tissue_stats, platform_stats
