In [1]:
# This notebook trains models on increasingly large fractions of the TissueNet dataset
import os
import errno
import numpy as np 
import deepcell
from deepcell_toolbox.multiplex_utils import multiplex_preprocess

In [2]:
# Locations used by this set of experiments.
experiment_folder = "size_benchmarking"
MODEL_DIR = os.path.join("/data/analyses", experiment_folder)  # trained models are written here
NPZ_DIR = "/data/npz_data/20201018_freeze/"                    # input .npz archives
LOG_DIR = '/data/logs'

# exist_ok=True makes this cell safely re-runnable and avoids the
# check-then-create race of a separate os.path.isdir() test.
os.makedirs(MODEL_DIR, exist_ok=True)

In [10]:
from deepcell.utils.data_utils import get_data
from skimage.segmentation import relabel_sequential

# Common filename prefix of the train/val archives for this data split.
npz_name = "20201018_multiplex_seed_2_"

# Build the paths with os.path.join so they stay valid even if NPZ_DIR is
# ever written without its trailing slash.
train_npz_path = os.path.join(NPZ_DIR, npz_name + "train_512x512_split.npz")
val_npz_path = os.path.join(NPZ_DIR, npz_name + "val_256x256_split.npz")

# NOTE(security): allow_pickle=True lets np.load unpickle arbitrary objects
# stored in the archive, which can execute code. Only load archives from a
# trusted source.
# For .npz files np.load returns a lazily-reading archive object: an entry is
# read from disk each time it is indexed, so the handles must stay open for
# the cells below.
train_dict = np.load(train_npz_path, allow_pickle=True)
val_dict = np.load(val_npz_path, allow_pickle=True)

In [11]:
# Entry names stored in each archive. The training loop pairs
# train_keys[i] with val_keys[i] by position.
train_keys = list(train_dict.keys())
val_keys = list(val_dict.keys())
print(train_keys)

['1', '3', '10', '33', '100', '333', '1000', '2665']


In [12]:
from deepcell.model_zoo.panopticnet import PanopticNet
from tensorflow.keras.optimizers import SGD, Adam
from deepcell.utils.train_utils import rate_scheduler
from deepcell import image_generators
from deepcell.utils import train_utils
from tensorflow.python.keras.losses import MSE
from deepcell import losses
from deepcell.utils.train_utils import get_callbacks
from deepcell.utils.train_utils import count_gpus

def semantic_loss(n_classes):
    """Build the loss function for one semantic head.

    Args:
        n_classes: number of output channels of the head.

    Returns:
        A callable taking ``(y_true, y_pred)`` -- the positional order in
        which Keras invokes a loss. It returns the weighted categorical
        cross-entropy scaled by 0.01 when the head has more than one class,
        and plain MSE otherwise.
    """
    def _semantic_loss(y_true, y_pred):
        if n_classes > 1:
            return 0.01 * losses.weighted_categorical_crossentropy(
                y_true, y_pred, n_classes=n_classes)
        return MSE(y_true, y_pred)
    return _semantic_loss


# Index of the first subset to train. With the keys printed above this skips
# everything before '1000', so only '1000' and '2665' are trained here.
# NOTE(review): presumably the smaller subsets were trained in an earlier
# run -- confirm before relying on this notebook to reproduce all models.
START_IDX = 6

# Fixed number of batches per epoch for every subset size.
# NOTE(review): presumably chosen as 2665 // 8 (largest subset / batch size)
# so every model sees the same number of updates -- confirm.
STEPS_PER_EPOCH = 333

for idx in range(START_IDX, len(train_keys)):
    print("Training model for {}".format(train_keys[idx]))
    # train and validation subsets are paired by position in the key lists
    train_key, val_key = train_keys[idx], val_keys[idx]

    # initialize new model
    new_model = PanopticNet(
        backbone='resnet50',
        input_shape=(256, 256, 2),
        norm_method=None,
        num_semantic_heads=2,
        num_semantic_classes=[1, 3], # inner distance, pixelwise
        location=True,  # should always be true
        include_top=True)

    # Index the archive once per split: every access re-reads and unpickles
    # the entry from disk, so fetching 'X' and 'y' separately doubled the I/O.
    train_split = train_dict[train_key].item()
    X_train = multiplex_preprocess(train_split['X'])
    y_train = train_split['y']
    del train_split  # do not keep an extra reference to the raw X array
    print("X_train shape is {}, y_train shape is {}".format(X_train.shape, y_train.shape))

    val_split = val_dict[val_key].item()
    X_val = multiplex_preprocess(val_split['X'])
    y_val = val_split['y']
    del val_split
    print("X_val shape is {}, y_val shape is {}".format(X_val.shape, y_val.shape))

    # set up training parameters
    # NOTE: npz_name already ends in '_', so the resulting name contains a
    # double underscore ('..._seed_2__subset_1000'). Kept as-is so the file
    # names match models that were already saved.
    model_name = npz_name + '_subset_' + train_key
    n_epoch = 100  # Number of training epochs

    # A fresh optimizer per model so no optimizer state carries over
    optimizer = Adam(lr=1e-4, clipnorm=0.001)
    lr_sched = rate_scheduler(lr=1e-4, decay=0.99)

    batch_size = 8

    min_objects = 0  # throw out images with fewer than this many objects
    seed = 0
    print("Model name is {}".format(model_name))

    # create augmented dataset: training images are randomly cropped to the
    # model's 256x256 input and augmented; validation images are not augmented
    datagen = image_generators.CroppingDataGenerator(
        rotation_range=180,
        shear_range=0,
        zoom_range=(0.7, 1/0.7),
        horizontal_flip=True,
        vertical_flip=True,
        crop_size=(256, 256),
        float_dtype='float16',
        int_dtype='int16')

    datagen_val = image_generators.SemanticDataGenerator(
        rotation_range=0,
        shear_range=0,
        zoom_range=0,
        horizontal_flip=False,
        vertical_flip=False,
        float_dtype='float16',
        int_dtype='int16')

    train_data = datagen.flow(
        {'X': X_train, 'y': y_train},
        seed=seed,
        transforms=['inner-distance', 'pixelwise'],
        transforms_kwargs={'pixelwise': {'dilation_radius': 1},
                           'inner-distance': {'erosion_width': 1, 'alpha': 'auto'}},
        min_objects=min_objects,
        batch_size=batch_size)

    val_data = datagen_val.flow(
        {'X': X_val, 'y': y_val},
        seed=seed,
        transforms=['inner-distance', 'pixelwise'],
        transforms_kwargs={'pixelwise': {'dilation_radius': 1},
                           'inner-distance': {'erosion_width': 1, 'alpha': 'auto'}},
        min_objects=min_objects,
        batch_size=batch_size)

    print('generators created')

    # Give losses for all of the semantic heads, keyed by output layer name
    loss = {}
    for layer in new_model.layers:
        if layer.name.startswith('semantic_'):
            n_classes = layer.output_shape[-1]
            loss[layer.name] = semantic_loss(n_classes)

    # compile model
    new_model.compile(loss=loss, optimizer=optimizer)

    # train model
    model_path = os.path.join(MODEL_DIR, '{}.h5'.format(model_name))
    loss_path = os.path.join(MODEL_DIR, '{}.npz'.format(model_name))

    num_gpus = count_gpus()

    print('Training on', num_gpus, 'GPUs.')

    train_callbacks = get_callbacks(
        model_path,
        lr_sched=lr_sched,
        #tensorboard_log_dir=LOG_DIR,
        save_weights_only=num_gpus >= 2,
        monitor='val_loss',
        verbose=1)

    loss_history = new_model.fit_generator(
        train_data,
        steps_per_epoch=STEPS_PER_EPOCH,
        epochs=n_epoch,
        validation_data=val_data,
        validation_steps=val_data.y.shape[0] // batch_size,
        callbacks=train_callbacks)

    # Persist the per-epoch metrics next to the model; loss_history is
    # overwritten on the next iteration, so without this the history is lost.
    np.savez(loss_path, loss_history=loss_history.history)


Training model for 1000
X_train shape is (1000, 512, 512, 2), y_train shape is (1000, 512, 512, 1)


W1027 04:30:31.265104 139858664515392 semantic.py:111] X data dtype is float32: this will increase memory use during preprocessing. Consider using a smaller dtype


X_val shape is (1230, 256, 256, 2), y_val shape is (1230, 256, 256, 1)
Model name is 20201018_multiplex_seed_2__subset_1000


W1027 04:33:14.317371 139858664515392 semantic.py:111] X data dtype is float32: this will increase memory use during preprocessing. Consider using a smaller dtype


generators created
Training on 1 GPUs.
Epoch 1/100
Epoch 00001: val_loss improved from inf to 0.15149, saving model to /data/analyses/size_benchmarking/20201018_multiplex_seed_2__subset_1000.h5
Epoch 2/100
Epoch 00002: val_loss improved from 0.15149 to 0.05394, saving model to /data/analyses/size_benchmarking/20201018_multiplex_seed_2__subset_1000.h5
Epoch 3/100
Epoch 00003: val_loss improved from 0.05394 to 0.01832, saving model to /data/analyses/size_benchmarking/20201018_multiplex_seed_2__subset_1000.h5
Epoch 4/100
Epoch 00004: val_loss improved from 0.01832 to 0.01660, saving model to /data/analyses/size_benchmarking/20201018_multiplex_seed_2__subset_1000.h5
Epoch 5/100
Epoch 00005: val_loss improved from 0.01660 to 0.01574, saving model to /data/analyses/size_benchmarking/20201018_multiplex_seed_2__subset_1000.h5
Epoch 6/100
Epoch 00006: val_loss did not improve from 0.01574
Epoch 7/100
Epoch 00007: val_loss improved from 0.01574 to 0.01521, saving model to /data/analyses/size_ben

Epoch 20/100
Epoch 00020: val_loss improved from 0.01360 to 0.01356, saving model to /data/analyses/size_benchmarking/20201018_multiplex_seed_2__subset_1000.h5
Epoch 21/100
Epoch 00021: val_loss did not improve from 0.01356
Epoch 22/100
Epoch 00022: val_loss improved from 0.01356 to 0.01336, saving model to /data/analyses/size_benchmarking/20201018_multiplex_seed_2__subset_1000.h5
Epoch 23/100
Epoch 00023: val_loss did not improve from 0.01336
Epoch 24/100
Epoch 00024: val_loss improved from 0.01336 to 0.01328, saving model to /data/analyses/size_benchmarking/20201018_multiplex_seed_2__subset_1000.h5
Epoch 25/100
Epoch 00025: val_loss did not improve from 0.01328
Epoch 26/100
Epoch 00026: val_loss did not improve from 0.01328
Epoch 27/100
Epoch 00027: val_loss did not improve from 0.01328
Epoch 28/100
Epoch 00028: val_loss improved from 0.01328 to 0.01292, saving model to /data/analyses/size_benchmarking/20201018_multiplex_seed_2__subset_1000.h5
Epoch 29/100
Epoch 00029: val_loss did n

Epoch 39/100
Epoch 00039: val_loss did not improve from 0.01271
Epoch 40/100
Epoch 00040: val_loss improved from 0.01271 to 0.01262, saving model to /data/analyses/size_benchmarking/20201018_multiplex_seed_2__subset_1000.h5
Epoch 41/100
Epoch 00041: val_loss did not improve from 0.01262
Epoch 42/100
Epoch 00042: val_loss did not improve from 0.01262
Epoch 43/100
Epoch 00043: val_loss did not improve from 0.01262
Epoch 44/100
Epoch 00044: val_loss did not improve from 0.01262
Epoch 45/100
Epoch 00045: val_loss improved from 0.01262 to 0.01248, saving model to /data/analyses/size_benchmarking/20201018_multiplex_seed_2__subset_1000.h5
Epoch 46/100
Epoch 00046: val_loss did not improve from 0.01248
Epoch 47/100
Epoch 00047: val_loss did not improve from 0.01248
Epoch 48/100
Epoch 00048: val_loss did not improve from 0.01248
Epoch 49/100
Epoch 00049: val_loss improved from 0.01248 to 0.01246, saving model to /data/analyses/size_benchmarking/20201018_multiplex_seed_2__subset_1000.h5
Epoch 50

Epoch 00079: val_loss did not improve from 0.01215
Epoch 80/100
Epoch 00080: val_loss did not improve from 0.01215
Epoch 81/100
Epoch 00081: val_loss did not improve from 0.01215
Epoch 82/100
Epoch 00082: val_loss did not improve from 0.01215
Epoch 83/100
Epoch 00083: val_loss did not improve from 0.01215
Epoch 84/100
 73/333 [=====>........................] - ETA: 50s - loss: 0.0141 - semantic_0_loss: 0.0098 - semantic_1_loss: 0.0043
Epoch 00084: val_loss did not improve from 0.01215
Epoch 85/100
Epoch 00085: val_loss did not improve from 0.01215
Epoch 86/100
Epoch 00086: val_loss did not improve from 0.01215
Epoch 87/100
Epoch 00087: val_loss did not improve from 0.01215
Epoch 88/100
Epoch 00088: val_loss did not improve from 0.01215
Epoch 89/100
Epoch 00089: val_loss did not improve from 0.01215
Epoch 90/100
Epoch 00090: val_loss did not improve from 0.01215
Epoch 91/100
Epoch 00091: val_loss did not improve from 0.01215
Epoch 92/100
Epoch 00092: val_loss did not improve from 0.0121

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 00003: val_loss improved from 0.06243 to 0.01820, saving model to /data/analyses/size_benchmarking/20201018_multiplex_seed_2__subset_2665.h5
Epoch 4/100
Epoch 00004: val_loss improved from 0.01820 to 0.01557, saving model to /data/analyses/size_benchmarking/20201018_multiplex_seed_2__subset_2665.h5
Epoch 5/100
Epoch 00005: val_loss did not improve from 0.01557
Epoch 6/100
Epoch 00006: val_loss improved from 0.01557 to 0.01459, saving model to /data/analyses/size_benchmarking/20201018_multiplex_seed_2__subset_2665.h5
Epoch 7/100
Epoch 00007: val_loss did not improve from 0.01459
Epoch 8/100
Epoch 00008: val_loss improved from 0.01459 to 0.01424, saving model to /data/analyses/size_benchmarking/20201018_multiplex_seed_2__subset_2665.h5
Epoch 9/100
Epoch 00009: val_loss did not improve from 0.01424
Epoch 10/100
Epoch 00010: val_loss did not improve from 0.01424
Epoch 11/100
Epoch 00011: val_loss improved from 0.01424 to 0.01392, saving model to /data/analyses/size_benchmarking/20201

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 00022: val_loss did not improve from 0.01314
Epoch 23/100
Epoch 00023: val_loss did not improve from 0.01314
Epoch 24/100
Epoch 00024: val_loss improved from 0.01314 to 0.01312, saving model to /data/analyses/size_benchmarking/20201018_multiplex_seed_2__subset_2665.h5
Epoch 25/100
Epoch 00025: val_loss did not improve from 0.01312
Epoch 26/100
Epoch 00026: val_loss improved from 0.01312 to 0.01290, saving model to /data/analyses/size_benchmarking/20201018_multiplex_seed_2__subset_2665.h5
Epoch 27/100
Epoch 00027: val_loss did not improve from 0.01290
Epoch 28/100
Epoch 00028: val_loss did not improve from 0.01290
Epoch 29/100
Epoch 00029: val_loss improved from 0.01290 to 0.01287, saving model to /data/analyses/size_benchmarking/20201018_multiplex_seed_2__subset_2665.h5
Epoch 30/100
Epoch 00030: val_loss did not improve from 0.01287
Epoch 31/100

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 00042: val_loss did not improve from 0.01249
Epoch 43/100
Epoch 00043: val_loss did not improve from 0.01249
Epoch 44/100
Epoch 00044: val_loss did not improve from 0.01249
Epoch 45/100
Epoch 00045: val_loss improved from 0.01249 to 0.01247, saving model to /data/analyses/size_benchmarking/20201018_multiplex_seed_2__subset_2665.h5
Epoch 46/100
Epoch 00046: val_loss did not improve from 0.01247
Epoch 47/100
Epoch 00047: val_loss did not improve from 0.01247
Epoch 48/100
Epoch 00048: val_loss did not improve from 0.01247
Epoch 49/100
Epoch 00049: val_loss did not improve from 0.01247
Epoch 50/100
Epoch 00050: val_loss did not improve from 0.01247
Epoch 51/100
  3/333 [..............................] - ETA: 51s - loss: 0.0146 - semantic_0_loss: 0.0104 - semantic_1_loss: 0.0042

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 00061: val_loss did not improve from 0.01247
Epoch 62/100
Epoch 00062: val_loss improved from 0.01247 to 0.01242, saving model to /data/analyses/size_benchmarking/20201018_multiplex_seed_2__subset_2665.h5
Epoch 63/100
Epoch 00063: val_loss did not improve from 0.01242
Epoch 64/100
Epoch 00064: val_loss did not improve from 0.01242
Epoch 65/100
Epoch 00065: val_loss did not improve from 0.01242
Epoch 66/100
Epoch 00066: val_loss did not improve from 0.01242
Epoch 67/100
Epoch 00067: val_loss improved from 0.01242 to 0.01230, saving model to /data/analyses/size_benchmarking/20201018_multiplex_seed_2__subset_2665.h5
Epoch 68/100
Epoch 00068: val_loss did not improve from 0.01230
Epoch 69/100
Epoch 00069: val_loss did not improve from 0.01230
Epoch 70/100

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 00080: val_loss did not improve from 0.01221
Epoch 81/100
Epoch 00081: val_loss did not improve from 0.01221
Epoch 82/100
Epoch 00082: val_loss did not improve from 0.01221
Epoch 83/100
Epoch 00083: val_loss did not improve from 0.01221
Epoch 84/100
Epoch 00084: val_loss did not improve from 0.01221
Epoch 85/100
Epoch 00085: val_loss did not improve from 0.01221
Epoch 86/100
Epoch 00086: val_loss did not improve from 0.01221
Epoch 87/100
Epoch 00087: val_loss did not improve from 0.01221
Epoch 88/100
Epoch 00088: val_loss did not improve from 0.01221
Epoch 89/100

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 00099: val_loss did not improve from 0.01210
Epoch 100/100
Epoch 00100: val_loss did not improve from 0.01210
