In [1]:
# Adapted from https://www.kaggle.com/keegil/keras-u-net-starter-lb-0-277
import os
import sys
import random
from collections import OrderedDict
import itertools
import math
import logging
import json
import re

import numpy as np
import pandas as pd

import cv2
import utils
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.lines as lines
from matplotlib.patches import Polygon

from tqdm import tqdm
from itertools import chain
from skimage.io import imread, imshow, imread_collection, concatenate_images
from skimage.transform import resize
from skimage.morphology import label

from keras.models import Model, load_model
from keras.layers import Input, AlphaDropout, Activation
from keras.layers.core import Dropout, Lambda
from keras.layers.pooling import MaxPooling2D
from keras.layers.merge import concatenate
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.optimizers import SGD
from keras import layers
from keras import backend as K

import tensorflow as tf

# Network input size (every image is resized to this) and dataset locations
IMG_WIDTH = 672
IMG_HEIGHT = 672
IMG_CHANNELS = 3
TRAIN_PATH = 'stage1_train/'
TEST_PATH = 'stage2_test/'

# Each immediate sub-directory of the train/test folders is one sample id.
# os.walk returns directory names in arbitrary (filesystem-dependent) order,
# so the ids are sorted: with a stable order the seeded train/validation
# split performed later selects the same samples on every machine and run.
train_ids = sorted(next(os.walk(TRAIN_PATH))[1])
test_ids = sorted(next(os.walk(TEST_PATH))[1])

  (fname, cnt))
  (fname, cnt))
  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Pre-allocate the training images and their binary target masks.
# The builtin `bool` is used as dtype because the `np.bool` alias is
# deprecated and missing from several NumPy releases.
X_train = np.zeros((len(train_ids), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)
Y_train = np.zeros((len(train_ids), IMG_HEIGHT, IMG_WIDTH, 1), dtype=bool)
# Read every training image (each containing several nuclei) and resize it
# to the network input size
for n, id_ in tqdm(enumerate(train_ids), total=len(train_ids)):
    path = TRAIN_PATH + id_
    img = cv2.imread(path + '/images/' + id_ + '.png')[:,:,:3]
    img = resize(img, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)
    X_train[n] = img
    mask = np.zeros((IMG_HEIGHT, IMG_WIDTH, 1), dtype=bool)
    # The masks/ folder holds the individual segmented nuclei; they are read
    # as grayscale, resized to the same size as the image, and merged into a
    # single target mask with a pixel-wise maximum.
    for mask_file in next(os.walk(path + '/masks/'))[2]:
        mask_ = cv2.imread(path + '/masks/' + mask_file, 0)
        mask_ = np.expand_dims(resize(mask_, (IMG_HEIGHT, IMG_WIDTH), mode='constant',
                                      preserve_range=True), axis=-1)
        mask = np.maximum(mask, mask_)
    # Assigning into the boolean array turns every non-zero pixel into True
    Y_train[n] = mask

# Read and resize the test images, remembering each original (height, width)
# so predictions can be scaled back later
X_test = np.zeros((len(test_ids), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)
sizes_test = []
for n, id_ in tqdm(enumerate(test_ids), total=len(test_ids)):
    path = TEST_PATH + id_
    img = cv2.imread(path + '/images/' + id_ + '.png')[:,:,:3]
    sizes_test.append([img.shape[0], img.shape[1]])
    img = resize(img, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)
    X_test[n] = img

print('Done!')

100%|██████████| 664/664 [09:04<00:00,  1.22it/s]
100%|██████████| 3019/3019 [02:54<00:00, 17.34it/s]

Done!





In [3]:
from sklearn.model_selection import train_test_split
# Hold out 10% of the training images (with their masks) for validation.
# The fixed random_state makes the shuffle repeatable for a given input order.
x_train, x_valid, y_train, y_valid = train_test_split(
    X_train,
    Y_train,
    test_size=0.1,
    random_state=8011,
)

In [4]:
from keras.losses import binary_crossentropy

# Define IoU metric
def mean_iou(y_true, y_pred):
    """Keras metric: mean IoU averaged over a range of binarisation thresholds.

    For every threshold from 0.5 up to (not including) 1.0 in steps of 0.05,
    the prediction is binarised and scored with ``tf.metrics.mean_iou`` using
    two classes. The per-threshold scores are stacked and averaged.

    Args:
        y_true: ground-truth tensor passed in by Keras.
        y_pred: predicted tensor passed in by Keras.

    Returns:
        A tensor holding the mean of the per-threshold IoU scores.
    """
    per_threshold = []
    for threshold in np.arange(0.5, 1.0, 0.05):
        binarised = tf.to_int32(y_pred > threshold)
        iou, update_op = tf.metrics.mean_iou(y_true, binarised, 2)
        # tf.metrics.* creates local variables that must be initialised
        K.get_session().run(tf.local_variables_initializer())
        # Make reading the score depend on the update op so the metric is
        # refreshed before its value is returned
        with tf.control_dependencies([update_op]):
            iou = tf.identity(iou)
        per_threshold.append(iou)
    return K.mean(K.stack(per_threshold), axis=0)

def get_crop_shape(target, refer):
    """Compute how much to trim `target` so it matches `refer` spatially.

    Args:
        target: tensor-like whose ``get_shape()`` is indexed at 1 (height)
            and 2 (width); must be at least as large as `refer` on both.
        refer: tensor-like providing the reference height and width.

    Returns:
        ``((top, bottom), (left, right))`` crop amounts. When a difference is
        odd, the extra pixel goes to the second element of the pair.

    Raises:
        AssertionError: if `target` is smaller than `refer` on either axis.
    """
    def _split(axis):
        # Size difference along one axis, divided into a leading and a
        # trailing part (the trailing part takes the remainder).
        diff = (target.get_shape()[axis] - refer.get_shape()[axis]).value
        assert (diff >= 0)
        leading = diff // 2
        return leading, diff - leading

    # width is the 3rd dimension, height the 2nd; width is checked first
    width_crop = _split(2)
    height_crop = _split(1)

    return height_crop, width_crop

In [5]:
# Architecture adapted from https://github.com/zizhaozhang/unet-tensorflow-keras/blob/master/model.py
#
# U-Net style encoder/decoder built with the Keras functional API.
# Every convolution below is 3x3, ReLU, 'same'-padded, except the final 1x1
# sigmoid layer that produces the single-channel mask prediction.

# Axis along which skip connections are concatenated: the last axis of the
# (batch, height, width, channels) tensors.
concat_axis = 3
inputs = Input((IMG_HEIGHT, IMG_WIDTH, 3))

# --- Encoder: two convolutions then 2x2 max-pooling per level (32 -> 256 filters)
conv1 = layers.Conv2D(32, (3, 3), activation='relu', padding='same', name='conv1_1')(inputs)
conv1 = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(conv1)
pool1 = layers.MaxPooling2D(pool_size=(2, 2))(conv1)
conv2 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(pool1)
conv2 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(conv2)
pool2 = layers.MaxPooling2D(pool_size=(2, 2))(conv2)

conv3 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(pool2)
conv3 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(conv3)
pool3 = layers.MaxPooling2D(pool_size=(2, 2))(conv3)

conv4 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(pool3)
conv4 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(conv4)
# Dropout is applied before pooling, so the skip connection taken from conv4
# further down also sees the dropped-out activations
conv4 = Dropout(0.5)(conv4)
pool4 = layers.MaxPooling2D(pool_size=(2, 2))(conv4)

# --- Bottleneck: 512 filters, with dropout
conv5 = layers.Conv2D(512, (3, 3), activation='relu', padding='same')(pool4)
conv5 = layers.Conv2D(512, (3, 3), activation='relu', padding='same')(conv5)
conv5 = Dropout(0.5)(conv5)

# --- Decoder: at each level upsample 2x, crop the matching encoder output to
# the upsampled size, concatenate both, then apply two convolutions
up_conv5 = layers.UpSampling2D(size=(2, 2))(conv5)
ch, cw = get_crop_shape(conv4, up_conv5)
crop_conv4 = layers.Cropping2D(cropping=(ch,cw))(conv4)
up6 = layers.concatenate([up_conv5, crop_conv4], axis=concat_axis)
conv6 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(up6)
conv6 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(conv6)

up_conv6 = layers.UpSampling2D(size=(2, 2))(conv6)
ch, cw = get_crop_shape(conv3, up_conv6)
crop_conv3 = layers.Cropping2D(cropping=(ch,cw))(conv3)
up7 = layers.concatenate([up_conv6, crop_conv3], axis=concat_axis) 
conv7 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(up7)
conv7 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(conv7)

up_conv7 = layers.UpSampling2D(size=(2, 2))(conv7)
ch, cw = get_crop_shape(conv2, up_conv7)
crop_conv2 = layers.Cropping2D(cropping=(ch,cw))(conv2)
up8 = layers.concatenate([up_conv7, crop_conv2], axis=concat_axis)
conv8 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(up8)
conv8 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(conv8)

up_conv8 = layers.UpSampling2D(size=(2, 2))(conv8)
ch, cw = get_crop_shape(conv1, up_conv8)
crop_conv1 = layers.Cropping2D(cropping=(ch,cw))(conv1)
up9 = layers.concatenate([up_conv8, crop_conv1], axis=concat_axis)
conv9 = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(up9)
conv9 = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(conv9)

# --- Output head: zero-pad the decoder output by its size difference to the
# input, then map to one sigmoid channel with a 1x1 convolution
ch, cw = get_crop_shape(inputs, conv9)
conv9 = layers.ZeroPadding2D(padding=((ch[0], ch[1]), (cw[0], cw[1])))(conv9)
conv10 = layers.Conv2D(1, (1, 1), activation='sigmoid')(conv9)

# Compile with Adam and binary cross-entropy, reporting mean_iou as a metric
model = Model(inputs=[inputs], outputs=[conv10])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=[mean_iou])
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 672, 672, 3)  0                                            
__________________________________________________________________________________________________
conv1_1 (Conv2D)                (None, 672, 672, 32) 896         input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 672, 672, 32) 9248        conv1_1[0][0]                    
__________________________________________________________________________________________________
max_pooling2d_1 (MaxPooling2D)  (None, 336, 336, 32) 0           conv2d_1[0][0]                   
__________________________________________________________________________________________________
conv2d_2 (

In [6]:
# Disabled experiment: imgaug-based augmentation, kept as a bare string so it
# is never executed (the cell only echoes the text back as its output).
# NOTE(review): the last two lines inside the string both assign x_valid_aug;
# the second was presumably meant to be y_valid_aug -- fix before re-enabling.
"""
from imgaug import augmenters as iaa

aug = iaa.Sequential([iaa.Fliplr(0.5), iaa.Flipud(0.5),
                      iaa.Affine(rotate=(-45,45),translate_px={"x": (-40, 40)}),
                      iaa.PiecewiseAffine(scale=(0.01, 0.2)),
                      iaa.ElasticTransformation(alpha=(0, 5.0), sigma=0.25)])

aug_val = iaa.Sequential([])

x_train_aug = aug.augment_images(x_train)
y_train_aug = aug.augment_images(y_train)
x_valid_aug = aug_val.augment_images(x_valid)
x_valid_aug = aug_val.augment_images(y_valid)
"""


'\nfrom imgaug import augmenters as iaa\n\naug = iaa.Sequential([iaa.Fliplr(0.5), iaa.Flipud(0.5),\n                      iaa.Affine(rotate=(-45,45),translate_px={"x": (-40, 40)}),\n                      iaa.PiecewiseAffine(scale=(0.01, 0.2)),\n                      iaa.ElasticTransformation(alpha=(0, 5.0), sigma=0.25)])\n\naug_val = iaa.Sequential([])\n\nx_train_aug = aug.augment_images(x_train)\ny_train_aug = aug.augment_images(y_train)\nx_valid_aug = aug_val.augment_images(x_valid)\nx_valid_aug = aug_val.augment_images(y_valid)\n'

In [7]:

from keras.preprocessing.image import ImageDataGenerator



# Seed shared by the image and mask generators. Both are built from the same
# arguments and flowed with the same seed so that the random transform applied
# to an image is intended to match the one applied to its mask.
AUG_SEED = 7

# Training augmentation: flips, rotation up to 90 degrees, 10% shifts and zoom
data_gen_args = dict(horizontal_flip=True,
                     vertical_flip=True,
                     rotation_range=90.,
                     width_shift_range=0.1,
                     height_shift_range=0.1,
                     zoom_range=0.1)
image_datagen = ImageDataGenerator(**data_gen_args)
mask_datagen = ImageDataGenerator(**data_gen_args)
# ImageDataGenerator.fit() is deliberately not called: per the Keras docs it
# is only required when featurewise_center, featurewise_std_normalization or
# zca_whitening is enabled, and none of them is set here.
image_generator = image_datagen.flow(x_train, batch_size=1, seed=AUG_SEED)
mask_generator = mask_datagen.flow(y_train, batch_size=1, seed=AUG_SEED)
# Lazily pairs each image batch with its mask batch
train_generator = zip(image_generator, mask_generator)

# Validation data goes through generators with no augmentation options
val_gen_args = dict()
image_datagen_val = ImageDataGenerator(**val_gen_args)
mask_datagen_val = ImageDataGenerator(**val_gen_args)
image_generator_val = image_datagen_val.flow(x_valid, batch_size=1, seed=AUG_SEED)
mask_generator_val = mask_datagen_val.flow(y_valid, batch_size=1, seed=AUG_SEED)
valid_generator = zip(image_generator_val, mask_generator_val)



In [8]:
# Write the model to unet.h5 only when the validation loss improves
checkpoint = ModelCheckpoint(
    'unet.h5',
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
)
# One step per sample for both the training and the validation generator
model.fit_generator(
    train_generator,
    steps_per_epoch=len(x_train),
    epochs=2,
    validation_data=valid_generator,
    validation_steps=len(x_valid),
    callbacks=[checkpoint],
)
    

Epoch 1/2

Epoch 00001: val_loss improved from inf to 0.41531, saving model to unet.h5
Epoch 2/2

Epoch 00002: val_loss did not improve


<keras.callbacks.History at 0x7f69b7c44ba8>

In [9]:
# Reload the checkpointed model and predict on the test images
model = load_model(
    'unet.h5',
    custom_objects={'mean_iou': mean_iou, 'get_crop_shape': get_crop_shape},
)
preds_test = model.predict(X_test, batch_size=1, verbose=1)

# Binarised copy of the predictions at a 0.5 threshold
preds_test_t = (preds_test > 0.5).astype(np.uint8)

# Resize each predicted map back to the original size of its test image
preds_test_upsampled = [
    resize(np.squeeze(pred), (height, width), mode='constant', preserve_range=True)
    for pred, (height, width) in zip(preds_test, sizes_test)
]
    
def rle_encoding(x):
    """Run-length encode the pixels of `x` that are equal to 1.

    The array is transposed and flattened before scanning, and positions are
    reported 1-based.

    Args:
        x: 2-D array; elements comparing equal to 1 are treated as foreground.

    Returns:
        A flat list ``[start_1, length_1, start_2, length_2, ...]``; empty
        when no element equals 1.
    """
    positions = np.where(x.T.flatten() == 1)[0]
    runs = []
    last = None
    for pos in positions:
        # A gap since the previous foreground pixel opens a new run
        if last is None or pos - last > 1:
            runs += [pos + 1, 0]
        runs[-1] += 1
        last = pos
    return runs

def prob_to_rles(x, cutoff=0.5):
    """Yield one run-length encoding per labelled region of a thresholded map.

    Args:
        x: array of prediction values.
        cutoff: values strictly greater than this count as foreground.

    Yields:
        The ``rle_encoding`` of each region found by ``label``, for label
        values 1 up to the largest label present.
    """
    labelled = label(x > cutoff)
    region_count = labelled.max()
    yield from (
        rle_encoding(labelled == region_id)
        for region_id in range(1, region_count + 1)
    )
        
# Collect one run-length encoding per detected region, each tagged with the
# id of the test image it came from.
# NOTE(review): an image with no region above the cutoff contributes no row
# at all -- confirm that the consumer of this CSV accepts missing ImageIds.
new_test_ids = []
rles = []
for id_, upsampled in zip(test_ids, preds_test_upsampled):
    image_rles = list(prob_to_rles(upsampled))
    rles.extend(image_rles)
    new_test_ids.extend([id_] * len(image_rles))

# Create submission DataFrame: each encoding becomes a space-separated string.
# (A previous `sub.groupby('ImageId').ngroup()` call was removed; its result
# was discarded, so it had no effect.)
sub = pd.DataFrame(
    {
        'ImageId': new_test_ids,
        'EncodedPixels': [' '.join(str(value) for value in run) for run in rles],
    },
    columns=['ImageId', 'EncodedPixels'],
)
print(len(sub))
sub.to_csv('UNet_crop_aug_larger-img_size_imgaug.csv', index=False)

31802
