In [15]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from tqdm import tqdm

from keras.models import Model, load_model, save_model
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras import optimizers
from keras.preprocessing.image import array_to_img, img_to_array, load_img

from utils import *
from unet_model import *
from custom_metrics import *
from lovasz_loss import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
img_size_ori = 101
img_size_target = 101

In [3]:
train_df = pd.read_csv('datasets/train.csv', index_col='id', usecols=[0])
depths_df = pd.read_csv('datasets/depths.csv', index_col='id')
train_df = train_df.join(depths_df)
test_df = depths_df[~depths_df.index.isin(train_df.index)]

len(train_df)

4000

In [4]:
train_df['images'] = [np.array(load_img('datasets/train/images/{}.png'.format(idx), 
                                        color_mode='grayscale')) / 255. for idx in tqdm(train_df.index)]

100%|█████████████████████████████████████████████████████████████████████████████| 4000/4000 [00:29<00:00, 136.72it/s]


In [5]:
train_df['masks'] = [np.array(load_img('datasets/train/masks/{}.png'.format(idx), 
                                       color_mode='grayscale')) / 255 for idx in tqdm(train_df.index)]

100%|████████████████████████████████████████████████████████████████████████████| 4000/4000 [00:02<00:00, 1712.58it/s]


In [6]:
train_df['coverage'] = train_df.masks.map(np.sum) / pow(img_size_ori, 2)
train_df['coverage_class'] = train_df.coverage.map(cov_to_class)

In [19]:
ids_train, ids_valid, X_train, X_valid, y_train, y_valid, cov_train, cov_test, depth_train, depth_test = \
train_test_split(train_df.index.values, 
                 np.array(train_df.images.map(upsample).tolist()).reshape(-1, img_size_target, img_size_target, 1),
                 np.array(train_df.masks.map(upsample).tolist()).reshape(-1, img_size_target, img_size_target, 1),
                 train_df.coverage.values, 
                 train_df.z.values, 
                 test_size=0.2, stratify=train_df.coverage_class, random_state=555)

In [22]:
X_train = data_augmentation(X_train)
y_train = data_augmentation(y_train)

In [16]:
# model
input_layer = Input((img_size_target, img_size_target, 1))
output_layer = build_model(input_layer, 16)

model1 = Model(input_layer, output_layer)

c = optimizers.adam(lr = 0.005)
model1.compile(loss='binary_crossentropy', optimizer=c, metrics=[my_iou_metric])

model1.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 101, 101, 1)  0                                            
__________________________________________________________________________________________________
conv2d_3 (Conv2D)               (None, 101, 101, 16) 160         input_2[0][0]                    
__________________________________________________________________________________________________
batch_normalization_2 (BatchNor (None, 101, 101, 16) 64          conv2d_3[0][0]                   
__________________________________________________________________________________________________
activation_2 (Activation)       (None, 101, 101, 16) 0           batch_normalization_2[0][0]      
__________________________________________________________________________________________________
conv2d_4 (

__________________________________________________________________________________________________
dropout_2 (Dropout)             (None, 25, 25, 32)   0           max_pooling2d_2[0][0]            
__________________________________________________________________________________________________
conv2d_13 (Conv2D)              (None, 25, 25, 64)   18496       dropout_2[0][0]                  
__________________________________________________________________________________________________
batch_normalization_12 (BatchNo (None, 25, 25, 64)   256         conv2d_13[0][0]                  
__________________________________________________________________________________________________
activation_12 (Activation)      (None, 25, 25, 64)   0           batch_normalization_12[0][0]     
__________________________________________________________________________________________________
conv2d_14 (Conv2D)              (None, 25, 25, 64)   36928       activation_12[0][0]              
__________

__________________________________________________________________________________________________
conv2d_23 (Conv2D)              (None, 6, 6, 256)    295168      dropout_4[0][0]                  
__________________________________________________________________________________________________
batch_normalization_22 (BatchNo (None, 6, 6, 256)    1024        conv2d_23[0][0]                  
__________________________________________________________________________________________________
activation_22 (Activation)      (None, 6, 6, 256)    0           batch_normalization_22[0][0]     
__________________________________________________________________________________________________
conv2d_24 (Conv2D)              (None, 6, 6, 256)    590080      activation_22[0][0]              
__________________________________________________________________________________________________
batch_normalization_23 (BatchNo (None, 6, 6, 256)    1024        conv2d_24[0][0]                  
__________

concatenate_2 (Concatenate)     (None, 25, 25, 128)  0           conv2d_transpose_2[0][0]         
                                                                 activation_16[0][0]              
__________________________________________________________________________________________________
dropout_6 (Dropout)             (None, 25, 25, 128)  0           concatenate_2[0][0]              
__________________________________________________________________________________________________
conv2d_33 (Conv2D)              (None, 25, 25, 64)   73792       dropout_6[0][0]                  
__________________________________________________________________________________________________
batch_normalization_32 (BatchNo (None, 25, 25, 64)   256         conv2d_33[0][0]                  
__________________________________________________________________________________________________
activation_32 (Activation)      (None, 25, 25, 64)   0           batch_normalization_32[0][0]     
__________

activation_41 (Activation)      (None, 50, 50, 32)   0           batch_normalization_41[0][0]     
__________________________________________________________________________________________________
conv2d_transpose_4 (Conv2DTrans (None, 101, 101, 16) 4624        activation_41[0][0]              
__________________________________________________________________________________________________
concatenate_4 (Concatenate)     (None, 101, 101, 32) 0           conv2d_transpose_4[0][0]         
                                                                 activation_6[0][0]               
__________________________________________________________________________________________________
dropout_8 (Dropout)             (None, 101, 101, 32) 0           concatenate_4[0][0]              
__________________________________________________________________________________________________
conv2d_43 (Conv2D)              (None, 101, 101, 16) 4624        dropout_8[0][0]                  
__________

In [23]:
%%time
early_stopping = EarlyStopping(monitor='val_my_iou_metric', mode = 'max',patience=15, verbose=1)
model_checkpoint = ModelCheckpoint('model1.model', monitor='my_iou_metric', mode='max',
                                   save_best_only=True, verbose=1)

reduce_lr = ReduceLROnPlateau(monitor='val_my_iou_metric', mode='max', factor=0.5, patience=5,
                              min_lr=0.0001, verbose=1)

epochs = 100
batch_size = 8

history = model1.fit(X_train, y_train,
                     validation_data = [X_valid, y_valid], 
                     epochs=epochs, 
                     batch_size=batch_size, 
                     callbacks=[early_stopping, model_checkpoint, reduce_lr],
                     verbose=2)

Train on 6400 samples, validate on 800 samples
Epoch 1/100
 - 156s - loss: 0.4260 - my_iou_metric: 0.3894 - val_loss: 0.6087 - val_my_iou_metric: 0.4105

Epoch 00001: my_iou_metric improved from -inf to 0.38944, saving model to model1.model
Epoch 2/100


KeyboardInterrupt: 

In [None]:
model1 = load_model('model1.model', 
                    custom_objects={'my_iou_metric':my_iou_metric})
# remove activation layer and use lovasz loss
input_x = model1.layers[0].input

output_layer = model1.layers[-1].input
model = Model(input_x, output_layer)
c = optimizers.adam(lr=0.01)

model.compile(loss=lovasz_loss, optimizer=c, metrics=[my_iou_metric_2])

model.summary()

In [None]:
%%time
early_stopping = EarlyStopping(monitor='val_my_iou_metric_2', mode = 'max',patience=30, verbose=1)
model_checkpoint = ModelCheckpoint(save_model_name,monitor='val_my_iou_metric_2', 
                                   mode='max', save_best_only=True, verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_my_iou_metric_2', mode='max', factor=0.5, patience=5, 
                              min_lr=0.00005, verbose=1)
epochs = 100
batch_size = 32

history = model.fit(X_train, y_train,
                    validation_data=[X_valid, y_valid], 
                    epochs=epochs,
                    batch_size=batch_size,
                    callbacks=[model_checkpoint, reduce_lr, early_stopping], 
                    verbose=2)

In [None]:
plt.subplots(1, 2, 1)
plot(history.history['loss'], label='Train')
plot(history.history['val_loss'], label='Validation')
plt.legend()

plt.subplots(1, 2, 2)
plot(history.history['my_iou_metric'], label='Train')
plot(history.history['val_my_iou_metric'], label='Validation')
plt.legend()

In [None]:
model = load_model(save_model_name,custom_objects={'my_iou_metric_2': my_iou_metric_2,
                                                   'lovasz_loss': lovasz_loss})

In [None]:
preds_valid = predict_result(model, X_valid, img_size_target)

In [None]:
## Scoring for last model, choose threshold by validation data 
thresholds_ori = np.linspace(0.3, 0.7, 31)
# Reverse sigmoid function: Use code below because the  sigmoid activation was removed
thresholds = np.log(thresholds_ori/(1-thresholds_ori)) 

# ious = np.array([get_iou_vector(y_valid, preds_valid > threshold) for threshold in tqdm_notebook(thresholds)])
# print(ious)
ious = np.array([iou_metric_batch(y_valid, preds_valid > threshold) for threshold in tqdm_notebook(thresholds)])
print(ious)

In [None]:
threshold_best_index = np.argmax(ious)
iou_best = ious[threshold_best_index]
threshold_best = thresholds[threshold_best_index]

plt.plot(thresholds, ious)
plt.plot(threshold_best, iou_best, 'xr', label='Best threshold')
plt.xlabel("Threshold")
plt.ylabel("IoU")
plt.title("Threshold vs IoU ({}, {})".format(threshold_best, iou_best))
plt.legend()

In [None]:
X_test = np.array([(np.array(load_img("datasets/test/images/{}.png".format(idx), 
                                      color_mode='grayscale'))) / 255 for idx in tqdm(test_df.index)]).reshape(-1, img_size_target, img_size_target, 1)

In [None]:
preds_test = predict_result(model, X_test, img_size_target)

In [None]:
pred_dict = {idx: rle_encode(np.round(downsample(preds_test[i]) > threshold_best)) for i, idx in enumerate(tqdm(test_df.index.values))}

In [None]:
sub = pd.DataFrame.from_dict(pred_dict, orient='index')
sub.index.names = ['id']
sub.columns = ['rle_mask']
sub.to_csv(submission_file)