# U-net with FC layer for classification

In [1]:
# https://ipython.org/ipython-doc/3/config/extensions/autoreload.html
%load_ext autoreload
%autoreload 2

In [2]:
import os
import datetime
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

In [3]:
# Project
import sys
# Make the sibling '../common' directory importable so the project modules
# below (data_utils, training_utils, metrics) can be resolved.
# NOTE: the path is built from the current working directory ('.'), so this
# cell presumably expects the kernel to be started from the notebook's folder.
sys.path.append(os.path.join(os.path.abspath(os.path.dirname('.')), '..', 'common'))
from data_utils import type_1_ids, type_2_ids, type_3_ids, test_ids
from training_utils import get_trainval_id_type_lists, get_test_id_type_list, data_iterator
from metrics import logloss_mc

In [4]:
# Report the number of ids available per training class and in the test set.
# The id collections are the ones imported from data_utils.
print("\n=========================")
print("Training dataset: ")
print("- type 1: ", len(type_1_ids))
print("- type 2: ", len(type_2_ids))
print("- type 3: ", len(type_3_ids))

print("Test dataset: ")
print("- ", len(test_ids))
print("=========================\n")



Training dataset: 
('- type 1: ', 250)
('- type 2: ', 781)
('- type 3: ', 450)
Test dataset: 
('- ', 512)



In [5]:
def train(model, train_id_type_list, val_id_type_list, batch_size=16, nb_epochs=10, image_size=(224, 224)):
    """
    Fit `model` with `fit_generator` and checkpoint its weights under ./weights.

    Batches are produced by `data_iterator` for both the training and the
    validation id/type lists. Each epoch uses a fixed budget of 512 training
    samples and 128 validation samples.

    :param model: object exposing a Keras 1.x style `fit_generator` method
    :param train_id_type_list: id/type list handed to `data_iterator` for training
    :param val_id_type_list: id/type list handed to `data_iterator` for validation
    :param batch_size: batch size forwarded to `data_iterator`
    :param nb_epochs: number of epochs forwarded to `fit_generator` as `nb_epoch`
    :param image_size: image size forwarded to `data_iterator`
    :return: the value returned by `model.fit_generator`
    """
    samples_per_epoch = 512
    nb_val_samples = 128

    if not os.path.exists('weights'):
        os.mkdir('weights')

    weights_filename = os.path.join("weights", "unet_simple_{epoch:02d}-{val_loss:.4f}.h5")
    # Select the "best" weights on the validation loss, i.e. the same quantity
    # that is embedded in the file name. Monitoring the training loss together
    # with save_best_only=True would keep the checkpoints that fit the training
    # data best, regardless of how they generalise.
    model_checkpoint = ModelCheckpoint(weights_filename, monitor='val_loss', save_best_only=True)

    print("Training parameters: ", batch_size, nb_epochs, samples_per_epoch, nb_val_samples)

    train_iter = data_iterator(train_id_type_list, batch_size=batch_size, image_size=image_size, verbose=0)
    val_iter = data_iterator(val_id_type_list, batch_size=batch_size, image_size=image_size, verbose=0)

    history = model.fit_generator(
        train_iter,
        samples_per_epoch=samples_per_epoch,
        nb_epoch=nb_epochs,
        validation_data=val_iter,
        nb_val_samples=nb_val_samples,
        callbacks=[model_checkpoint],
        verbose=1,
    )

    return history


def validate(model, val_id_type_list, batch_size=16, image_size=(224, 224)):
    """
    Print the per-batch and the sample-weighted mean `logloss_mc` of `model`.

    Batches come from `data_iterator` in test mode; every batch loss is
    weighted by the number of rows in its `Y_true` before averaging.
    Nothing is returned, the results are only printed.
    """
    batches = data_iterator(val_id_type_list, batch_size=batch_size, image_size=image_size, test_mode=True)

    weighted_loss = 0.0
    nb_seen = 0
    for X, Y_true, _ in batches:
        nb_in_batch = Y_true.shape[0]
        batch_loss = logloss_mc(Y_true, model.predict(X))
        nb_seen += nb_in_batch
        weighted_loss += nb_in_batch * batch_loss
        print("--", nb_seen, "batch loss : ", batch_loss)

    # With no batches at all, normalise by 1 so the reported total stays 0.0
    # instead of dividing by zero.
    normaliser = nb_seen if nb_seen != 0 else 1
    weighted_loss *= 1.0 / normaliser
    print("Total loss : ", weighted_loss)
    
    
def predict(model, batch_size=16, image_size=(224, 224), info=''):
    """
    Predict class probabilities for the test set and write a submission CSV.

    The test id/type list comes from `get_test_id_type_list` and is iterated
    with `data_iterator` in test mode. One row per image is written with the
    columns image_name, Type_1, Type_2, Type_3 to
    'submission_<info>_<YYYY-mm-dd-HH-MM>.csv' in the current directory.

    :param model: object exposing a `predict(X)` method returning one row of
        three values per input sample
    :param batch_size: batch size forwarded to `data_iterator`
    :param image_size: image size forwarded to `data_iterator`
    :param info: free text inserted in the submission file name
    """
    test_id_type_list = get_test_id_type_list()
    test_iter = data_iterator(test_id_type_list, batch_size=batch_size, image_size=image_size, test_mode=True)

    columns = ['image_name', 'Type_1', 'Type_2', 'Type_3']
    # Rows are accumulated in a list and turned into a DataFrame once.
    # Writing them through df.loc[total_counter + i] (with total_counter
    # already advanced by the current batch size) made a batch smaller than
    # the previous one reuse the previous batch's labels and overwrite its
    # rows, and growing a DataFrame row by row is quadratic anyway.
    rows = []
    total_counter = 0
    for X, _, image_ids in test_iter:
        Y_pred = model.predict(X)
        s = X.shape[0]
        total_counter += s
        print("--", total_counter)
        for i in range(s):
            rows.append((image_ids[i] + '.jpg', ) + tuple(Y_pred[i, :]))

    df = pd.DataFrame(rows, columns=columns)

    now = datetime.datetime.now()
    sub_file = 'submission_' + info + '_' + str(now.strftime("%Y-%m-%d-%H-%M")) + '.csv'
    df.to_csv(sub_file, index=False)

In [6]:
# Keras-related imports. train(), defined in an earlier cell, looks up
# ModelCheckpoint when it is called, so this cell has to be executed before
# train() is invoked.
from keras import __version__
from unet_keras122 import get_unet
from keras.callbacks import ModelCheckpoint

print("Keras version: ", __version__)

Using Theano backend.


Couldn't import dot_parser, loading of dot files will not be possible.


 https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29

Using gpu device 0: GeForce GT 750M (CNMeM is enabled with initial size: 50.0% of memory, cuDNN 5103)


('Keras version: ', '1.2.2')


In [7]:
# Build the training and validation id/type lists with the project helper
# from training_utils; both are reused by the training and validation cells.
print("\n {} - Get train/val lists ...".format(datetime.datetime.now()))
train_id_type_list, val_id_type_list = get_trainval_id_type_lists()


 2017-03-19 16:25:04.096696 - Get train/val lists ...
Train dataset contains : 
('-', [175, 546, 315], ' images of corresponding types')
Validation dataset contains : 
('-', [75, 234, 135], ' images of corresponding types')


In [9]:
# Instantiate the network with get_unet() (from unet_keras122) using its
# default arguments and print the layer summary.
print("\n {} - Get U-Net model ...".format(datetime.datetime.now()))
unet = get_unet()
unet.summary()


 2017-03-19 16:25:04.133587 - Get U-Net model ...
____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, 3, 224, 224)   0                                            
____________________________________________________________________________________________________
convolution2d_1 (Convolution2D)  (None, 32, 224, 224)  896         input_1[0][0]                    
____________________________________________________________________________________________________
batchnormalization_1 (BatchNorma (None, 32, 224, 224)  128         convolution2d_1[0][0]            
____________________________________________________________________________________________________
activation_1 (Activation)        (None, 32, 224, 224)  0           batchnormalization_1[0][0]       
________________________________________

In [10]:
# Training run configuration; batch_size is also reused by the validation
# and prediction cells below.
nb_epochs = 20
batch_size = 4
print("\n {} - Start training ...".format(datetime.datetime.now()))
# train() returns what fit_generator returns; as the last expression of the
# cell that value is echoed as the cell output.
train(unet, train_id_type_list, val_id_type_list, nb_epochs=nb_epochs, batch_size=batch_size)


 2017-03-19 16:25:07.367406 - Start training ...
('Training parameters: ', 4, 20, 512, 128)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20



Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
 24/512 [>.............................] - ETA: 77s - loss: 3.7167 - acc: 0.3750('Image is corrupted. Id/Type:', '1339', 'Type_1')
Epoch 15/20
 40/512 [=>............................] - ETA: 79s - loss: 3.7035 - acc: 0.2500('Image is corrupted. Id/Type:', '1339', 'Type_1')
Epoch 16/20
 52/512 [==>...........................] - ETA: 75s - loss: 4.4012 - acc: 0.2692('Image is corrupted. Id/Type:', '1339', 'Type_1')
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x148639c90>

In [None]:
# Print the per-batch and overall log loss of the trained model on the
# validation list (validate() only prints, it returns nothing).
print("\n {} - Start validation ...".format(datetime.datetime.now()))
validate(unet, val_id_type_list, batch_size=batch_size)

In [None]:
# Predict on the test set and write the submission CSV to the current
# directory; `info` becomes part of the file name.
print("\n {} - Start predictions and write submission ...".format(datetime.datetime.now()))
predict(unet, info='unet_no_additional', batch_size=batch_size)