# Trained ResNet-50 classification

In [1]:
# Reload imported modules automatically before each execution, so edits to the
# project's helper modules are picked up without restarting the kernel.
# https://ipython.org/ipython-doc/3/config/extensions/autoreload.html
%load_ext autoreload
%autoreload 2

In [2]:
import os
import datetime
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

In [3]:
# Project
import sys
sys.path.append(os.path.join(os.path.abspath(os.path.dirname('.')), '..', 'common'))
from data_utils import type_1_ids, type_2_ids, type_3_ids, test_ids
from training_utils import get_trainval_id_type_lists, get_test_id_type_list, data_iterator, data_augmentation
from metrics import logloss_mc

Using Theano backend.


Couldn't import dot_parser, loading of dot files will not be possible.


 https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29

Using gpu device 0: GeForce GT 750M (CNMeM is enabled with initial size: 50.0% of memory, cuDNN 5103)


In [4]:
# Report how many image ids each dataset split contains.
# Each line is formatted into a single string before printing: passing several
# arguments to print(...) shows a tuple repr when print is a statement
# (Python 2 kernel), which is what the recorded output of this cell displays.
print("\n=========================")
print("Training dataset: ")
print("- type 1: {}".format(len(type_1_ids)))
print("- type 2: {}".format(len(type_2_ids)))
print("- type 3: {}".format(len(type_3_ids)))

print("Test dataset: ")
print("- {}".format(len(test_ids)))
print("=========================\n")



Training dataset: 
('- type 1: ', 250)
('- type 2: ', 781)
('- type 3: ', 450)
Test dataset: 
('- ', 512)



In [23]:
def train(model, train_id_type_list, val_id_type_list, batch_size=16, nb_epochs=10, image_size=(224, 224)):
    """Fit `model` with `fit_generator`, checkpointing improved weights to ./weights.

    Parameters
    ----------
    model
        Object exposing `fit_generator` (Keras 1.x argument names are used).
    train_id_type_list, val_id_type_list
        Forwarded unchanged to `data_iterator` to build the training and
        validation generators.
    batch_size : int
        Batch size given to both generators; also used to round the per-epoch
        sample budgets down to a whole number of batches.
    nb_epochs : int
        Number of epochs passed to `fit_generator`.
    image_size : tuple
        Forwarded to `data_iterator`.

    Returns
    -------
    Whatever `model.fit_generator` returns.

    Side effects
    ------------
    Creates the `weights` directory if missing and lets the checkpoint
    callback write files named `resnet_simple_{epoch}-{val_loss}.h5` there.
    """
    # Budgets of 2048 training / 1024 validation samples per epoch, rounded
    # down to a multiple of batch_size.
    samples_per_epoch = (2048 // batch_size) * batch_size
    nb_val_samples = (1024 // batch_size) * batch_size

    if not os.path.exists('weights'):
        os.mkdir('weights')

    weights_filename = os.path.join("weights", "resnet_simple_{epoch:02d}-{val_loss:.4f}.h5")
    # Monitor the validation loss: the file name embeds val_loss and the
    # notebook later selects the checkpoint with the lowest val_loss, so
    # "best" must be judged on the same quantity (it was the training loss).
    model_checkpoint = ModelCheckpoint(weights_filename, monitor='val_loss', save_best_only=True)

    print("Training parameters: batch_size={}, nb_epochs={}, samples_per_epoch={}, nb_val_samples={}".format(
        batch_size, nb_epochs, samples_per_epoch, nb_val_samples))

    # Augmentation is applied to the training generator only.
    train_iter = data_iterator(train_id_type_list,
                               batch_size=batch_size,
                               image_size=image_size,
                               data_augmentation_fn=data_augmentation,
                               verbose=0)
    val_iter = data_iterator(val_id_type_list, batch_size=batch_size, image_size=image_size, verbose=0)

    history = model.fit_generator(
        train_iter,
        samples_per_epoch=samples_per_epoch,
        nb_epoch=nb_epochs,
        validation_data=val_iter,
        nb_val_samples=nb_val_samples,
        callbacks=[model_checkpoint],
        verbose=1,
    )

    return history


def validate(model, val_id_type_list, batch_size=16, image_size=(224, 224)):
    """Compute the sample-weighted mean of `logloss_mc` over the validation iterator.

    Parameters
    ----------
    model
        Object exposing `predict(X)`.
    val_id_type_list
        Forwarded to `data_iterator` (called with `test_mode=True`).
    batch_size : int
        Batch size given to `data_iterator`.
    image_size : tuple
        Forwarded to `data_iterator`.

    Returns
    -------
    float
        Sum of (batch size * batch loss) divided by the number of samples
        seen, or 0.0 when the iterator yields nothing. The value is also
        printed, together with one progress line per batch.
    """
    val_iter = data_iterator(val_id_type_list, batch_size=batch_size, image_size=image_size, test_mode=True)

    weighted_loss_sum = 0.0
    nb_samples = 0
    for X, Y_true, _ in val_iter:
        batch_len = Y_true.shape[0]
        nb_samples += batch_len
        Y_pred = model.predict(X)
        loss = logloss_mc(Y_true, Y_pred)
        # Weight by batch length so a differently sized batch does not skew the mean.
        weighted_loss_sum += batch_len * loss
        print("-- {} batch loss : {}".format(nb_samples, loss))

    # Guard against an empty iterator instead of dividing by zero.
    total_loss = weighted_loss_sum / nb_samples if nb_samples else 0.0
    print("Total loss : {}".format(total_loss))
    return total_loss
    
    
def predict(model, batch_size=16, image_size=(224, 224), info=''):
    """Predict every test image and write a timestamped submission CSV.

    Parameters
    ----------
    model
        Object exposing `predict(X)`; each prediction row must hold three
        values, one per `Type_*` column.
    batch_size : int
        Batch size given to `data_iterator`.
    image_size : tuple
        Forwarded to `data_iterator`.
    info : str
        Free text embedded in the output file name.

    Returns
    -------
    str
        Path of the written file,
        `../results/submission_<info>_<YYYY-mm-dd-HH-MM>.csv`.

    Side effects
    ------------
    Creates `../results` if missing, writes the CSV (columns `image_name`,
    `Type_1`, `Type_2`, `Type_3`) and prints the number of lines written.
    """
    test_id_type_list = get_test_id_type_list()
    test_iter = data_iterator(test_id_type_list, batch_size=batch_size, image_size=image_size, test_mode=True)

    columns = ['image_name', 'Type_1', 'Type_2', 'Type_3']
    # Collect rows in a list and build the frame once; enlarging a DataFrame
    # one row at a time re-allocates on every assignment.
    rows = []
    for X, _, image_ids in test_iter:
        Y_pred = model.predict(X)
        print("-- {}".format(len(rows)))
        for i in range(X.shape[0]):
            rows.append((image_ids[i] + '.jpg', ) + tuple(Y_pred[i, :]))

    # dtype=object keeps the predicted values un-cast, so their text form in
    # the CSV is not changed by numeric type inference.
    df = pd.DataFrame(rows, columns=columns, dtype=object)

    results_dir = os.path.join('..', 'results')
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)

    now = datetime.datetime.now()
    sub_file = 'submission_' + info + '_' + now.strftime("%Y-%m-%d-%H-%M") + '.csv'
    sub_file = os.path.join(results_dir, sub_file)
    df.to_csv(sub_file, index=False)

    # Sanity check: header plus one line per test image (pure Python, no shell).
    with open(sub_file) as written:
        nb_lines = sum(1 for _ in written)
    print(nb_lines)

    return sub_file

In [6]:
from resnet_keras122 import get_resnet50
# ModelCheckpoint is referenced inside train(), which is defined in an earlier
# cell: this import has to run before train() is called.
from keras.callbacks import ModelCheckpoint
from keras import __version__
# Show which Keras version this kernel is running.
print("Keras version: ", __version__)

('Keras version: ', '1.2.2')


In [7]:
# Print a timestamped progress message, then build the train / validation
# lists that the training and validation cells below pass to train() and
# validate().
print("\n {} - Get train/val lists ...".format(datetime.datetime.now()))
train_id_type_list, val_id_type_list = get_trainval_id_type_lists()


 2017-03-20 00:06:25.939047 - Get train/val lists ...
Train dataset contains : 
('-', [175, 546, 315], ' images of corresponding types')
Validation dataset contains : 
('-', [75, 234, 135], ' images of corresponding types')


In [8]:
# Build the model with the project's get_resnet50() helper and print its
# layer-by-layer summary.
print("\n {} - Get ResNet-50 model ...".format(datetime.datetime.now()))
resnet = get_resnet50()
resnet.summary()


 2017-03-20 00:06:25.959272 - Get ResNet-50 model ...
____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, 3, 224, 224)   0                                            
____________________________________________________________________________________________________
zeropadding2d_1 (ZeroPadding2D)  (None, 3, 230, 230)   0           input_1[0][0]                    
____________________________________________________________________________________________________
conv1 (Convolution2D)            (None, 64, 112, 112)  9472        zeropadding2d_1[0][0]            
____________________________________________________________________________________________________
bn_conv1 (BatchNormalization)    (None, 64, 112, 112)  256         conv1[0][0]                      
____________________________________

In [32]:
# Training hyper-parameters.
# NOTE: batch_size is also read by the validation cell further down.
nb_epochs = 50
batch_size = 10

print("\n {} - Start training ...".format(datetime.datetime.now()))
# `history` holds the value returned by train(), i.e. the result of
# model.fit_generator.
history = train(resnet, train_id_type_list, val_id_type_list, nb_epochs=nb_epochs, batch_size=batch_size)


 2017-03-20 09:16:19.040692 - Start training ...
('Training parameters: ', 10, 50, 2040, 1020)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [42]:
# Choose the best saved weights
import glob

# train() names its checkpoints "resnet_simple_{epoch:02d}-{val_loss:.4f}.h5",
# so the validation loss is the text between the first '-' and the ".h5"
# suffix. glob is used instead of a shell `ls` so that an empty or missing
# directory yields an empty list rather than an error message to parse.
weights_files = sorted(glob.glob(os.path.join('weights', '*.h5')))
best_val_loss = 1e5
best_weights_filename = ""
for f in weights_files:
    name = os.path.basename(f)
    try:
        loss = float(name[name.index('-') + 1:-len('.h5')])
    except ValueError:
        # No '-' in the name, or the text after it is not a number:
        # this file does not follow the checkpoint naming scheme.
        print("Skipping unrecognised weights file: {}".format(f))
        continue
    if best_val_loss > loss:
        best_val_loss = loss
        best_weights_filename = f

if not best_weights_filename:
    # Fail here with a clear message instead of calling load_weights("").
    raise RuntimeError("No usable checkpoint found in 'weights' - run the training cell first")
print("Best val loss weights: {}".format(best_weights_filename))

# load weights to the model
resnet.load_weights(best_weights_filename)

('Best val loss weights: ', 'weights/resnet_simple_13-0.9255.h5')


In [43]:
# Score the validation list with the weights loaded in the previous cell.
# batch_size is the value set in the training cell.
print("\n {} - Start validation ...".format(datetime.datetime.now()))
validate(resnet, val_id_type_list, batch_size=batch_size)


 2017-03-20 13:42:47.529796 - Start validation ...
('--', 10, 'batch loss : ', 1.7966474)
('--', 20, 'batch loss : ', 1.3152173)
('--', 30, 'batch loss : ', 1.613647)
('--', 40, 'batch loss : ', 1.4569806)
('--', 50, 'batch loss : ', 2.0192065)
('--', 60, 'batch loss : ', 1.6919658)
('--', 70, 'batch loss : ', 1.3893174)
('--', 80, 'batch loss : ', 1.8404121)
('--', 90, 'batch loss : ', 1.9715917)
('--', 100, 'batch loss : ', 1.0972403)
('--', 110, 'batch loss : ', 1.7654177)
('--', 120, 'batch loss : ', 1.8749675)
('--', 130, 'batch loss : ', 1.4216545)
('--', 140, 'batch loss : ', 1.9844157)
('--', 150, 'batch loss : ', 1.3714226)
('--', 160, 'batch loss : ', 1.6437589)
('--', 170, 'batch loss : ', 1.7335141)
('--', 180, 'batch loss : ', 1.3536484)
('--', 190, 'batch loss : ', 1.4434325)
('--', 200, 'batch loss : ', 1.3440229)
('--', 210, 'batch loss : ', 1.5808131)
('--', 220, 'batch loss : ', 1.3319602)
('--', 230, 'batch loss : ', 2.203378)
('--', 240, 'batch loss : ', 1.1053935)

In [44]:
# Predict the test set and write the submission CSV; `info` is embedded in the
# timestamped output file name by predict().
print("\n {} - Start predictions and write submission ...".format(datetime.datetime.now()))
predict(resnet, info='resnet50_4d_no_additional', batch_size=8)


 2017-03-20 13:46:08.072525 - Start predictions and write submission ...
('--', 0)
('--', 8)
('--', 16)
('--', 24)
('--', 32)
('--', 40)
('--', 48)
('--', 56)
('--', 64)
('--', 72)
('--', 80)
('--', 88)
('--', 96)
('--', 104)
('--', 112)
('--', 120)
('--', 128)
('--', 136)
('--', 144)
('--', 152)
('--', 160)
('--', 168)
('--', 176)
('--', 184)
('--', 192)
('--', 200)
('--', 208)
('--', 216)
('--', 224)
('--', 232)
('--', 240)
('--', 248)
('--', 256)
('--', 264)
('--', 272)
('--', 280)
('--', 288)
('--', 296)
('--', 304)
('--', 312)
('--', 320)
('--', 328)
('--', 336)
('--', 344)
('--', 352)
('--', 360)
('--', 368)
('--', 376)
('--', 384)
('--', 392)
('--', 400)
('--', 408)
('--', 416)
('--', 424)
('--', 432)
('--', 440)
('--', 448)
('--', 456)
('--', 464)
('--', 472)
('--', 480)
('--', 488)
('--', 496)
('--', 504)
     513
