In [86]:
import datetime
import math
import os

import numpy as np
import pandas as pd

from keras.preprocessing import image
from keras.utils import to_categorical
from keras.models import Model, Sequential
from keras.layers import Input, Lambda, Dense
from keras.applications import imagenet_utils
from keras.applications.xception import Xception
from keras.applications.inception_v3 import InceptionV3
from keras.applications.vgg16 import VGG16
from keras.applications.vgg19 import VGG19
from keras.applications.resnet50 import ResNet50
from keras.applications.inception_resnet_v2 import InceptionResNetV2
from keras.applications.mobilenet import MobileNet

from secrets import KAGGLE_USER, KAGGLE_PW

In [59]:
# Notebook-wide configuration, read by the data, training and submission cells below.
competition_name = 'dog-breed-identification'
# Root of the preprocessed data set; the cells below read labels.csv and the
# train/valid/test and results folders from underneath it.
data_dir = '/opt/notebooks/data/%s/preprocessed' % competition_name
# Batch size for the directory iterators and for the final test prediction.
batch_size = 16
# Width of the softmax output of the linear classifier.
nb_classes = 120

In [51]:
def preprocess_input_xception(x):
    """Preprocess an image batch with Keras' ImageNet helper in ``'tf'`` mode.

    Used below for the Xception, InceptionV3 and InceptionResNetV2 entries.
    Per the Keras documentation ``'tf'`` mode rescales pixels sample-wise to
    [-1, 1] -- verify against the installed Keras version.

    Args:
        x: image batch (array or tensor) handed to the Keras helper.

    Returns:
        Whatever ``imagenet_utils.preprocess_input`` returns for ``x``.
    """
    preprocessed = imagenet_utils.preprocess_input(x, mode='tf')
    return preprocessed

def preprocess_input_vgg(x):
    """Reverse the channel axis of ``x`` and subtract a fixed per-channel mean.

    Used below for the VGG16, VGG19 and ResNet50 entries.

    Args:
        x: image batch whose last axis holds the 3 colour channels.

    Returns:
        ``x`` with its last axis reversed and the mean pixel subtracted.
    """
    # Per-channel mean, shaped (1, 1, 3) so it broadcasts over height and width.
    mean_pixel = np.array([103.939, 116.779, 123.68], dtype=np.float32).reshape((1, 1, 3))
    # Flip the channel order first (presumably RGB -> BGR), then zero-center.
    return x[..., ::-1] - mean_pixel

def add_preprocess(base_model, preprocess_func, inputs_shape):
    """Wrap ``base_model`` so that preprocessing runs inside the model graph.

    Args:
        base_model: callable Keras model applied to the preprocessed tensor.
        preprocess_func: function wrapped in a ``Lambda`` layer and applied
            to the raw input tensor.
        inputs_shape: shape passed to ``Input`` (without the batch axis).

    Returns:
        A ``Model`` mapping the raw input to ``base_model``'s output.
    """
    raw_input = Input(shape=inputs_shape)
    preprocessed = Lambda(preprocess_func)(raw_input)
    return Model(raw_input, base_model(preprocessed))

def get_batch_data(data_dir, target_size):
    """Create directory iterators for the train, valid and test splits.

    Each split is read from ``<data_dir>/<split>`` with a plain
    ``ImageDataGenerator`` (no augmentation arguments), ``shuffle=False``
    and the notebook-level ``batch_size``.

    Args:
        data_dir: directory containing the ``train``, ``valid`` and ``test``
            sub-directories.
        target_size: size passed to ``flow_from_directory`` as ``target_size``.

    Returns:
        Tuple ``(batches, batches_val, batches_test)``.
    """
    def _flow(split, target_size=target_size):
        # A fresh generator per split, exactly one iterator each.
        generator = image.ImageDataGenerator()
        return generator.flow_from_directory(
            '%s/%s' % (data_dir, split),
            shuffle=False,
            target_size=target_size,
            batch_size=batch_size,
        )

    return tuple(_flow(split) for split in ('train', 'valid', 'test'))

def get_batch_nb(batches):
    """Return how many batches are needed to cover every sample once.

    Args:
        batches: object exposing the total sample count as ``.n``.

    Returns:
        ``batches.n / batch_size`` rounded up, using the notebook-level
        ``batch_size``.
    """
    batches_needed = batches.n / batch_size
    return math.ceil(batches_needed)

In [52]:
# Keyword arguments shared by every base-model constructor in the training loop.
model_params = dict(
    weights='imagenet',
    include_top=False,
    pooling='avg',
)

In [53]:
# One entry per base network: display name, constructor, input shape and the
# preprocessing function to apply to raw images. The order matters: later cells
# index the per-model predictions by position in this list.
# (MobileNet is imported but intentionally left out of the list.)
base_models = [
    dict(name=name, mdl=constructor, input_shape=shape, prep=preprocess)
    for name, constructor, shape, preprocess in (
        ('Xception', Xception, (299, 299, 3), preprocess_input_xception),
        ('VGG16', VGG16, (224, 224, 3), preprocess_input_vgg),
        ('VGG19', VGG19, (224, 224, 3), preprocess_input_vgg),
        ('InceptionV3', InceptionV3, (299, 299, 3), preprocess_input_xception),
        ('ResNet50', ResNet50, (224, 224, 3), preprocess_input_vgg),
        ('InceptionResNetV2', InceptionResNetV2, (299, 299, 3), preprocess_input_xception),
    )
]

### train

In [80]:
def _load_or_compute_features(cache_path, model, batches, steps):
    """Return bottleneck features for ``batches``, cached at ``cache_path``.

    If ``cache_path`` exists it is loaded with ``np.load``; otherwise the
    features are computed with ``model.predict_generator`` and saved there,
    so the cell also works on a machine without a populated results folder.

    Raises:
        ValueError: if the number of feature rows differs from ``batches.n``
            (e.g. a stale cache from a different data split).
    """
    if os.path.exists(cache_path):
        features = np.load(cache_path)
    else:
        features = model.predict_generator(batches, steps=steps, verbose=1)
        np.save(cache_path, features)
    if features.shape[0] != batches.n:
        raise ValueError(
            'Features in %s have %d rows but the iterator has %d samples'
            % (cache_path, features.shape[0], batches.n)
        )
    return features


# Test-set predictions of each linear model, in the same order as `base_models`.
preds = []

for base_model in base_models:
    print(base_model['name'])
    base_model_preprocessed = add_preprocess(
        base_model=base_model['mdl'](**model_params),
        preprocess_func=base_model['prep'],
        inputs_shape=base_model['input_shape'],
    )

    batches, batches_val, batches_test = get_batch_data(
        data_dir,
        target_size=base_model['input_shape'][:2],
    )
    nb_batches = get_batch_nb(batches)
    nb_batches_val = get_batch_nb(batches_val)
    nb_batches_test = get_batch_nb(batches_test)

    # Bottleneck features: loaded from <data_dir>/results when cached there,
    # computed with the preprocessed base model (and cached) otherwise.
    results_dir = data_dir + '/results'
    base_model_output = _load_or_compute_features(
        results_dir + '/base_model_output_{}.npy'.format(base_model['name']),
        base_model_preprocessed, batches, nb_batches)
    base_model_output_val = _load_or_compute_features(
        results_dir + '/base_model_output_val_{}.npy'.format(base_model['name']),
        base_model_preprocessed, batches_val, nb_batches_val)
    base_model_output_test = _load_or_compute_features(
        results_dir + '/base_model_output_test_{}.npy'.format(base_model['name']),
        base_model_preprocessed, batches_test, nb_batches_test)

    # Linear model: a single softmax layer on top of the pooled features.
    lm = Sequential(
        [Dense(nb_classes,
               activation='softmax',
               input_shape=(base_model_output.shape[1],)
              )]
    )
    lm.compile(optimizer='rmsprop',
               loss='categorical_crossentropy',
               metrics=['accuracy'])

    # Fix the one-hot width explicitly so it does not depend on which class
    # indices happen to be present in each split.
    y = to_categorical(batches.classes, num_classes=nb_classes)
    y_val = to_categorical(batches_val.classes, num_classes=nb_classes)
    # batch_size must be the mini-batch size; the number of batches
    # (`nb_batches`) was previously passed here by mistake.
    lm.fit(base_model_output,
           y,
           epochs=15,
           batch_size=batch_size,
           validation_data=(base_model_output_val, y_val))

    pred = lm.predict(base_model_output_test, batch_size=batch_size, verbose=1)

    preds.append(pred)

# Unweighted average over all base models.
pred_ensemble = np.stack(preds).mean(axis=0)

Xception
Found 8222 images belonging to 120 classes.
Found 2000 images belonging to 120 classes.
Found 10357 images belonging to 1 classes.
Train on 8222 samples, validate on 2000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
VGG16
Found 8222 images belonging to 120 classes.
Found 2000 images belonging to 120 classes.
Found 10357 images belonging to 1 classes.
Train on 8222 samples, validate on 2000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
VGG19
Found 8222 images belonging to 120 classes.
Found 2000 images belonging to 120 classes.
Found 10357 images belonging to 1 classes.
Train on 8222 samples, validate on 2000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
E

Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
ResNet50
Found 8222 images belonging to 120 classes.
Found 2000 images belonging to 120 classes.
Found 10357 images belonging to 1 classes.
Train on 8222 samples, validate on 2000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
InceptionResNetV2
Found 8222 images belonging to 120 classes.
Found 2000 images belonging to 120 classes.
Found 10357 images belonging to 1 classes.
Train on 8222 samples, validate on 2000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [93]:
# Re-average using only a subset of the trained models. Members are selected by
# name rather than by position so that reordering or editing `base_models`
# cannot silently pick different networks.
ensemble_members = ('Xception', 'InceptionV3', 'InceptionResNetV2')
trained_names = [entry['name'] for entry in base_models]
assert len(preds) == len(trained_names), \
    'preds is out of sync with base_models; re-run the training cell'
pred_ensemble = np.stack(
    [preds[trained_names.index(member)] for member in ensemble_members]
).mean(axis=0)

### predict

In [81]:
# Image id = file name without directory or extension. Using os.path keeps this
# independent of the path separator and directory depth, and strips only the
# final extension so ids containing a dot are not truncated.
test_ids = [os.path.splitext(os.path.basename(f))[0] for f in batches_test.filenames]

In [94]:
labels = pd.read_csv(data_dir+'/labels.csv')
# NOTE(review): assumes the sorted breed names line up with the class-index
# order of the prediction columns -- confirm against the training iterator.
cols = ['id']+sorted(labels.breed.unique())
# Build the frame from the float predictions and insert the ids afterwards;
# stacking ids and predictions into one array would turn every probability
# into a string.
subm = pd.DataFrame(pred_ensemble, columns=cols[1:])
subm.insert(0, 'id', test_ids)

In [95]:
# Short run label: used in the CSV file name and as the submission message.
description = 'xception_inception_inception_resnet_average'
# Minute-resolution timestamp keeps successive submissions from overwriting each other.
timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M')
submission_file_name = data_dir + ('/results/%s_%s.csv' % (description, timestamp))
subm.to_csv(submission_file_name, index=False)

### submit

In [96]:
# Configure the `kg` command-line tool with the user name and password imported
# from the local `secrets` module and with the competition name set above.
# NOTE(review): the password is passed as a plain command-line argument --
# confirm it cannot leak through the cell output or the process list.
!kg config -u $KAGGLE_USER -p $KAGGLE_PW -c $competition_name

In [None]:
!kg submit $submission_file_name -m $description