In [1]:
import math
import os
import datetime

import numpy as np
import pandas as pd

from keras.preprocessing import image
from keras.layers import Input, Lambda, Dense, Dropout, Flatten
from keras.models import Model, Sequential

from keras.utils import to_categorical
from keras.optimizers import RMSprop

from keras.applications import xception

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss, accuracy_score

from secrets import KAGGLE_USER, KAGGLE_PW

Using TensorFlow backend.


In [2]:
# Competition identifier; it selects the data sub-directory below.
competition_name = 'dog-breed-identification'
data_dir = ''.join(['/opt/notebooks/data/', competition_name, '/preprocessed'])

# Settings shared by every directory iterator created from `gen`.
target_size = (299, 299)
batch_size = 16
gen = image.ImageDataGenerator()

def add_preprocess(base_model, preprocess_func, inputs_shape=(299, 299, 3)):
    """Return a model that runs ``preprocess_func`` ahead of ``base_model``.

    Args:
        base_model: Callable applied to the preprocessed tensor.
        preprocess_func: Function wrapped in a ``Lambda`` layer and applied
            to the raw input tensor.
        inputs_shape: Shape handed to ``Input``; defaults to ``(299, 299, 3)``.

    Returns:
        A ``Model`` mapping the raw input tensor to ``base_model``'s output.
    """
    raw_input = Input(shape=inputs_shape)
    preprocessed = Lambda(preprocess_func)(raw_input)
    return Model(raw_input, base_model(preprocessed))

### train

In [3]:
# Directory iterators for the training and validation splits; the validation
# iterator is created with shuffle=False.
batches = gen.flow_from_directory(data_dir+'/train', target_size=target_size, batch_size=batch_size)
batches_val = gen.flow_from_directory(data_dir+'/valid', shuffle=False, target_size=target_size, batch_size=batch_size)

# Batches per pass over each split; ceil so a final partial batch is counted.
nb_batches = math.ceil(batches.n/batch_size)
nb_batches_val = math.ceil(batches_val.n/batch_size)

# Integer class index of every image, as reported by the iterators.
y_encode = batches.classes
y_val_encode = batches_val.classes

# One-hot labels. The width is pinned to the number of training classes:
# without num_classes, to_categorical sizes its output from the largest index
# present, so a validation split missing the last classes would yield a
# narrower matrix than the training one.
nb_classes = len(batches.class_indices)
y = to_categorical(y_encode, num_classes=nb_classes)
y_val = to_categorical(y_val_encode, num_classes=nb_classes)


Found 8222 images belonging to 120 classes.
Found 2000 images belonging to 120 classes.


In [4]:
# Xception backbone with ImageNet weights, no classification top, and global
# average pooling on the output.
base_model = xception.Xception(weights='imagenet', include_top=False, pooling='avg')

# Raw images go in; Xception's own preprocessing runs inside the model.
inputs = Input(shape=(299, 299, 3))
x = base_model(Lambda(xception.preprocess_input)(inputs))

# New 120-way softmax head on top of the pooled backbone output.
outputs = Dense(120, activation='softmax', name='predictions')(x)
model_ft = Model(inputs, outputs)

# Freeze every backbone layer (done before the model is compiled).
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False

In [5]:
# Default-configured RMSprop with categorical cross-entropy; accuracy is
# tracked as an additional metric.
model_ft.compile(
    optimizer=RMSprop(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [6]:
# Fit from the training iterator for 10 epochs, with the validation iterator
# supplied as validation data. Left as a bare expression so the returned
# History object is shown as the cell output.
model_ft.fit_generator(
    batches,
    steps_per_epoch=nb_batches,
    epochs=10,
    validation_data=batches_val,
    validation_steps=nb_batches_val,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f16e2b60908>

### predict

In [7]:
# Unshuffled iterator over the test images, using the same image size and
# batch size as the train/valid iterators.
batches_test = gen.flow_from_directory(
    data_dir+'/test',
    shuffle=False,
    target_size=target_size,
    batch_size=batch_size,
)

Found 10357 images belonging to 1 classes.


In [8]:
# Batches needed to cover every test image once; ceil so a final partial
# batch is counted.
nb_batches_test = math.ceil(batches_test.n / batch_size)

In [9]:
# Rewind the iterator before predicting so that re-running this cell (or
# running it after the iterator has been partly consumed) still yields
# predictions starting from the first entry of `batches_test.filenames`.
batches_test.reset()
pred = model_ft.predict_generator(batches_test, steps=nb_batches_test, verbose=1)



In [10]:
# Image id = file name without its directory or extension. os.path is used
# instead of splitting on '/' and '.' so the result does not depend on the
# path separator and an id containing a dot is not truncated.
test_ids = [os.path.splitext(os.path.basename(f))[0] for f in batches_test.filenames]

In [11]:
# Column names: 'id' followed by the sorted unique breed names from labels.csv.
# NOTE(review): this assumes the sorted breed names are in the same order as
# the class indices behind the columns of `pred` -- confirm against
# `batches.class_indices`.
labels = pd.read_csv(data_dir+'/labels.csv')
cols = ['id']+sorted(labels.breed.unique())

# Build the frame from the float predictions and then insert the id column.
# Stacking ids and probabilities into a single NumPy array would coerce every
# probability to a string.
subm = pd.DataFrame(pred, columns=cols[1:])
subm.insert(0, 'id', test_ids)

In [12]:
description = 'xception_data_finetune_simple'

# to_csv does not create missing parent directories, so make sure the results
# folder exists before writing into it.
results_dir = data_dir+'/results'
os.makedirs(results_dir, exist_ok=True)

# File name: <description>_<timestamp to the minute>.csv
timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M')
submission_file_name = '%s/%s_%s.csv' % (results_dir, description, timestamp)
subm.to_csv(submission_file_name, index=False)

### submit

In [13]:
# Configure the `kg` command-line tool with the user, password and competition
# name interpolated from the Python variables of the same names.
# NOTE(review): the password is passed as a command-line argument, so it may be
# visible in process listings -- confirm this is acceptable.
!kg config -u $KAGGLE_USER -p $KAGGLE_PW -c $competition_name

In [14]:
# Submit the file at `submission_file_name`, passing `description` via -m.
# NOTE(review): the interpolated values are unquoted, so a path or description
# containing whitespace would be split into several shell arguments.
!kg submit $submission_file_name -m $description

0.53155
