In [2]:
import math
import os
import datetime

import numpy as np
import pandas as pd

from keras.preprocessing import image
from keras.layers import Input, Lambda, Dense, Dropout, Flatten
from keras.models import Model, Sequential

from keras.utils import to_categorical
from keras.optimizers import RMSprop, Adam

from keras.applications import xception
from keras.applications import inception_v3

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss, accuracy_score

from secrets import KAGGLE_USER, KAGGLE_PW

Using TensorFlow backend.


In [3]:
# Kaggle competition slug; it names the data sub-directory below.
competition_name = 'dog-breed-identification'
# Root of the preprocessed image tree for this competition.
data_dir = '/opt/notebooks/data/{}/preprocessed'.format(competition_name)

# Batch geometry shared by every directory iterator in this notebook.
# 299x299 is the spatial size that add_preprocess defaults to.
batch_size = 16
target_size = (299, 299)

# Generator created with library defaults only (no arguments are passed).
gen = image.ImageDataGenerator()

def add_preprocess(base_model, preprocess_func, inputs_shape=(299, 299, 3)):
    """Wrap ``base_model`` so that preprocessing runs as part of the model.

    Args:
        base_model: Callable model applied to the preprocessed tensor.
        preprocess_func: Function wrapped in a ``Lambda`` layer and applied
            to the raw input tensor before ``base_model`` sees it.
        inputs_shape: Shape handed to ``Input`` for the raw input tensor.

    Returns:
        A ``Model`` mapping the raw input tensor to ``base_model``'s output.
    """
    raw_input = Input(shape=inputs_shape)
    preprocess_layer = Lambda(preprocess_func)
    features = base_model(preprocess_layer(raw_input))
    return Model(raw_input, features)

In [4]:
# Unshuffled iterators: the features computed from these iterators are later
# paired with `.classes` purely by position, so the order must stay fixed.
batches = gen.flow_from_directory(data_dir+'/train', shuffle=False, target_size=target_size, batch_size=batch_size)
batches_val = gen.flow_from_directory(data_dir+'/valid', shuffle=False, target_size=target_size, batch_size=batch_size)

# Generator steps needed to cover every image once (the last batch may be partial).
nb_batches = math.ceil(batches.n/batch_size)
nb_batches_val = math.ceil(batches_val.n/batch_size)

# Integer class ids, used by the scikit-learn model and metrics.
y_encode = batches.classes
y_val_encode = batches_val.classes

# One-hot labels for the Keras models. The width is pinned to the number of
# class folders in the training split; otherwise to_categorical infers it from
# the largest label present, and the two matrices could disagree in width if a
# split happens to lack the highest class id.
num_classes = len(batches.class_indices)
y = to_categorical(y_encode, num_classes=num_classes)
y_val = to_categorical(y_val_encode, num_classes=num_classes)

Found 8222 images belonging to 120 classes.
Found 2000 images belonging to 120 classes.


In [5]:
def load_or_compute_features(cache_path, model, batch_iter, steps):
    """Return bottleneck features, reading them from ``cache_path`` if present.

    Args:
        cache_path: Path of the ``.npy`` file holding the cached features.
        model: Model whose ``predict_generator`` produces the features.
        batch_iter: Directory iterator yielding the images, in a fixed order.
        steps: Number of batches to draw from ``batch_iter``.

    Returns:
        The feature array, either loaded from disk or freshly computed
        (in which case it is also written to ``cache_path``).
    """
    if os.path.exists(cache_path):
        return np.load(cache_path)

    # Start from the first batch so rows line up with `batch_iter.classes`.
    batch_iter.reset()
    features = model.predict_generator(batch_iter, steps=steps, verbose=1)

    cache_dir = os.path.dirname(cache_path)
    if cache_dir:
        os.makedirs(cache_dir, exist_ok=True)
    np.save(cache_path, features)
    return features


# Xception without its classification head; pooling='avg' is requested so the
# network output is a pooled feature vector rather than a feature map.
base_model = xception.Xception(weights='imagenet', include_top=False, pooling='avg')

model_x = add_preprocess(base_model, xception.preprocess_input)

# Features are expensive to compute, so they are cached on disk and only
# recomputed when the cache file is missing.
bf_x = load_or_compute_features(data_dir+'/results/bf_x.npy', model_x, batches, nb_batches)
bf_val_x = load_or_compute_features(data_dir+'/results/bf_val_x.npy', model_x, batches_val, nb_batches_val)

In [7]:
# Baseline: multinomial logistic regression fitted on the bottleneck features.
logreg = LogisticRegression(solver='lbfgs', multi_class='multinomial')
logreg.fit(bf_x, y_encode)

# Validation predictions: class probabilities for log loss, hard labels for accuracy.
valid_probs = logreg.predict_proba(bf_val_x)
valid_preds = logreg.predict(bf_val_x)

valid_logloss = log_loss(y_val_encode, valid_probs)
print('logloss:', valid_logloss)
valid_accuracy = accuracy_score(y_val_encode, valid_preds)
print('accuracy:', valid_accuracy)

logloss: 0.325739933423
accuracy: 0.904


In [9]:
# Single softmax layer on top of the bottleneck features, trained with RMSprop.
# Layer sizes are read from the arrays (feature width -> number of classes)
# instead of being hard-coded.
lm = Sequential([Dense(y.shape[1], activation='softmax', input_shape=(bf_x.shape[1],))])
lm.compile(optimizer=RMSprop(), loss='categorical_crossentropy', metrics=['accuracy'])
# `batch_size` is the number of samples per gradient update. The previous value,
# nb_batches, is the number of generator steps and was passed here by mistake.
lm.fit(bf_x, y, epochs=15, batch_size=batch_size, validation_data=(bf_val_x, y_val))

Train on 8222 samples, validate on 2000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7f1e5d3d5ef0>

In [15]:
# Same single softmax layer, re-created from scratch and trained longer with Adam.
# Layer sizes are read from the arrays (feature width -> number of classes)
# instead of being hard-coded.
lm = Sequential([Dense(y.shape[1], activation='softmax', input_shape=(bf_x.shape[1],))])
lm.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
# `batch_size` is the number of samples per gradient update. The previous value,
# nb_batches, is the number of generator steps and was passed here by mistake.
lm.fit(bf_x, y, epochs=50, batch_size=batch_size, validation_data=(bf_val_x, y_val))

Train on 8222 samples, validate on 2000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7f1e50099630>

In [17]:
# Single softmax layer trained in two phases: Adam at 1e-3, then a short
# low-learning-rate phase at 1e-5.
# Layer sizes are read from the arrays (feature width -> number of classes)
# instead of being hard-coded.
lm = Sequential([Dense(y.shape[1], activation='softmax', input_shape=(bf_x.shape[1],))])
lm.compile(optimizer=Adam(lr=1e-3), loss='categorical_crossentropy', metrics=['accuracy'])
# `batch_size` is the number of samples per gradient update. The previous value,
# nb_batches, is the number of generator steps and was passed here by mistake.
lm.fit(bf_x, y, epochs=50, batch_size=batch_size, validation_data=(bf_val_x, y_val))
# Recompiling installs a new Adam instance with the lower learning rate; the
# layer weights learned in the first phase are kept.
lm.compile(optimizer=Adam(lr=1e-5), loss='categorical_crossentropy', metrics=['accuracy'])
lm.fit(bf_x, y, epochs=5, batch_size=batch_size, validation_data=(bf_val_x, y_val))

Train on 8222 samples, validate on 2000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Train on 8222 samples, validate on 2000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f1e491a8b38>