In [None]:
%matplotlib inline

import numpy as np
import random
import os
import time

from IPython.display import FileLink

from matplotlib import pyplot as plt

import warnings
with warnings.catch_warnings():
    warnings.filterwarnings("ignore",category=FutureWarning)
    import h5py

import sys
from PIL import Image
sys.modules['Image'] = Image 
    
from keras.layers.core import Flatten, Dense, Dropout, Lambda
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.models import Sequential
from keras.optimizers import Adam, RMSprop
from keras.preprocessing import image
from keras.utils.data_utils import get_file

# In case we are going to use the TensorFlow backend we need to explicitly set the Theano image ordering
from keras import backend as K
K.set_image_dim_ordering('th')

In [None]:
# Pre-calculated per-channel mean of the ImageNet dataset, shaped (3, 1, 1) so
# that it broadcasts against channels-first image tensors.
vgg_mean = np.array([123.68, 116.779, 103.939], dtype=np.float32).reshape((3, 1, 1))


def vgg_preprocess(x):
    """Centre a batch of images on the ImageNet mean and flip RGB to BGR.

    `x` is indexed as (batch, channel, ...): the mean is subtracted per
    channel, then axis 1 is reversed.
    """
    centered = x - vgg_mean
    # Reversing the channel axis turns RGB ordering into BGR.
    return centered[:, ::-1]

def add_convolutional_layers(model):
    """Append the five VGG16-style convolutional blocks to `model`.

    Every block is a run of zero-padded 3x3 ReLU convolutions that is closed
    by a 2x2 max-pooling layer with stride 2.
    """
    # (number of convolutions, number of filters) for each block, in order.
    block_specs = ((2, 64), (2, 128), (3, 256), (3, 512), (3, 512))
    for conv_count, filters in block_specs:
        for _ in range(conv_count):
            # Pad by one pixel on each side so the 3x3 convolution keeps the
            # spatial size unchanged.
            model.add(ZeroPadding2D((1, 1)))
            model.add(Convolution2D(filters, (3, 3), activation='relu'))
        model.add(MaxPooling2D((2, 2), strides=(2, 2)))
    
def add_fully_connected_layers(model):
    """Append two 4096-unit ReLU dense layers, each followed by 0.5 dropout."""
    hidden_units, drop_rate = 4096, 0.5
    for _ in range(2):
        model.add(Dense(hidden_units, activation='relu'))
        model.add(Dropout(drop_rate))

def build_model():
    """Assemble and return the full network as a Keras `Sequential` model.

    Layer order: preprocessing `Lambda`, convolutional blocks, `Flatten`,
    fully-connected layers, and a 1000-way softmax output layer.
    """
    # Channels-first input: 3 colour planes of 224x224 pixels.
    input_shape = (3, 224, 224)

    net = Sequential()
    # Mean subtraction and RGB->BGR happen inside the model itself.
    net.add(Lambda(vgg_preprocess, input_shape=input_shape, output_shape=input_shape))

    add_convolutional_layers(net)
    net.add(Flatten())
    add_fully_connected_layers(net)

    net.add(Dense(1000, activation='softmax'))
    return net

def finetune_model(model, num_categories):
    """Replace the output layer of `model` in place with a new softmax head.

    The current last layer is removed, every remaining layer is marked
    non-trainable, and a `Dense` softmax layer with `num_categories` units is
    appended. Nothing is returned; the model must be compiled afterwards.
    """
    # Drop the existing output layer.
    model.pop()
    # Freeze everything that is left; the layer added below is not touched
    # by this loop.
    for kept_layer in model.layers:
        kept_layer.trainable = False
    new_head = Dense(num_categories, activation='softmax')
    model.add(new_head)
    
def get_batches(path,
                batch_size,
                gen=None,
                target_size=(224, 224),
                shuffle=True,
                class_mode='categorical'):
    """Create a directory iterator that yields image batches from `path`.

    Args:
        path: Directory passed to `flow_from_directory`.
        batch_size: Number of images per batch.
        gen: Generator object providing `flow_from_directory`. When None
            (the default) a fresh `image.ImageDataGenerator()` is created for
            this call.
        target_size: (height, width) every image is resized to.
        shuffle: Whether the iterator shuffles the images.
        class_mode: Label format forwarded to `flow_from_directory`.

    Returns:
        Whatever `gen.flow_from_directory(...)` returns.
    """
    # Build the default generator per call: a default evaluated in the
    # signature would be created once at definition time and then shared by
    # every caller.
    if gen is None:
        gen = image.ImageDataGenerator()
    return gen.flow_from_directory(path,
                                   batch_size=batch_size,
                                   target_size=target_size,
                                   shuffle=shuffle,
                                   class_mode=class_mode)
    
def plots(ims, figsize=(12,6), rows=1, interp=False, titles=None):
    """Show a sequence of images in a grid of subplots.

    Args:
        ims: Sequence of images. If the items are numpy arrays the batch is
            cast to uint8 and, when its last axis is not of size 3,
            transposed from (n, c, h, w) to (n, h, w, c). Other items are
            handed to `imshow` unchanged.
        figsize: Figure size passed to `plt.figure`.
        rows: Number of grid rows; the column count is derived from it.
        interp: When False (default) images are drawn with
            interpolation='none'; when True, `interpolation=None` is passed.
        titles: Optional sequence with one title per image.

    Does nothing when `ims` is empty.
    """
    # Indexing ims[0] below would fail on an empty batch.
    if len(ims) == 0:
        return
    if isinstance(ims[0], np.ndarray):
        ims = np.array(ims).astype(np.uint8)
        if ims.shape[-1] != 3:
            ims = ims.transpose((0, 2, 3, 1))
    count = len(ims)
    # Ceiling division: floor division leaves too few grid cells whenever
    # `count` is not a multiple of `rows`, and add_subplot then rejects the
    # trailing indices.
    cols = -(-count // rows)
    f = plt.figure(figsize=figsize)
    for i in range(count):
        sp = f.add_subplot(rows, cols, i + 1)
        sp.axis('Off')
        if titles is not None:
            sp.set_title(titles[i], fontsize=16)
        # Draw on this subplot explicitly instead of relying on pyplot's
        # "current axes" state.
        sp.imshow(ims[i], interpolation=None if interp else 'none')

In [None]:
# Root directory of the dataset; the 'train' and 'valid' splits are looked up
# directly beneath it.
path = "data/dogscats-redux/full/"
batch_size = 64

# Directory iterators for the training and validation splits.
trn_batches = get_batches(os.path.join(path, 'train'), batch_size=batch_size)
val_batches = get_batches(os.path.join(path, 'valid'), batch_size=batch_size, shuffle=True)

In [None]:
# File name the weights are stored under, and the URL they are fetched from.
model_local_path = 'vgg16.h5'
model_url = 'http://files.fast.ai/models/vgg16.h5'

# Build the architecture, then load the pretrained weights into it.
model = build_model()
weights_file = get_file(model_local_path, model_url, cache_subdir='models')
model.load_weights(weights_file)

# Swap the output layer for one sized to the classes found in the training
# directory.
finetune_model(model, trn_batches.num_classes)

model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=0.01),
              metrics=['accuracy'])

model.summary()

In [None]:
# Batches needed for one full pass over each split, rounded up so that a
# final partial batch is included.
trn_steps, val_steps = (
    int(np.ceil(batches.samples / batch_size))
    for batches in (trn_batches, val_batches)
)

for label, steps in (("Training steps:", trn_steps),
                     ("Validation steps:", val_steps)):
    print(label, steps)

In [None]:
num_epochs = 3
results_dir = os.path.join(path, "results")
# Pure-Python replacement for the `%mkdir -p` shell magic: creates missing
# parent directories, is a no-op when the directory already exists, and does
# not interpolate the path into a shell command.
os.makedirs(results_dir, exist_ok=True)

# Train one epoch at a time so that a weights checkpoint is written after
# every epoch.
for epoch in range(num_epochs):
    model.fit_generator(trn_batches,
                        steps_per_epoch=trn_steps,
                        epochs=1,
                        validation_data=val_batches,
                        validation_steps=val_steps)
    model.save_weights(os.path.join(results_dir, "model.%d.h5" % epoch))

In [None]:
# Pull one batch from the training iterator and show its images, using the
# label of each image as the subplot title.
sample_batch = next(trn_batches)
imgs, labels = sample_batch
plots(imgs, titles=labels)

In [None]:
tst_batch_size = 4
# shuffle=False keeps the iterator in a fixed order, so predictions can be
# matched to `tst_batches.filenames` by position.
tst_batches = get_batches(os.path.join(path, 'test'),
                          batch_size=tst_batch_size,
                          shuffle=False)

In [None]:
# Number of batches needed to cover every test image once (rounded up).
tst_steps = int(np.ceil(tst_batches.samples / tst_batch_size))
pred = model.predict_generator(tst_batches, tst_steps, verbose=1)

In [None]:
# Pick four distinct prediction rows at random and resolve the image file
# behind each of them.
tst_indices = random.sample(range(len(pred)), 4)
tst_dir = os.path.join(path, 'test')
tst_filenames = [os.path.join(tst_dir, tst_batches.filenames[idx]) for idx in tst_indices]

In [None]:
# Load the sampled test images from disk for display.
tst_images = [image.load_img(filename) for filename in tst_filenames]
# Column 1 of the softmax output is treated as the "dog" probability.
# NOTE(review): this assumes class index 1 is the dog class — confirm against
# trn_batches.class_indices.
is_dog = pred[:, 1]
# (A bare `pred[tst_indices]` expression used to sit here; it was neither the
# last expression of the cell nor assigned, so it displayed nothing.)
plots(tst_images, titles=is_dog[tst_indices])

In [None]:
# Timestamp the file name so repeated runs do not overwrite earlier
# submissions.
timestamp = int(time.time())
submission_name = 'submission.{0}.csv'.format(timestamp)
submission_path = os.path.join(path, 'results', submission_name)

# Keep the probabilities inside [0.05, 0.95].
is_dog_clipped = is_dog.clip(min=0.05, max=0.95)

# The numeric id is the text between character 11 and the first '.' of each
# relative file name.
# NOTE(review): the offset 11 presumably skips a fixed-length prefix of the
# test file names — confirm against the actual test directory layout.
ids = np.array([int(name[11:name.find('.')]) for name in tst_batches.filenames])

# Two columns: id, clipped probability.
submission = np.column_stack((ids, is_dog_clipped))
np.savetxt(submission_path, submission, fmt='%d,%.5f', header='id,label', comments='')
FileLink(submission_path)