In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
from __future__ import division
import sys
import os
from os.path import join as pjoin
from glob import glob
sys.path.append('..')

In [2]:
import cv2
import csv
import numpy as np
from sklearn.model_selection import train_test_split
from utils.commands import unzip, make_data_dir, mkdir, call, count_file, KaggleCLI, execute_in

# Helper bound to the 'dogs-vs-cats-redux-kernels-edition' competition slug;
# it is used below to download the competition data.
cli = KaggleCLI('dogs-vs-cats-redux-kernels-edition')

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Data preprocessing

In [3]:
# Download the competition data via the KaggleCLI helper.
# NOTE(review): presumably this places train.zip / test.zip under ./data,
# since that is where the unzip cell below reads them from -- confirm in utils.commands.
cli.download_data()

In [4]:
# Names of the dataset splits (plain strings; not referenced by the cells visible here).
train_name = 'train'
valid_name = 'valid'
# NOTE(review): 'ttest' looks like a typo for 'test' -- the directories created
# and read further down are called 'test'. Confirm before relying on this value.
test_name = 'ttest'

# Working directories, all derived from the current working directory.
data_path = os.path.join(os.getcwd(), 'data')
sample_path = os.path.join(data_path, 'sample')
model_path = os.path.join(os.getcwd(), 'models')

In [7]:
# Extract both competition archives into the data directory (test first, then train).
# The train.zip call stays last so its return value remains the cell's displayed result.
unzip(pjoin(data_path, 'test.zip'), data_path)
unzip(pjoin(data_path, 'train.zip'), data_path)

data/test.zip data
data/train.zip data


0

In [8]:
# Create the <split>/<class> directory layout under both the full data
# directory and the small sample directory.
split_roots = (data_path, sample_path)
for label in ['dogs', 'cats']:
    for split in ['train', 'test', 'valid']:
        for root in split_roots:
            mkdir(pjoin(root, split, label))

In [5]:
# Sort the unzipped training images into the per-class sub-directories
# train/cats and train/dogs, based on the 'cat.' / 'dog.' file-name prefix.
#
# Done in pure Python instead of `find . -name 'cat.*' | xargs -J ^ mv ^ cats`:
#   * `xargs -J` only exists in BSD/macOS xargs, so the pipeline fails with GNU xargs;
#   * no os.chdir() is involved, so a failure cannot leave the kernel in data/train;
#   * only the top level of train/ is listed, so re-running the cell does not
#     touch files that already live in cats/ or dogs/.
train_dir = pjoin(data_path, 'train')
class_dir_by_prefix = {'cat.': 'cats', 'dog.': 'dogs'}

for filename in os.listdir(train_dir):
    # The 'cats' / 'dogs' directories themselves have no matching 4-char prefix
    # ('cats', 'dogs'), so they are skipped here.
    class_dir = class_dir_by_prefix.get(filename[:4])
    if class_dir is not None:
        os.rename(pjoin(train_dir, filename), pjoin(train_dir, class_dir, filename))

/Users/dorian/WorkSpace/kaggle_fun/dogs_vs_cats_redux_kernels_edition/data/train


In [5]:
# Split the class-sorted training images into train / valid / test sets and
# build a tiny "sample" copy of each split for quick experiments.
#
# NOTE: this cell moves files, so it is not idempotent -- re-running it splits
# whatever is left in train/ again.

SPLIT_SEED = 42  # fixed seed so the split is reproducible across runs

cwd = os.getcwd()
os.chdir(data_path)
try:
    # os.listdir order is arbitrary; sort so that the seed fully determines the split.
    cat_files = sorted(os.listdir('train/cats'))
    dog_files = sorted(os.listdir('train/dogs'))

    # Split each class on its own (a joint train_test_split(cats, dogs) requires
    # both lists to have exactly the same length).
    # 20% of each class goes to validation, then 10% of the remainder to test.
    train_cats, valid_cats = train_test_split(cat_files, test_size=0.2, random_state=SPLIT_SEED)
    train_dogs, valid_dogs = train_test_split(dog_files, test_size=0.2, random_state=SPLIT_SEED)
    train_cats, test_cats = train_test_split(train_cats, test_size=0.1, random_state=SPLIT_SEED)
    train_dogs, test_dogs = train_test_split(train_dogs, test_size=0.1, random_state=SPLIT_SEED)

    # (shell command template, file names) pairs, executed in order.
    file_operations = [
        # Move the held-out files out of train/.
        ("mv train/dogs/{} valid/dogs", valid_dogs),
        ("mv train/cats/{} valid/cats", valid_cats),
        ("mv train/dogs/{} test/dogs", test_dogs),
        ("mv train/cats/{} test/cats", test_cats),
        # Sample data: a handful of files per split.
        ("cp train/dogs/{} sample/train/dogs", train_dogs[:20]),
        ("cp train/cats/{} sample/train/cats", train_cats[:20]),
        # The valid/test files were moved above, so they must be copied from
        # their new location (copying from train/ would fail: file not found).
        ("cp valid/dogs/{} sample/valid/dogs", valid_dogs[:5]),
        ("cp valid/cats/{} sample/valid/cats", valid_cats[:5]),
        ("cp test/dogs/{} sample/test/dogs", test_dogs[:5]),
        ("cp test/cats/{} sample/test/cats", test_cats[:5]),
    ]
    for command_template, filenames in file_operations:
        for filename in filenames:
            call(command_template.format(filename))
finally:
    # Always restore the working directory, even if a command above raised.
    os.chdir(cwd)

/Users/dorian/WorkSpace/kaggle_fun/dogs_vs_cats_redux_kernels_edition/data
/Users/dorian/WorkSpace/kaggle_fun/dogs_vs_cats_redux_kernels_edition


## Fine-tune VGG16

In [4]:
from utils.pretrained_models import VGG16
from keras.preprocessing.image import ImageDataGenerator

In [5]:
# Build the project's VGG16 wrapper and keep its underlying `.model` object,
# which is what gets compiled, trained and evaluated below.
# NOTE(review): the argument 2 is presumably the number of output classes
# (dogs / cats) -- confirm against utils.pretrained_models.
vgg_model = VGG16.get_model(2).model

In [7]:
# One plain (no augmentation arguments) generator per split.
train_datagen = ImageDataGenerator()
valid_datagen = ImageDataGenerator()
test_datagen = ImageDataGenerator()

# Settings shared by all three directory iterators.
flow_options = dict(
    target_size=(224, 224),
    batch_size=5,
    class_mode='categorical',
)

train_flow = train_datagen.flow_from_directory(pjoin(data_path, 'train'), **flow_options)
valid_flow = valid_datagen.flow_from_directory(pjoin(data_path, 'valid'), **flow_options)
# Shuffling is disabled for the test iterator only.
test_flow = test_datagen.flow_from_directory(pjoin(data_path, 'test'), shuffle=False, **flow_options)

Found 40 images belonging to 2 classes.
Found 10 images belonging to 2 classes.
Found 10 images belonging to 2 classes.


In [11]:
# Mark every layer except the last one as non-trainable, so only the final
# layer is updated during fitting.
for frozen_layer in vgg_model.layers[:-1]:
    frozen_layer.trainable = False

In [12]:
# Compile (after the trainable flags were set above) and run one training epoch.
vgg_model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

# NOTE(review): the step counts are hard-coded. With batch_size=5 they cover
# 100 images per epoch / validation pass regardless of how many files the
# iterators found -- confirm this is intended rather than len(train_flow) / len(valid_flow).
vgg_model.fit_generator(
    train_flow,
    validation_data=valid_flow,
    steps_per_epoch=20,
    validation_steps=20,
    epochs=1,
)

Epoch 1/1


<keras.callbacks.History at 0x1063eee50>

In [13]:
# Evaluate on the held-out test iterator. The model was compiled with
# categorical cross-entropy loss and metrics=['accuracy'], so the displayed
# list is [loss, accuracy].
vgg_model.evaluate_generator(test_flow)

[0.03848399128764868, 1.0]

In [17]:
# Per-image predictions for the test iterator (created with shuffle=False).
# NOTE(review): the two output columns presumably follow the alphabetical
# class-folder order (cats, dogs) -- verify with test_flow.class_indices.
vgg_model.predict_generator(test_flow)

array([[9.9922144e-01, 7.7851169e-04],
       [5.2542412e-03, 9.9474573e-01],
       [9.4702196e-01, 5.2978039e-02],
       [9.9267840e-01, 7.3215407e-03],
       [1.6351958e-04, 9.9983644e-01],
       [9.2523640e-01, 7.4763611e-02],
       [2.1216381e-01, 7.8783619e-01],
       [1.9384778e-04, 9.9980623e-01],
       [9.9969661e-01, 3.0338240e-04],
       [1.7961633e-04, 9.9982041e-01]], dtype=float32)