In [3]:
%matplotlib inline
%config IPCompleter.greedy=True
from time import strftime, gmtime

import numpy as np
import pandas as pd
import re
import os

from keras.layers.core import Lambda, Dense, Flatten, Dropout
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Convolution2D, ZeroPadding2D, MaxPooling2D
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator, DirectoryIterator
from keras.optimizers import Adam, RMSprop

import matplotlib.pyplot as plt

from keras import backend as K
# Use Theano-style channels-first tensors (channels, rows, cols). The rest of
# the notebook relies on this: the model is declared with
# input_shape=(3, 224, 224) and vgg_mean is shaped (3, 1, 1).
K.set_image_dim_ordering("th")

In [17]:
from keras.optimizers import RMSprop

# Per-channel offsets subtracted from every input image. The trailing unit axes
# give the array shape (3, 1, 1) so it broadcasts over the two spatial axes of a
# channels-first image. (Presumably the RGB channel means the VGG weights were
# trained with -- confirm against the weights' source.)
vgg_mean = np.array([123.68, 116.779, 103.939], dtype=np.float32)[:, np.newaxis, np.newaxis]

def vgg_preprocess(x):
    """Centre a batch of channels-first images and flip the channel order.

    ``vgg_mean`` is subtracted from ``x`` and axis 1 (the channel axis of a
    ``(batch, channels, rows, cols)`` input) is then reversed, i.e. RGB -> BGR.
    """
    return (x - vgg_mean)[:, ::-1]

def plot_img(img):
    """Display a single image array with matplotlib.

    The array is cast to uint8 and its axes are reordered with (1, 2, 0),
    which moves axis 0 (assumed to be the channel axis) to the end, before
    it is handed to ``plt.imshow``.
    """
    channels_last = np.transpose(img.astype(np.uint8), (1, 2, 0))
    plt.imshow(channels_last)

def get_time_str():
    """Return the current UTC time formatted as ``YYYY_MM_DD__HH_MM_SS``."""
    now_utc = gmtime()
    return strftime("%Y_%m_%d__%H_%M_%S", now_utc)

# First run of decimal digits. Raw string so that \d reaches the regex engine
# instead of being treated as an (invalid) string escape.
p = re.compile(r'\d+')

def extract_id(f_name):
    """Return the numeric id embedded in an image file name, as a string.

    Only the last path component is searched, so digits that belong to a
    directory name (e.g. ``test1/42.jpg``) are not mistaken for the id.

    Raises:
        ValueError: if the file name contains no digits.
    """
    base_name = os.path.basename(f_name)
    match = p.search(base_name)
    if match is None:
        raise ValueError('no numeric id found in file name: {!r}'.format(f_name))
    return match.group()

def get_list_of_images(d, batch_size=64, verbose=True):
    """Load every image found under directory ``d`` into a single array.

    A non-shuffling generator is created with ``load_generator`` and read for
    exactly one pass over the data.

    Args:
        d: directory passed to ``load_generator``.
        batch_size: number of images requested per generator step.
        verbose: when true, print the index of each batch as it is read
            (the original behaviour); pass False to keep cell output short.

    Returns:
        ``(images, filenames)`` where ``images`` is all batches stacked along
        axis 0 and ``filenames`` is the generator's ``filenames`` attribute.
        Shuffling is disabled so the two are expected to be in the same order.
    """
    bb = load_generator(d, batch_size, False)

    # One pass is ceil(samples / batch_size) steps. Counting the steps up front
    # replaces the previous "stop when batch_index wraps to 0" test, which read
    # the data twice whenever the whole directory fitted in a single batch.
    n_batches = -(-bb.samples // batch_size)

    batches = []
    for counter in range(n_batches):
        if verbose:
            print(counter)
        # Each step yields (images, labels); only the images are kept.
        batches.append(bb.next()[0])

    if not batches:
        return np.array([]), bb.filenames

    # Stack whole batches rather than building a Python list of single images.
    return np.concatenate(batches), bb.filenames

def create_submission(res, filenames):
    """Write predictions to ``sub_<timestamp>.csv`` with columns ``id,label``.

    Args:
        res: 2-D array of per-class predictions, one row per file; column 1
            is written as the label (presumably the probability of the second
            class -- verify against the training generator's class indices).
        filenames: file names in the same order as the rows of ``res``; the
            id is taken from each name with ``extract_id``.

    Returns:
        The name of the CSV file that was written (relative to the current
        working directory).

    Raises:
        ValueError: if ``res`` and ``filenames`` differ in length.
    """
    # zip() would silently drop the surplus rows, producing a short submission.
    if len(res) != len(filenames):
        raise ValueError('got {} predictions for {} file names'.format(len(res), len(filenames)))

    ids = [extract_id(s) for s in filenames]
    probs = res[:, 1]

    # sorted() works whether zip returns a list (Python 2) or an iterator
    # (Python 3); rows are ordered by numeric id.
    rows = sorted(zip(ids, probs), key=lambda s: int(s[0]))

    # Column order is given explicitly instead of relying on dict ordering.
    df = pd.DataFrame({'id': [r[0] for r in rows], 'label': [r[1] for r in rows]},
                      columns=['id', 'label'])
    f_name = 'sub_{}.csv'.format(get_time_str())
    df.to_csv(f_name, index=False)
    return f_name


# (number of conv layers, number of filters) for each convolutional block, in
# the order the blocks are added to the model.
_VGG_CONV_BLOCKS = ((2, 64), (2, 128), (3, 256), (3, 512), (3, 512))


def _add_conv_block(model, n_layers, n_filters):
    """Append ``n_layers`` zero-padded 3x3 ReLU convolutions, then a 2x2 max-pool."""
    for _ in range(n_layers):
        model.add(ZeroPadding2D((1,1)))
        model.add(Convolution2D(n_filters, (3, 3), activation="relu"))
    model.add(MaxPooling2D((2,2), strides=(2,2)))


def build_vgg_model(weights_fp):
    """Build the 1000-class VGG-style network and load its weights.

    The model takes channels-first ``(3, 224, 224)`` inputs, applies
    ``vgg_preprocess`` as its first layer, then the convolutional blocks listed
    in ``_VGG_CONV_BLOCKS``, two ``Dense(4096)`` + ``Dropout(0.5)`` stages and a
    1000-way softmax.

    Args:
        weights_fp: path of the weights file passed to ``model.load_weights``.

    Returns:
        The ``Sequential`` model with weights loaded (not compiled).
    """
    model = Sequential()
    model.add(Lambda(vgg_preprocess, input_shape=(3, 224, 224), output_shape=(3, 224, 224)))

    # The layers must be added in exactly this order so that they line up with
    # the layers stored in the weights file.
    for n_layers, n_filters in _VGG_CONV_BLOCKS:
        _add_conv_block(model, n_layers, n_filters)

    model.add(Flatten())

    for _ in range(2):
        model.add(Dense(4096, activation="relu"))
        model.add(Dropout(0.5))

    model.add(Dense(1000, activation="softmax"))

    model.load_weights(weights_fp)

    return model

def load_generator(fp, batch_size=64, shuffle=True):
    """Create a directory iterator over the images under ``fp``.

    Images are resized to 224x224 and labels are produced in 'categorical'
    mode. No augmentation options are set on the ``ImageDataGenerator``.

    Args:
        fp: directory handed to ``flow_from_directory``.
        batch_size: number of images per batch.
        shuffle: whether the iterator shuffles the images.
    """
    flow_options = dict(target_size=(224, 224),
                        shuffle=shuffle,
                        class_mode='categorical',
                        batch_size=batch_size)
    return ImageDataGenerator().flow_from_directory(fp, **flow_options)

def enable_gen_debug(gen, name):
    """Make ``gen.next`` print ``name`` every time it is called.

    The undecorated method is kept on ``gen.old_next``. Calling this function
    again on the same generator (e.g. when a notebook cell is re-run) replaces
    the printed label instead of wrapping the wrapper, which previously caused
    infinite recursion because ``old_next`` ended up pointing at the wrapper.

    Args:
        gen: object exposing a ``next`` method.
        name: label printed before each call is forwarded.
    """
    # Reuse the saved original if the generator has already been instrumented.
    original_next = getattr(gen, 'old_next', None)
    if original_next is None:
        original_next = gen.next
        gen.old_next = original_next

    def new_next(*args, **kwargs):
        # print() with one argument behaves the same on Python 2 and 3.
        print(name)
        return original_next(*args, **kwargs)

    gen.next = new_next

def finetune_and_compile_vgg(weights_fp, n_classes=2, lr=0.1):
    """Build the VGG model and swap its classifier for a new trainable softmax.

    The final layer of ``build_vgg_model(weights_fp)`` is removed, every
    remaining layer is frozen, and a fresh ``Dense(n_classes)`` softmax layer
    is appended, so only that new layer is trainable. The model is compiled
    with RMSprop and categorical cross-entropy, tracking accuracy.

    Args:
        weights_fp: path of the weights file for ``build_vgg_model``.
        n_classes: number of output classes of the new final layer.
        lr: learning rate passed to ``RMSprop``.

    Returns:
        The compiled model.
    """
    model = build_vgg_model(weights_fp)

    # Drop the 1000-way softmax and freeze everything that is left.
    model.pop()
    for layer in model.layers:
        layer.trainable = False

    model.add(Dense(n_classes, activation='softmax'))
    model.compile(RMSprop(lr=lr), 'categorical_crossentropy', metrics=['accuracy'])

    return model

In [5]:
# All locations are resolved relative to the current user's home directory so
# the notebook does not depend on one particular account name.
home_dir = os.path.expanduser('~')

# Weights file loaded by build_vgg_model.
weights_fp = os.path.join(home_dir, '.keras', 'models', 'vgg16.h5')

# Root of the dataset and the sub-directories used by the generators.
base_path = os.path.join(home_dir, 'courses', 'data', 'dogscats')
sample_fp = os.path.join(base_path, 'sample')
train_fp = os.path.join(base_path, 'train')
valid_fp = os.path.join(base_path, 'valid')
test_fp = os.path.join(base_path, 'test1')

In [6]:
# Build the VGG network from the weights at weights_fp, freeze its layers and
# replace the final classifier with a new trainable 2-way softmax.
model = finetune_and_compile_vgg(weights_fp)

In [18]:
batch_size = 64
# Training and validation iterators (load_generator shuffles by default).
train_gen = load_generator(train_fp, batch_size)
valid_gen = load_generator(valid_fp, batch_size)
# NOTE(review): to_pred_gen is not referenced by any other visible cell; it only
# triggers a second scan of the test directory -- confirm whether it is needed.
to_pred_gen = load_generator(test_fp, batch_size)
# Reads the whole test set into one in-memory array, plus the matching file names.
to_predict_arr, filenames = get_list_of_images(test_fp)

/home/dd_petrovskiy/courses/data/dogscats/train
Found 23000 images belonging to 2 classes.
/home/dd_petrovskiy/courses/data/dogscats/valid
Found 2000 images belonging to 2 classes.
/home/dd_petrovskiy/courses/data/dogscats/test1
Found 12500 images belonging to 1 classes.
/home/dd_petrovskiy/courses/data/dogscats/test1
Found 12500 images belonging to 1 classes.
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186

In [14]:
# Steps needed to see every sample once. An explicit ceiling is used because
# samples / batch_size floors on Python 2 (skipping the last partial batch)
# and yields a float on Python 3.
train_steps = int(np.ceil(train_gen.samples / float(batch_size)))
valid_steps = int(np.ceil(valid_gen.samples / float(batch_size)))

print('Fitting')
model.fit_generator(train_gen,
                    steps_per_epoch=train_steps,
                    validation_data=valid_gen,
                    validation_steps=valid_steps)
print('Fitting is done')

Fitting
Epoch 1/1
Fitting is done


In [19]:
# Predictions for every test image; one column per unit of the 2-way softmax.
res = model.predict(to_predict_arr)
# Writes sub_<timestamp>.csv (ids taken from the file names) to the working directory.
create_submission(res, filenames)

In [16]:
# Sanity check: expect (number of test images, 3, 224, 224).
to_predict_arr.shape

(12500, 3, 224, 224)