In [None]:
from theano.sandbox import cuda

In [4]:
%matplotlib inline
from IPython.display import FileLink

import h5py
import numpy as np
import PIL

from utils_yvan import save_array, load_array

from keras import backend as K
from keras.utils.data_utils import get_file
from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.layers.core import Flatten, Dense, Dropout, Lambda
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.layers.normalization import BatchNormalization
from keras.optimizers import SGD, RMSprop, Adam
from keras.preprocessing import image
from keras.regularizers import l2
from keras.metrics import categorical_crossentropy

Using Theano backend.
 https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29

Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5105)


In [5]:
import os
DATA_DIR = os.path.join('/scratch', 'yns207', 'data_statefarm')
path = DATA_DIR
test_path = os.path.join(path, 'test')
models_path = os.path.join(path, 'results')
train_path = os.path.join(path, 'train')
valid_path = os.path.join(path, 'valid')
batch_size = 64

In [None]:
gen = image.ImageDataGenerator()
tr_batches = gen.flow_from_directory(train_path, target_size=(224,224), class_mode='categorical', shuffle=True, batch_size=batch_size)
va_batches = gen.flow_from_directory(valid_path, target_size=(224,224), class_mode='categorical', shuffle=False, batch_size=batch_size*2)
te_batches = gen.flow_from_directory(test_path, target_size=(224,224), class_mode='categorical', shuffle=False, batch_size=batch_size)

In [None]:
va_classes = va_batches.classes
tr_classes = tr_batches.classes
va_labels = to_categorical(va_classes)
tr_labels = to_categorical(tr_classes)
va_filenames = va_batches.filenames
tr_filenames = tr_batches.filenames
te_filenames = te_batches.filenames

In [None]:
model = Sequential([
    BatchNormalization(axis=1, input_shape=(3,224,224)),
    Flatten(),
    Dense(10, activation='softmax')
])
model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
model.fit_generator(tr_batches, 
                    steps_per_epoch=tr_batches.n//batch_size, 
                    validation_data=va_batches, 
                    validation_steps=va_batches.n//batch_size,
                    epochs=2)

# jump straight ahead to the single conv layer

In [None]:
def conv1(batches):
    model = Sequential([
            BatchNormalization(axis=1, input_shape=(3,224,224)),
            Convolution2D(32,(3,3), activation='relu'),
            BatchNormalization(axis=1),
            MaxPooling2D((3,3)),
            Convolution2D(64,(3,3), activation='relu'),
            BatchNormalization(axis=1),
            MaxPooling2D((3,3)),
            Flatten(),
            Dense(200, activation='relu'),
            BatchNormalization(),
            Dense(10, activation='softmax')
        ])

    model.compile(Adam(lr=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])
    hist1 = model.fit_generator(tr_batches, 
                    steps_per_epoch=(tr_batches.n//batch_size)+1, 
                    validation_data=va_batches, 
                    validation_steps=(va_batches.n//batch_size)+1,
                    epochs=2)
    model.optimizer.lr = 0.001
    hist2 = model.fit_generator(tr_batches, 
                        steps_per_epoch=(tr_batches.n//batch_size)+1, 
                        validation_data=va_batches, 
                        validation_steps=(va_batches.n//batch_size)+1,
                        epochs=4)
    return model  

In [None]:
gen_t = image.ImageDataGenerator(rotation_range=15, height_shift_range=0.05, 
                shear_range=0.1, channel_shift_range=20, width_shift_range=0.1)
tr_batches = gen_t.flow_from_directory(train_path, target_size=(224,224), class_mode='categorical', shuffle=True, batch_size=batch_size)

In [None]:
model = conv1(tr_batches)

In [None]:
model.optimizer.tr = 0.0001
hist = model.fit_generator(tr_batches, 
                        steps_per_epoch=(tr_batches.n//batch_size)+1, 
                        validation_data=va_batches, 
                        validation_steps=(va_batches.n//batch_size)+1,
                        epochs=4)

In [None]:
print(hist.history)

In [None]:
model = Sequential([
        BatchNormalization(axis=1, input_shape=(3,224,224)),
        Convolution2D(32,(3,3),activation='relu'),
        BatchNormalization(axis=1),
        MaxPooling2D(),
        Convolution2D(64,(3,3),activation='relu'),
        BatchNormalization(axis=1),
        MaxPooling2D(),
        Flatten(),
        Dense(200,activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(200, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(10, activation='softmax')
])

In [None]:
model.compile(Adam(lr=10e-5), loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
hist = model.fit_generator(tr_batches, 
                        steps_per_epoch=(tr_batches.n//batch_size)+1, 
                        validation_data=va_batches, 
                        validation_steps=(va_batches.n//batch_size)+1,
                        epochs=2)

In [None]:
model.optimizer.lr=0.001
hist = model.fit_generator(tr_batches, 
                        steps_per_epoch=(tr_batches.n//batch_size)+1, 
                        validation_data=va_batches, 
                        validation_steps=(va_batches.n//batch_size)+1,
                        epochs=10)

In [None]:
model.optimizer.lr = 0.00001
hist = model.fit_generator(tr_batches, 
                        steps_per_epoch=(tr_batches.n//batch_size)+1, 
                        validation_data=va_batches, 
                        validation_steps=(va_batches.n//batch_size)+1,
                        epochs=10)

# try vgg conv layers

In [3]:
from vgg16 import Vgg16

In [4]:
vgg = Vgg16()
model = vgg.model
last_conv_index = [i for i,l in enumerate(model.layers) if type(l) is Convolution2D][-1]
conv_layers = model.layers[:last_conv_index+1]
fc_layers = model.layers[last_conv_index+1:]

  model.add(Convolution2D(filters, 3, 3, activation='relu'))
  model.add(Convolution2D(filters, 3, 3, activation='relu'))
  model.add(Convolution2D(filters, 3, 3, activation='relu'))
  model.add(Convolution2D(filters, 3, 3, activation='relu'))
  model.add(Convolution2D(filters, 3, 3, activation='relu'))


In [5]:
conv_model = Sequential(conv_layers)

In [None]:
'''
When you fit your conv features, conv_feat was created from shuffled batches, 
but trn_labels was not shuffled, so they don't match. 
You need to not shuffle the batches used to create conv_feat.
'''
gen = image.ImageDataGenerator()

tr_batches = gen.flow_from_directory(train_path, target_size=(224,224), class_mode='categorical', shuffle=False, batch_size=batch_size)
va_batches = gen.flow_from_directory(valid_path, target_size=(224,224), class_mode='categorical', shuffle=False, batch_size=batch_size*2)
te_batches = gen.flow_from_directory(test_path, target_size=(224,224), class_mode='categorical', shuffle=False, batch_size=batch_size)

In [None]:
va_classes = va_batches.classes
tr_classes = tr_batches.classes
va_labels = to_categorical(va_classes)
tr_labels = to_categorical(tr_classes)
va_filenames = va_batches.filenames
tr_filenames = tr_batches.filenames
te_filenames = te_batches.filenames

In [None]:
conv_tr_feat = conv_model.predict_generator(tr_batches, (tr_batches.n//batch_size)+1, workers=2)
save_array(os.path.join(models_path, 'conv_tr_feat.dat'), conv_tr_feat)

In [None]:
# divide by 2 because the validation batch size is 2x bigger
conv_va_feat = conv_model.predict_generator(va_batches, (va_batches.n//(batch_size*2))+1, workers=2)
save_array(os.path.join(models_path, 'conv_va_feat.dat'), conv_va_feat)

In [None]:
conv_tr_path = os.path.join(models_path, 'conv_tr_feat.dat')
conv_tr_feat = load_array(conv_tr_path)

In [None]:
conv_va_path = os.path.join(models_path, 'conv_va_feat.dat')
conv_va_feat = load_array(conv_hva_path)

# batchnorm dense layers on pretrained conv layers

In [None]:
'''
http://forums.fast.ai/t/statefarm-kaggle-comp/183/124
details on a dif architecture to improve performance
'''

In [None]:
def get_bn_layers(p):
    return [
        MaxPooling2D(input_shape=conv_layers[-1].output_shape[1:]),
        Flatten(),
        Dropout(p/2),
        Dense(200, activation='relu'),
        BatchNormalization(),
        Dropout(p/2),
        Dense(200, activation='relu'),
        BatchNormalization(),
        Dropout(p),
        Dense(10, activation='softmax')
    ]

In [None]:
p=0.8

In [None]:
bn_model = Sequential(get_bn_layers(p))
bn_model.compile(Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
bn_model.fit(conv_tr_feat,
            tr_labels,
            batch_size=batch_size,
            epochs=10,
            validation_data=(conv_va_feat,va_labels)
            )

In [None]:
bn_model.optimizer.lr = 1e-3

In [None]:
bn_model.fit(conv_tr_feat,
            tr_labels,
            batch_size=batch_size,
            epochs=10,
            validation_data=(conv_va_feat,va_labels)
            )

In [None]:
gen_t = image.ImageDataGenerator(rotation_range=15, height_shift_range=0.05, 
                shear_range=0.1, channel_shift_range=20, width_shift_range=0.1)
aug_batches = gen_t.flow_from_directory(train_path, target_size=(224,224), class_mode='categorical', shuffle=False, batch_size=batch_size//4)

In [None]:
# create one tahts 2x bigger than original training
# the idea is that there will be an even distribution
# of the ranomd sutff in there.
aug_conv_feat = conv_model.predict_generator(aug_batches, (2*tr_batches.n//batch_size)+1, workers=2)

In [None]:
save_array(os.path.join(models_path, 'aug_conv_feat.dat'), aug_conv_feat)

In [None]:
aug_conv_feat = load_array(os.path.join(models_path, 'aug_conv_feat.dat'))

In [None]:
#this crashes the kernel as we run out of RAM
#need to use batches instead,
#apparently jeremy shows an example somewhere
# could also just NOT precompute the conv features, run
# a normal training round.
#aug_conv_feat_c = np.concatenate([aug_conv_feat, conv_tr_feat])

In [None]:
aug_conv_labels = np.concatenate([tr_labels]*3)

In [None]:
def get_bn_da_layers(p):
    return [
        MaxPooling2D(input_shape=conv_layers[-1].output_shape[1:]),
        Flatten(),
        Dropout(p),
        Dense(256, activation='relu'),
        BatchNormalization(),
        Dropout(p),
        Dense(256, activation='relu'),
        BatchNormalization(),
        Dropout(p),
        Dense(10, activation='softmax')
    ]

In [None]:
p=0.8

In [None]:
bn_model = Sequential(get_bn_da_layers(p))
bn_model.compile(Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
bn_model.fit(aug_conv_feat,
             aug_conv_labels,
             batch_size=batch_size,
             epochs=4,
             validation_data=(conv_va_feat,va_labels)
            )

In [None]:
bn_model.optimizer.lr = 0.01

In [None]:
bn_model.fit(aug_conv_feat,
             aug_conv_labels,
             batch_size=batch_size,
             epochs=4,
             validation_data=(conv_va_feat,va_labels)
            )

In [None]:
bn_model.optimizer.lr = 0.0001

In [None]:
bn_model.fit(aug_conv_feat,
             aug_conv_labels,
             batch_size=batch_size,
             epochs=4,
             validation_data=(conv_va_feat,va_labels)
            )

# retry of batch_norm, data augmentation, and psuedo labelling

In [19]:
# ok scratch all the pretrained BS, we need to
# generate batches with the mixiterator
# that contain 1/4 psuedo labelled data
# and 3/4 training data and then pass this 
# through the whole model conv + fc layers
# actually do not need to use the mixiterator

In [12]:
from vgg16 import Vgg16
vgg = Vgg16()
model = vgg.model
last_conv_index = [i for i,l in enumerate(model.layers) if type(l) is Convolution2D][-1]
conv_layers = model.layers[:last_conv_index+1]
fc_layers = model.layers[last_conv_index+1:]

  model.add(Convolution2D(filters, 3, 3, activation='relu'))
  model.add(Convolution2D(filters, 3, 3, activation='relu'))
  model.add(Convolution2D(filters, 3, 3, activation='relu'))
  model.add(Convolution2D(filters, 3, 3, activation='relu'))
  model.add(Convolution2D(filters, 3, 3, activation='relu'))


In [6]:
def get_bn_da_layers(p):
    return [
        MaxPooling2D(input_shape=conv_layers[-1].output_shape[1:]),
        Flatten(),
        Dropout(p/2),
        Dense(200, activation='relu'),
        BatchNormalization(),
        Dropout(p/2),
        Dense(200, activation='relu'),
        BatchNormalization(),
        Dropout(p),
        Dense(10, activation='softmax')
    ]

In [7]:
gen = image.ImageDataGenerator()
gen_t = image.ImageDataGenerator(rotation_range=15, 
                                height_shift_range=0.05, 
                                shear_range=0.1, 
                                channel_shift_range=20, 
                                width_shift_range=0.1)

tr_batches = gen_t.flow_from_directory(train_path, target_size=(224,224), class_mode='categorical', shuffle=True, batch_size=batch_size)
tr_batches_fixed = gen.flow_from_directory(train_path, target_size=(224,224), class_mode='categorical', shuffle=False, batch_size=batch_size)
va_batches = gen.flow_from_directory(valid_path, target_size=(224,224), class_mode='categorical', shuffle=False, batch_size=batch_size*2)
te_batches = gen.flow_from_directory(test_path, target_size=(224,224), class_mode='categorical', shuffle=False, batch_size=batch_size)

Found 18131 images belonging to 10 classes.
Found 18131 images belonging to 10 classes.
Found 4293 images belonging to 10 classes.
Found 79726 images belonging to 1 classes.


In [8]:
va_classes = va_batches.classes
tr_classes = tr_batches_fixed.classes
va_labels = to_categorical(va_classes)
tr_labels = to_categorical(tr_classes)
va_filenames = va_batches.filenames
tr_filenames = tr_batches.filenames
te_filenames = te_batches.filenames

In [9]:
p = 0.8

In [14]:
yvan_model = Sequential()
conv_model = Sequential(conv_layers)
for l in conv_model.layers:
    l.trainable = False
yvan_model.add(conv_model)
yvan_model.add(Sequential(get_bn_da_layers(p)))
adam = Adam(lr=0.001)
yvan_model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])

In [15]:
yvan_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential_5 (Sequential)    (None, 512, 14, 14)       14714688  
_________________________________________________________________
sequential_6 (Sequential)    (None, 10)                5061610   
Total params: 19,776,298
Trainable params: 5,060,810
Non-trainable params: 14,715,488
_________________________________________________________________


In [None]:
yvan_model.fit_generator(tr_batches,
                         steps_per_epoch=(tr_batches.n//batch_size)+1,
                         validation_data=va_batches,
                         validation_steps=(va_batches.n//(batch_size*2))+1,
                         epochs=1)

Epoch 1/1


<keras.callbacks.History at 0x2af4dfc52128>

In [None]:
hist = yvan_model.fit_generator(tr_batches,
                         steps_per_epoch=(tr_batches.n//batch_size)+1,
                         validation_data=va_batches,
                         validation_steps=(va_batches.n//(batch_size*2))+1,
                         epochs=4)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4

In [50]:
hist.history

{'acc': [0.83602669459500178,
  0.90298383983233133,
  0.92046770722530347,
  0.93910981190789133],
 'loss': [0.50245500503852825,
  0.30926036406321766,
  0.24712512025790526,
  0.19637700660066593],
 'val_acc': [0.82156999761161897,
  0.80270207293406248,
  0.81691125088739913,
  0.82366643388899452],
 'val_loss': [0.62175132371372421,
  0.64871419478540182,
  0.65319442587967325,
  0.64364949728874454]}

In [51]:
#then save those weights
yvan_model.save_weights(os.path.join(models_path, 'yvan_model_weights.h5'))

In [16]:
yvan_model.load_weights(os.path.join(models_path, 'yvan_model_weights.h5'))

In [17]:
#then use that training to pseudo label

In [18]:
val_pseudo_labels = yvan_model.predict_generator(va_batches, steps=va_batches.n//(batch_size*2)+1)

In [19]:
val_pseudo_labels = (val_pseudo_labels == val_pseudo_labels.max(axis=1, keepdims=1)).astype(float)

In [20]:
val_pseudo_labels

array([[ 1.,  0.,  0., ...,  0.,  0.,  0.],
       [ 1.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  1.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  0.,  0.,  1.],
       [ 0.,  0.,  0., ...,  0.,  0.,  1.],
       [ 1.,  0.,  0., ...,  0.,  0.,  0.]])

In [21]:
tr_labels

array([[ 1.,  0.,  0., ...,  0.,  0.,  0.],
       [ 1.,  0.,  0., ...,  0.,  0.,  0.],
       [ 1.,  0.,  0., ...,  0.,  0.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  0.,  0.,  1.],
       [ 0.,  0.,  0., ...,  0.,  0.,  1.],
       [ 0.,  0.,  0., ...,  0.,  0.,  1.]])

In [22]:
pseudo_labels = np.concatenate([tr_labels, val_pseudo_labels])

In [23]:
#zeroth elem because thats the actual data
#it actually returns a tuple with something else in it
va_pseudo_data = np.concatenate([va_batches.next()[0] for i in range((va_batches.n//(batch_size*2))+1)])
tr_fixed_data = np.concatenate([tr_batches_fixed.next()[0] for i in range((tr_batches_fixed.n//batch_size)+1)])

In [24]:
save_array(os.path.join(models_path, 'va_pseudo_data.dat'), va_pseudo_data)
save_array(os.path.join(models_path, 'tr_fixed_data.dat'), tr_fixed_data)

In [25]:
pseudo_data = np.concatenate([tr_fixed_data, va_pseudo_data])

In [26]:
pseudo_data.shape

(22424, 3, 224, 224)

In [27]:
va_pseudo_data.shape


(4293, 3, 224, 224)

In [28]:
va_labels.shape

(4293, 10)

In [None]:
#then train on the pseudo labels

In [None]:
hist = yvan_model.fit(pseudo_data,
                    pseudo_labels,
                    batch_size=batch_size,
                    validation_data=(va_pseudo_data, va_labels),
                    epochs=4)

Train on 22424 samples, validate on 4293 samples
Epoch 1/4
 1088/22424 [>.............................] - ETA: 952s - loss: 1.3552 - acc: 0.7895

In [31]:
hist.history

{'acc': [0.86327149484823573,
  0.90385301462718515,
  0.91772208348198359,
  0.92204780590096158],
 'loss': [0.52636034737261028,
  0.34129503553270063,
  0.28946414123124153,
  0.27423459440310255],
 'val_acc': [0.78593058467272303,
  0.79431632891099491,
  0.79734451432564635,
  0.80246913580246915],
 'val_loss': [0.6578886400089935,
  0.58394270200927079,
  0.56764786423377267,
  0.55063074598105777]}

In [None]:
yvan_model.optimizer.lr = 0.0001
hist = yvan_model.fit(pseudo_data,
                    pseudo_labels,
                    batch_size=batch_size,
                    validation_data=(va_pseudo_data, va_labels),
                    epochs=5)

Train on 22424 samples, validate on 4293 samples
Epoch 1/5
Epoch 2/5

In [34]:
hist.history

{'acc': [0.92686407422747219,
  0.93092222616496445,
  0.92998572957545489,
  0.93168034251145371,
  0.93444523724580808],
 'loss': [0.26126723269501034,
  0.24872710621712082,
  0.23935677077524201,
  0.23384266970011322,
  0.2264022688715959],
 'val_acc': [0.80642907058348501,
  0.78965758211388337,
  0.80526438388073607,
  0.79757745166897298,
  0.80130444910319121],
 'val_loss': [0.54172315662511994,
  0.55974224888026058,
  0.55361377409835699,
  0.5439553114186626,
  0.52685085808150622]}

In [74]:
yvan_model.save_weights(os.path.join(models_path, 'yvan_model_weights2.h5'))

In [87]:
yvan_model.load_weights(os.path.join(models_path, 'yvan_model_weights2.h5'))

In [88]:
#then use it to make predictions on the test set
#then normalize test set, submit to kaggle

In [89]:
preds = yvan_model.predict_generator(te_batches, steps=(te_batches.n//batch_size)+1, workers=2)

In [90]:
preds.shape

(79726, 10)

In [91]:
save_array(os.path.join(models_path, 'predictions_f.dat'), preds)

In [92]:
# crop thes predictions to prevent overconfident false predictions
# which are punished excessively for multiclass loss
def do_clip(arr, mx): return np.clip(arr, (1-mx)/9, mx)

In [93]:
subm = do_clip(preds, 0.93)

In [94]:
# get file names
from glob import glob
g = glob(os.path.join(test_path, 'uknown/*'))
img_names = [f[43:] for f in g]
img_names[:5]

['img_31304.jpg',
 'img_56510.jpg',
 'img_36143.jpg',
 'img_95819.jpg',
 'img_1960.jpg']

In [95]:
sub_path = os.path.join(models_path, 'sub_may23_2017.gz')

In [96]:
import pandas as pd
sub = pd.DataFrame(subm, columns=['c0','c1','c2','c3','c4','c5','c6','c7','c8','c9'])
pd.options.display.float_format = '{:,.5f}'.format
sub['img'] = pd.Series(img_names)

In [97]:
sub.head()

Unnamed: 0,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,img
0,0.01249,0.62044,0.00778,0.18909,0.00778,0.00778,0.14855,0.01504,0.00778,0.00778,img_31304.jpg
1,0.00778,0.00778,0.00778,0.81733,0.16374,0.00778,0.00778,0.00778,0.01078,0.00778,img_56510.jpg
2,0.02991,0.00778,0.00778,0.01529,0.01115,0.01016,0.00778,0.00778,0.87228,0.04765,img_36143.jpg
3,0.00778,0.01056,0.00778,0.00778,0.00778,0.00778,0.00778,0.93,0.00778,0.00778,img_95819.jpg
4,0.00912,0.00778,0.00778,0.00778,0.46916,0.33204,0.02055,0.02083,0.12988,0.00778,img_1960.jpg


In [98]:
sub.to_csv(sub_path, index=False, compression='gzip')

In [99]:
FileLink(sub_path)

# the crap below didnt work

bn_model.save_weights(os.path.join(models_path, 'aug_conv_weights.h5'))

val_pseudo_labels = bn_model.predict(conv_va_feat, batch_size=batch_size)

pseudo_labels = np.concatenate([tr_labels, val_pseudo_labels])

pseudo_feat = np.concatenate([conv_tr_feat, conv_va_feat])

bn_model.load_weights(os.path.join(models_path, 'conv_weights.h5'))

bn_model.fit(pseudo_feat,
             pseudo_labels,
             batch_size=batch_size,
             epochs=5,
             validation_data=(conv_va_feat,va_labels)
            )

bn_model.optimizer.lr=0.001
bn_model.fit(pseudo_feat,
             pseudo_labels,
             batch_size=batch_size,
             epochs=5,
             validation_data=(conv_va_feat,va_labels)
            )

bn_model.save_weights(os.path.join(models_path, 'conv_weights_f.h5'))

def do_clip(arr, mx): return np.clip(arr, (1-mx)/9, mx)

va_preds = bn_model.predict(conv_va_feat, batch_size=batch_size*2)

cat_entropy_va = categorical_crossentropy(va_labels, do_clip(va_preds, 0.93)).eval()

cat_entropy_va.mean()

conv_te_feat = conv_model.predict_generator(te_batches, (te_batches.n//batch_size)+1)
save_array(os.path.join(models_path, 'conv_te_feat.dat'), conv_te_feat)

conv_te_feat = load_array(os.path.join(models_path, 'conv_te_feat.dat'))

te_preds = bn_model.predict(conv_te_feat, batch_size=batch_size*2)

sub = do_clip(te_peds, 0.93)

sub_path = os.path.join(models_path, 'sub_may172017.gz')

classes = sorted(tr_batches.class_indices, key=batches.class_indices.get)
subm = pd.DataFrame(sub, columns=classes)
subm.insert(0, 'img', [a[4:] for a in test_filenames])
subm.head()

subm.to_csv(sub_path, index=False, compression='gzip')

FileLink(sub_path)