# Dogs vs. Cats Redux

---

Notebook for rapidly prototyping different architectures, using a sample dataset.

In [1]:
from utils import *
from vgg16 import Vgg16
from keras.callbacks import ModelCheckpoint
from keras.initializers import he_normal
%matplotlib inline

Using cuDNN version 5005 on context None
Mapped name None to device cuda: GeForce GTX 1060 6GB (0000:04:00.0)
Using Theano backend.


Plan:

1. create decent size sample to work through ideas quickly
2. once a good approach is found automate it for full dataset
2. __extra! create an ensemble with the sample data first__
3. submit to Kaggle 3 times tonight

In [2]:
def frozen_vggbn():
    """Build a Vgg16BN network with every layer marked non-trainable.

    Returns the wrapped model (the ``.model`` attribute of ``Vgg16BN``)
    after setting ``trainable = False`` on each of its layers.
    """
    net = Vgg16BN().model
    for lyr in net.layers:
        lyr.trainable = False
    return net

In [3]:
def half_frozen_vggbn(depth):
    '''Build a Vgg16BN model with its last layer removed and the layers before `depth` frozen.

    The final layer is popped first, so `depth` indexes the remaining layers
    (a negative value counts back from the end, as with list slicing).
    Layers in `layers[:depth]` get `trainable = False`; layers from `depth`
    onward whose name contains 'dense' have their `kernel_initializer`
    attribute set to `he_normal()`.
    '''
    net = Vgg16BN().model
    net.pop()
    frozen_part, tunable_part = net.layers[:depth], net.layers[depth:]
    for lyr in frozen_part:
        lyr.trainable = False
    # NOTE(review): this only reassigns the attribute; it presumably does not
    # re-draw weights on layers that are already built -- confirm whether a
    # real re-initialization was intended.
    for lyr in (candidate for candidate in tunable_part if 'dense' in candidate.name):
        lyr.kernel_initializer = he_normal()
    return net

---
Paths used to load data from, and save results and models to, the appropriate places.

In [4]:
# Full dataset: training / validation images and saved models.
train_path, val_path = '../../dogscats/train/', '../../dogscats/valid/'
model_path = '../../dogscats/models/'

# Large sample: same layout under lrg_sample/, plus a results directory.
sample_train_path, sample_val_path = '../../dogscats/lrg_sample/train/', '../../dogscats/lrg_sample/valid/'
sample_results_path = '../../dogscats/lrg_sample/results/'
sample_model_path = '../../dogscats/lrg_sample/models/'

---

Set up data batches. Both plain and augmented batches were used for prototyping, but the augmented batches always resulted in better performance.

In [5]:
# Generator for augmented training images: small geometric jitter,
# a colour-channel shift, and random horizontal flips.
aug_gen = image.ImageDataGenerator(
    # geometric jitter
    rotation_range=5,
    zoom_range=0.05,
    width_shift_range=0.05,
    height_shift_range=0.05,
    horizontal_flip=True,
    # colour jitter
    channel_shift_range=10)

In [6]:
batch_size = 32  # 32 - kept running out of mem on proto6

# Plain and augmented iterators over the full training set, plus a validation
# iterator that uses twice the training batch size.
train_batches = get_batches(train_path, batch_size=batch_size)
aug_train_batches = aug_gen.flow_from_directory(directory=train_path, batch_size=batch_size,
                                                shuffle=True, target_size=(224, 224))
val_batches = get_batches(val_path, batch_size=batch_size * 2)

# Training steps count only full batches per epoch (floor division).
train_steps = train_batches.samples // train_batches.batch_size
aug_train_steps = aug_train_batches.samples // aug_train_batches.batch_size

# Validation uses ceiling division (-(-a // b)) so the trailing partial batch
# is included; with floor division the last images are left out of the
# val_loss that ModelCheckpoint monitors.
val_steps = -(-val_batches.samples // val_batches.batch_size)

Found 23000 images belonging to 2 classes.
Found 23000 images belonging to 2 classes.
Found 2000 images belonging to 2 classes.


---
Ensemble time.

In [7]:
def get_proto8():
    """Prototype 8: partially frozen Vgg16BN with a rebuilt classification head.

    Starts from ``half_frozen_vggbn(-14)``, pops five more layers, then
    appends BatchNormalization, Dropout(0.5) and a 2-unit softmax Dense layer.
    The model is compiled with Adam (lr=0.0001) and categorical cross-entropy,
    tracking accuracy, and returned.
    """
    net = half_frozen_vggbn(-14)
    for _ in range(5):
        net.pop()
    head = (
        BatchNormalization(),
        Dropout(.5),
        Dense(2, activation='softmax', kernel_initializer='he_normal'),
    )
    for new_layer in head:
        net.add(new_layer)
    net.compile(Adam(lr=.0001), loss='categorical_crossentropy', metrics=['accuracy'])
    return net

In [8]:
def get_proto4():
    """Prototype 4: fully frozen Vgg16BN with only a new 2-unit softmax layer added.

    Takes the model from ``frozen_vggbn()``, removes its last layer, appends a
    2-unit softmax Dense layer (he_normal init), and compiles it with a
    default Adam optimizer and categorical cross-entropy, tracking accuracy.
    """
    net = frozen_vggbn()
    net.pop()
    classifier = Dense(2, activation='softmax', kernel_initializer='he_normal')
    net.add(classifier)
    net.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return net

In [9]:
# Tag embedded in the checkpoint, prediction and submission file names below.
model_name = 'prod8'

In [10]:
def train_model(model_func,i,epochs=10):
    """Build a model with `model_func`, train it on the augmented batches, and return it.

    model_func: zero-argument callable returning a compiled model.
    i:          ensemble member number, used in the checkpoint file name.
    epochs:     number of training epochs.

    A ModelCheckpoint callback writes the full model (not weights only) to
    ``model_path + 'ens_<model_name>_<i>'``, keeping only the epoch with the
    best ``val_loss``. Reads the notebook globals ``model_path``,
    ``model_name``, ``aug_train_batches``, ``aug_train_steps``,
    ``val_batches`` and ``val_steps``.
    """
    net = model_func()
    checkpoint_file = model_path + 'ens_{}_{}'.format(model_name, i)
    checkpoint = ModelCheckpoint(checkpoint_file, monitor='val_loss',
                                 save_best_only=True, save_weights_only=False)
    net.fit_generator(
        aug_train_batches,
        aug_train_steps,
        epochs=epochs,
        callbacks=[checkpoint],
        validation_data=val_batches,
        validation_steps=val_steps)
    return net

--- 
Build the models manually, because the kernel fails when training more than one at a time.

In [11]:
# model_num = 1
# model = train_model(get_proto8,model_num)

# model.load_weights(model_path+'ens_{}_{}'.format(model_name,model_num))

# test_batches = get_batches('../../dogscats/test/',batch_size=batch_size,shuffle=False)
# test_steps = test_batches.n//test_batches.batch_size+1
# y_pred = model.predict_generator(test_batches,steps=test_steps)

# y_pred = pd.DataFrame(y_pred)
# y_pred.to_csv(model_path+'ens_{}_{}_pred'.format(model_name,model_num))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Found 12500 images belonging to 1 classes.


In [11]:
# model_num = 2
# model = train_model(get_proto8,model_num)

# model.load_weights(model_path+'ens_{}_{}'.format(model_name,model_num))

# test_batches = get_batches('../../dogscats/test/',batch_size=batch_size,shuffle=False)
# test_steps = test_batches.n//test_batches.batch_size+1
# y_pred = model.predict_generator(test_batches,steps=test_steps)

# y_pred = pd.DataFrame(y_pred)
# y_pred.to_csv(model_path+'ens_{}_{}_pred'.format(model_name,model_num))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Found 12500 images belonging to 1 classes.


In [11]:
# Train ensemble member `model_num`, then predict on the test set with its
# best checkpoint and save the raw predictions.
model_num = 3
model = train_model(get_proto8, model_num)

# train_model checkpoints only the best-val_loss epoch; reload it so the
# predictions come from that epoch rather than from the final one.
model.load_weights(model_path + 'ens_{}_{}'.format(model_name, model_num))

test_batches = get_batches('../../dogscats/test/', batch_size=batch_size, shuffle=False)
# Ceiling division (-(-a // b)): exactly enough batches to cover every test
# image. `n // batch_size + 1` asks for one batch too many whenever n is an
# exact multiple of the batch size, giving more predictions than files.
test_steps = -(-test_batches.n // test_batches.batch_size)
y_pred = model.predict_generator(test_batches, steps=test_steps)

y_pred = pd.DataFrame(y_pred)
y_pred.to_csv(model_path + 'ens_{}_{}_pred'.format(model_name, model_num))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Found 12500 images belonging to 1 classes.


---
Load saved predictions of models and submit

In [16]:
# Read back the saved predictions of ensemble members 1..model_num and place
# them side by side (each file contributes its own pair of class columns).
# The file names must match what the training cells write
# ('ens_<model_name>_<n>_pred'); the previous 'lone_' prefix pointed at files
# this notebook never creates.
pred_frames = [
    pd.read_csv(model_path + 'ens_{}_{}_pred'.format(model_name, i + 1), index_col=0)
    for i in range(model_num)
]
# Concatenate once instead of growing a DataFrame inside the loop; fall back
# to an empty frame when there is nothing to load.
ens_preds = pd.concat(pred_frames, axis=1) if pred_frames else pd.DataFrame()

In [17]:
# Drop every '0' column, keeping each model's '1' column (presumably the
# dog-class probability -- confirm against the generator's class indices).
dog_preds = ens_preds.drop('0', axis=1)
# Average across models row by row, then clip into [0.05, 0.95].
avg_preds = np.clip(np.array(dog_preds.mean(axis=1)), 0.05, 0.95)

In [18]:
# Turn each test file path ('<dir>/<id>.<ext>') into its integer id and pair
# the ids with the averaged predictions as an (n, 2) array.
filenames = test_batches.filenames
full_files, ids = [], []
for rel_path in filenames:
    base_name = rel_path.split('/')[1]
    full_files.append(base_name)
    ids.append(int(base_name.split('.')[0]))
formatted = np.stack([ids, avg_preds], axis=1)

In [19]:
# Write the submission CSV: an 'id,label' header row (comments='' stops numpy
# from prefixing it with '#'), then one "<int id>,<5-decimal label>" row per image.
sub_path = '../../dogscats/submissions/'
np.savetxt(
    sub_path + 'lone_{}_full_pred.csv'.format(model_name),
    formatted,
    fmt='%d,%.5f',
    header='id,label',
    comments='')

---
The submission for the prototype sample ensemble __(simplest ensemble possible)__ of 3 fine-tuned models, trained for 1 epoch each, resulted in a score of .12318! That's only .02 worse than the full-data, 30+ epoch model I submitted yesterday!

    LB score of .12318

Now the question is: which prototype would be best to build an ensemble from, or would it be better to use more than one kind of model in the ensemble?

---
After examining the prototype scores and reading that fine-tuning multiple layers gives models more capacity to adapt, I ended up using proto8. Ensembling 3 of these, each trained for 10 epochs, I got:

    LB score of .08918

### Download and submit:

---
Use this command to download the file from the server. It must be run from the client, *not* from this notebook.

    scp 96.237.225.57:/home/xbno/anaconda3/courses/dogscats/submissions/submission1.csv ~/Desktop/

