# Dogs vs Cats Redux

---

Notebook for rapidly prototyping different architectures, using a sample of the dataset.

In [1]:
from utils import *
from vgg16 import Vgg16
from keras.callbacks import ModelCheckpoint
from keras.initializers import he_normal
%matplotlib inline

Using cuDNN version 5005 on context None
Mapped name None to device cuda: GeForce GTX 1060 6GB (0000:04:00.0)
Using Theano backend.


In [2]:
resnet = keras.applications.resnet50.ResNet50(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000)


Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels.h5

Plan:

1. create decent size sample to work through ideas quickly
2. __once a good approach is found automate it for full dataset__
3. submit to kaggle at least 20 times

---

Prototyping:

    Batchnorm
    Data augmentation
    Try multiple levels of vgg conv layers
    An ensemble - in next notebook
    
After finding in proto4 that he_normal initialization is excellent, I neglected to add it to any of the other models I made... This needs to be added.

In [9]:
def resnet():
    """Build a ResNet50 model with its top layers included and ImageNet weights loaded."""
    resnet50_cls = keras.applications.resnet50.ResNet50
    return resnet50_cls(include_top=True, weights='imagenet')

In [10]:
res = resnet()

In [16]:
# Call get_config() on every layer of `res`. The return values are discarded,
# so this cell displays nothing; it only exercises the call on each layer.
for layer in res.layers:
    layer.get_config()

In [4]:
def frozen_resnet():
    '''Return the model built by ``resnet()`` with every layer frozen.

    Each layer's ``trainable`` flag is set to False, the same treatment
    ``frozen_vggbn`` applies to the VGG model.

    Returns:
        The model returned by ``resnet()``, with all layers non-trainable.
    '''
    model = resnet()
    # Freeze the ResNet model itself. The earlier body read an unassigned
    # local named ``vggbn`` (copy/paste from frozen_vggbn), which raised
    # UnboundLocalError before any layer was touched.
    for layer in model.layers:
        layer.trainable = False
    return model

In [5]:
def frozen_vggbn():
    '''Instantiate Vgg16BN and return its ``.model`` with every layer marked non-trainable.'''
    net = Vgg16BN().model
    for lyr in net.layers:
        lyr.trainable = False
    return net

In [6]:
vgg = frozen_vggbn()

In [8]:
vgg.input_shape

(None, 3, 224, 224)

In [None]:
res = frozen_resnet()

Confirm last layer is trainable

In [38]:
[layer.get_config() for layer in  vggbn.layers][-1:]

[{'activation': 'softmax',
  'activity_regularizer': None,
  'bias_constraint': None,
  'bias_initializer': {'class_name': 'Zeros', 'config': {}},
  'bias_regularizer': None,
  'kernel_constraint': None,
  'kernel_initializer': {'class_name': 'VarianceScaling',
   'config': {'distribution': 'uniform',
    'mode': 'fan_avg',
    'scale': 1.0,
    'seed': None}},
  'kernel_regularizer': None,
  'name': 'dense_16',
  'trainable': True,
  'units': 2,
  'use_bias': True}]

In [3]:
batch_size=32 #32 - kept running out of mem on proto6

In [4]:
# Light augmentation: mirroring plus small geometric and colour perturbations.
aug_gen = image.ImageDataGenerator(
    horizontal_flip=True,
    rotation_range=5,
    zoom_range=0.05,
    width_shift_range=0.05,
    height_shift_range=0.05,
    channel_shift_range=10,
)

---

Set up data batches. Both plain and augmented batches were used for prototyping, but the augmented batches always resulted in better performance.

In [5]:
# Iterators over the large sample set: plain training batches, augmented
# training batches, and validation batches (twice the training batch size).
train_batches = get_batches('../../dogscats/lrg_sample/train/',batch_size=batch_size)
aug_train_batches = aug_gen.flow_from_directory(directory='../../dogscats/lrg_sample/train/',batch_size=batch_size,shuffle=True,target_size=(224,224))
# Validation is not shuffled so every evaluation pass sees the images in the same order.
val_batches = get_batches('../../dogscats/lrg_sample/valid/',batch_size=batch_size*2,shuffle=False)

# Training steps round down: the leftover partial batch is skipped each epoch.
train_steps = train_batches.samples//train_batches.batch_size
aug_train_steps = aug_train_batches.samples//aug_train_batches.batch_size
# Validation steps round up (ceil division) so the final partial batch is scored too.
# With floor division, samples % batch_size validation images were never evaluated,
# which makes the val_loss monitored by ModelCheckpoint depend on which images were dropped.
val_steps = -(-val_batches.samples//val_batches.batch_size)

Found 2000 images belonging to 2 classes.
Found 2000 images belonging to 2 classes.
Found 1000 images belonging to 2 classes.


In [6]:
results_path = '../../dogscats/lrg_sample/results/'
model_path = '../../dogscats/lrg_sample/models/'

---
Prototype 4.

Simple finetune model with he_normal initialization

In [15]:
cb = [ModelCheckpoint(model_path+'proto4', monitor='val_loss', save_best_only=True, save_weights_only=False)]


In [56]:
vggbn = frozen_vggbn()
vggbn.add(Dense(2,activation='softmax',kernel_initializer='he_normal'))

In [17]:
opt = Adam(lr=.001)

vggbn.compile(optimizer=opt,loss='categorical_crossentropy',metrics=['accuracy'])

proto4 = vggbn.fit_generator(aug_train_batches,
                    aug_train_steps,
                    epochs=10,
                    callbacks=cb,
                    validation_data=val_batches,
                    validation_steps=val_steps)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [57]:
vggbn.load_weights('../../dogscats/lrg_sample/models/proto4')
save_sub(vggbn,'vggbn_proto4.csv')

Found 12500 images belonging to 1 classes.
Raw Predictions: [  1.0000e+00   2.1217e-08   1.0000e+00   1.0000e+00   1.0000e+00]
Mid Predictions: [ 0.5409  0.5071  0.5398  0.4203  0.4167  0.5239  0.4462  0.4727  0.4285  0.4695  0.5042  0.4594
  0.4743  0.4302  0.5321  0.5339  0.4672  0.4052  0.4326  0.5794  0.4618  0.4403  0.4074  0.4703
  0.4544  0.4102  0.5624  0.576   0.4969  0.4922  0.4198  0.4587  0.5256  0.4896  0.4264  0.5851
  0.5363  0.5439  0.4562  0.4993  0.5351  0.5362  0.4362  0.5609]
Edge Predictions: [ 1.  1.  1. ...,  1.  1.  1.]
/home/xbno/anaconda3/courses/dogscats/submissions


In [7]:
def half_frozen_vggbn(depth):
    '''Build a Vgg16BN model, drop its last layer, and freeze the leading layers.

    Args:
        depth: slice index into the model's layer list (after the last layer
            has been popped). Layers in ``layers[:depth]`` get
            ``trainable = False``; layers in ``layers[depth:]`` whose name
            contains 'dense' get ``kernel_initializer`` set to ``he_normal()``.

    Returns:
        The modified model (the ``.model`` attribute of the Vgg16BN instance).

    NOTE(review): assigning ``kernel_initializer`` on an already constructed
    layer presumably does not re-draw existing weights -- confirm whether an
    actual re-initialisation is intended here.
    '''
    net = Vgg16BN().model
    net.pop()  # remove the final layer before splitting into frozen / unfrozen parts
    layer_list = net.layers

    for frozen_layer in layer_list[:depth]:
        frozen_layer.trainable = False

    unfrozen_dense = [lyr for lyr in layer_list[depth:] if 'dense' in lyr.name]
    for dense_layer in unfrozen_dense:
        dense_layer.kernel_initializer = he_normal()

    return net

---
Prototype 5.

Free up last 3 conv layers and both fc layers of vgg. Add finetune layer

In [7]:
cb = [ModelCheckpoint(model_path+'proto5', monitor='val_loss', save_best_only=True, save_weights_only=False, verbose=True)]


In [9]:
vggbn = half_frozen_vggbn(-14)
vggbn.add(Dense(2,activation='softmax',kernel_initializer='he_normal'))
opt = Adam(lr=.001)
vggbn.compile(optimizer=opt,loss='categorical_crossentropy',metrics=['accuracy'])

Changing conv2d_11 to he_normal initilizer
Changing conv2d_12 to he_normal initilizer
Changing conv2d_13 to he_normal initilizer
Changing dense_1 to he_normal initilizer
Changing dense_2 to he_normal initilizer


In [13]:
[layer.get_config() for layer in  vggbn.layers][-14:]

[{'activation': 'relu',
  'activity_regularizer': None,
  'bias_constraint': None,
  'bias_initializer': {'class_name': 'Zeros', 'config': {}},
  'bias_regularizer': None,
  'data_format': 'channels_first',
  'dilation_rate': (1, 1),
  'filters': 512,
  'kernel_constraint': None,
  'kernel_initializer': {'class_name': 'VarianceScaling',
   'config': {'distribution': 'normal',
    'mode': 'fan_in',
    'scale': 2.0,
    'seed': None}},
  'kernel_regularizer': None,
  'kernel_size': (3, 3),
  'name': 'conv2d_24',
  'padding': 'valid',
  'strides': (1, 1),
  'trainable': True,
  'use_bias': True},
 {'data_format': 'channels_first',
  'name': 'zero_padding2d_25',
  'padding': ((1, 1), (1, 1)),
  'trainable': True},
 {'activation': 'relu',
  'activity_regularizer': None,
  'bias_constraint': None,
  'bias_initializer': {'class_name': 'Zeros', 'config': {}},
  'bias_regularizer': None,
  'data_format': 'channels_first',
  'dilation_rate': (1, 1),
  'filters': 512,
  'kernel_constraint': None

In [14]:
opt = Adam(lr=.001)

vggbn.compile(optimizer=opt,loss='categorical_crossentropy',metrics=['accuracy'])

proto5 = vggbn.fit_generator(aug_train_batches,
                    aug_train_steps,
                    epochs=10,
                    callbacks=cb,
                    validation_data=val_batches,
                    validation_steps=val_steps)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


---
Prototype 6.

Originally I was reinitializing all of the unfrozen layers, but this doesn't make sense as it doesn't take advantage of transferring the learned weights of vgg. That way I'm not finetuning the conv layers but actually retraining them from scratch.

6a.
- This keeps on running out of mem. There are 25m trainable parameters, which might be too many. This is with leaving all conv layers in and allowing them to be finetuned while adding a dense 512 and a dense 2.
    
6b. 
- Popped the original dense layers and kept the last 3 conv layers trainable. Added a 512 dense with batchnorm and dropout .5 and a dense 2

In [15]:
cb = [ModelCheckpoint(model_path+'proto6_he', monitor='val_loss', save_best_only=True, save_weights_only=False)]


In [16]:
vggbn = half_frozen_vggbn(-14)

In [17]:
for i in range(6):
    vggbn.pop()

In [18]:
vggbn.add(BatchNormalization())
vggbn.add(Dense(512,kernel_initializer='he_normal',activation='relu'))
vggbn.add(BatchNormalization())
vggbn.add(Dropout(.5))
vggbn.add(Dense(2,activation='softmax',kernel_initializer='he_normal'))

In [21]:
#[layer.name for layer in vggbn.layers]# if 'dense' in layer.name ]

In [19]:
opt = Adam()
vggbn.compile(optimizer=opt,loss='categorical_crossentropy',metrics=['accuracy'])

In [25]:
proto6 = vggbn.fit_generator(aug_train_batches,
                    aug_train_steps,
                    epochs=10,
                    callbacks=cb,
                    validation_data=val_batches,
                    validation_steps=val_steps)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [20]:
proto6 = vggbn.fit_generator(aug_train_batches,
                    aug_train_steps,
                    epochs=10,
                    callbacks=cb,
                    validation_data=val_batches,
                    validation_steps=val_steps)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


---
Prototype 7. 

Finetune the last 3 conv layers as well as the 1st fc layer. Add batchnorm and dropout between it and the final dense 2

In [33]:
cb = [ModelCheckpoint(model_path+'proto7', monitor='val_loss', save_best_only=True, save_weights_only=False)]


In [27]:
vggbn = half_frozen_vggbn(-14)

In [30]:
for i in range(5):
    vggbn.pop()
vggbn.add(BatchNormalization())
vggbn.add(Dropout(.5))
vggbn.add(Dense(2,activation='softmax'))

In [32]:
opt = Adam()
vggbn.compile(optimizer=opt,loss='categorical_crossentropy',metrics=['accuracy'])

In [34]:
proto7 = vggbn.fit_generator(aug_train_batches,
                    aug_train_steps,
                    epochs=10,
                    callbacks=cb,
                    validation_data=val_batches,
                    validation_steps=val_steps)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [35]:
proto7 = vggbn.fit_generator(aug_train_batches,
                    aug_train_steps,
                    epochs=10,
                    callbacks=cb,
                    validation_data=val_batches,
                    validation_steps=val_steps)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [53]:
vggbn.load_weights('../../dogscats/lrg_sample/models/proto7')
save_sub(vggbn,'vggbn_proto7.csv')

Found 12500 images belonging to 1 classes.
Raw Predictions: [  9.9987e-01   2.7138e-04   9.8846e-01   1.0000e+00   9.9997e-01]
Mid Predictions: [ 0.5025  0.5967  0.4689  0.4181  0.5078  0.5762  0.5725  0.576   0.4204  0.4042  0.4492  0.568
  0.418   0.4782  0.4858  0.5527  0.5704  0.4806  0.4704  0.432   0.408   0.5846  0.479   0.4588
  0.5149  0.4475  0.596   0.4524  0.5686  0.4794  0.4783  0.4765  0.5205  0.4946  0.4161  0.5545
  0.475   0.4795  0.4922  0.4507  0.4604  0.4889  0.4943  0.4684  0.4808  0.5521  0.5974  0.4795
  0.5009  0.4493  0.4656  0.5291  0.5906  0.4484  0.5227  0.4072  0.5345  0.5489  0.4399  0.4123
  0.4278  0.4556  0.5722  0.5021  0.4206  0.5899  0.4826  0.4359  0.5536  0.5693  0.5882  0.4293
  0.4007  0.5989  0.5859  0.4023  0.5332  0.5723  0.592   0.4881  0.5474  0.431   0.4736  0.5902
  0.5837  0.5366  0.4626  0.515 ]
Edge Predictions: [ 1.  1.  1. ...,  1.  1.  1.]
/home/xbno/anaconda3/courses/dogscats/submissions


---
Prototype 8. (same as 7 with less data aug)

Weaken the aug_data. Still results in the same wobbly behavior.

In [38]:
# NOTE(review): this weaker augmentation config is commented out, so the
# `aug_gen.flow_from_directory(...)` call in the following cell still uses
# whichever `aug_gen` is already defined in the kernel. Uncomment to actually
# train Prototype 8 with reduced augmentation.
# aug_gen = image.ImageDataGenerator(
#     channel_shift_range=5,
#     zoom_range=0.02,
#     rotation_range=3,
#     width_shift_range=0.02,
#     height_shift_range=0.02,
#     horizontal_flip=True)

In [8]:
# Rebuild the iterators for Prototype 8; validation batches are unshuffled.
train_batches = get_batches('../../dogscats/lrg_sample/train/',batch_size=batch_size)
aug_train_batches = aug_gen.flow_from_directory(directory='../../dogscats/lrg_sample/train/',batch_size=batch_size,shuffle=True,target_size=(224,224))
val_batches = get_batches('../../dogscats/lrg_sample/valid/',batch_size=batch_size*2,shuffle=False)

# Training steps round down: the leftover partial batch is skipped each epoch.
train_steps = train_batches.samples//train_batches.batch_size
aug_train_steps = aug_train_batches.samples//aug_train_batches.batch_size
# Validation steps round up (ceil division) so the final partial batch is scored too;
# floor division left samples % batch_size validation images unevaluated.
val_steps = -(-val_batches.samples//val_batches.batch_size)

Found 2000 images belonging to 2 classes.
Found 2000 images belonging to 2 classes.
Found 1000 images belonging to 2 classes.


In [10]:
cb = [ModelCheckpoint(model_path+'proto8_he', monitor='val_loss', save_best_only=True, save_weights_only=False)]


In [11]:
vggbn = half_frozen_vggbn(-14)

In [31]:
#[layer.name for layer in vggbn.layers]# if 'dense' in layer.name ]

In [12]:
for i in range(5):
    vggbn.pop()
vggbn.add(BatchNormalization())
vggbn.add(Dropout(.5))
vggbn.add(Dense(2,activation='softmax',kernel_initializer='he_normal'))

In [13]:
opt = Adam()
vggbn.compile(optimizer=opt,loss='categorical_crossentropy',metrics=['accuracy'])

In [14]:
proto8 = vggbn.fit_generator(aug_train_batches,
                    aug_train_steps,
                    epochs=10,
                    callbacks=cb,
                    validation_data=val_batches,
                    validation_steps=val_steps)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [47]:
proto8 = vggbn.fit_generator(aug_train_batches,
                    aug_train_steps,
                    epochs=10,
                    callbacks=cb,
                    validation_data=val_batches,
                    validation_steps=val_steps)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [51]:
def save_sub(vggbn, save_name, sub_path='../../dogscats/submissions/'):
    '''Predict on the test set and write a Kaggle submission CSV.

    Args:
        vggbn: model exposing ``predict_generator(generator, steps=...)``.
            Column 1 of its output is used as the "is dog" probability.
        save_name: file name of the CSV created inside ``sub_path``.
        sub_path: directory the CSV is written to. Defaults to the
            submissions folder used throughout this notebook.

    The CSV has the header ``id,label`` and one row per test file: the
    integer id parsed from the file name and the prediction clipped to
    [0.05, 0.95]. The batch size comes from the notebook-level ``batch_size``.
    '''
    import os  # local import so the function does not rely on `from utils import *`

    test_batches = get_batches('../../dogscats/test/',batch_size=batch_size,shuffle=False)
    # Ceil division: exactly enough steps to cover every test image once.
    # The previous `n // batch_size + 1` ran one step too many whenever n was an
    # exact multiple of the batch size, yielding more predictions than filenames.
    test_steps = (test_batches.n + test_batches.batch_size - 1)//test_batches.batch_size

    y_pred = vggbn.predict_generator(test_batches,steps=test_steps)

    filenames = test_batches.filenames

    #Grab the dog prediction column
    isdog = y_pred[:,1]
    print("Raw Predictions: " + str(isdog[:5]))
    print("Mid Predictions: " + str(isdog[(isdog < .6) & (isdog > .4)]))
    print("Edge Predictions: " + str(isdog[(isdog == 1) | (isdog == 0)]))

    #So to play it safe, we use a sneaky trick to round down our edge predictions
    #Swap all ones with .95 and all zeros with .05
    isdog = isdog.clip(min=0.05, max=0.95)
    # The id is the part of the file name before the first '.'; basename() copes
    # with any directory depth and with the platform's path separator.
    ids = [int(os.path.basename(f).split('.')[0]) for f in filenames]
    subm = np.stack([ids,isdog], axis=1)

    # No `%cd` here: it is IPython-only syntax and silently changed the kernel's
    # working directory, which every other relative path in the notebook depends on.
    submission_file_name = os.path.join(sub_path, save_name)
    np.savetxt(submission_file_name, subm, fmt='%d,%.5f', header='id,label', comments='')

In [48]:
# Load the best Prototype 8 checkpoint and write a submission from it.
# The Prototype 8 ModelCheckpoint callback saves to model_path+'proto8_he',
# so load that file rather than 'proto8', which this notebook never writes.
vggbn.load_weights('../../dogscats/lrg_sample/models/proto8_he')
save_sub(vggbn,'vggbn_proto8.csv')

Use this command to download file from server:

    scp 96.237.225.57:/home/xbno/anaconda3/courses/dogscats/submissions/submission1.csv ~/Desktop/

