### Introduction

In this notebook, we will use transfer learning to create a model for the Kaggle Aerial Cactus Detection competition. This notebook has several sections:
1. Setting up
    - Import packages
    - Set directories
- Data preparation
    - Import data
    - Prepare data for modelling
- Import transfer model
    - Download VGG16 model structure and weights
    - Freeze weights of lower layers
- Training our own custom layers on top of transfer model
    - Conduct grid search to find optimal structure of top classification layers
    - Train top classification layers
- Testing model and prepare submission
    - Run model on test data
    - Prepare submission file for Kaggle competition

### Setting up

In [1]:
# Import packages
import cv2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
import os
from tqdm import tqdm, tqdm_notebook
from keras.models import Sequential, Model, load_model
from keras.layers import Activation, Dropout, Flatten, Dense
from keras.applications import VGG16
from keras import applications
from keras.optimizers import Adam
from keras import regularizers
from keras.optimizers import Adam
from keras.optimizers import SGD

from keras.constraints import maxnorm
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping

from keras.wrappers.scikit_learn import KerasClassifier

from sklearn.metrics import confusion_matrix, roc_auc_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV,KFold

from keras.models import Sequential
from keras.layers import Conv2D, Dense, Flatten, BatchNormalization, Dropout, LeakyReLU, Flatten
from keras import regularizers
from keras.optimizers import Adam
from keras.optimizers import SGD

Using TensorFlow backend.


In [3]:
# Set train and test paths to data. Import training labels as a dataframe
train_dir = '../input/train/train/'
test_dir = '../input/test/test/'
train_df = pd.read_csv('../input/train.csv')
train_df.head()

Unnamed: 0,id,has_cactus
0,0004be2cfeaba1c0361d39e2b000257b.jpg,1
1,000c8a36845c0208e833c79c1bffedd1.jpg,1
2,000d1e9a533f62e55c289303b072733d.jpg,1
3,0011485b40695e9138e92d0b3fb55128.jpg,1
4,0014d7a11e90b62848904c1418fc8cf2.jpg,1


### Data preparation

In [4]:
# Prepare training images for modeling by appending them to a list and converting to np arrays
X_tr = []
Y_tr = []
imges = train_df['id'].values
for img_id in tqdm_notebook(imges):
    X_tr.append(cv2.imread(train_dir + img_id))    
    Y_tr.append(train_df[train_df['id'] == img_id]['has_cactus'].values[0])  
X_tr = np.asarray(X_tr)
X_tr = X_tr.astype('float32')
X_tr /= 255
Y_tr = np.asarray(Y_tr)

HBox(children=(IntProgress(value=0, max=17500), HTML(value='')))




In [15]:
# Prepare test images for testing by appending them to a list and converting to np arrays
X_tst = []
Test_imgs = []
for img_id in tqdm_notebook(os.listdir(test_dir)):
    X_tst.append(cv2.imread(test_dir + img_id))     
    Test_imgs.append(img_id)
X_tst = np.asarray(X_tst)
X_tst = X_tst.astype('float32')
X_tst /= 255

HBox(children=(IntProgress(value=0, max=4000), HTML(value='')))


CPU times: user 524 ms, sys: 468 ms, total: 992 ms
Wall time: 2.59 s


### Import transfer model

In [5]:
# Download the VGG16 model that we are going to use for transfer learning
# Include_top=False means that we are not going to include the top 3 classification layers
# We are going to create our own top classification layers based on our cactus data
base_model = applications.VGG16(weights='imagenet', include_top=False, input_shape=(32, 32, 3))

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [6]:
# Freeze the weights of the convolutional layers and pooling layers - we will use the pre-trained weights
base_model.trainable = False 
base_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 32, 32, 3)         0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 32, 32, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 32, 32, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 16, 16, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 16, 16, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 16, 16, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 8, 8, 128)         0         
__________

### Training our own custom layers on top of transfer model

In [7]:
# Define a function to create the model in order to be able to do grid search
def create_model(optimizer='adam',
                 nn=128,  lr=0.01, l1=0.01, l2=0.01,
                activation_1 = 'relu', activation_2='sigmoid', dropout=0):  #Set default values for hyperparameters
    
    reg = regularizers.l1_l2(l1=l1, l2=l2)
    model = Sequential()
    model.add(base_model)
    model.add(Flatten())
    model.add(Dense(nn,activation = activation_1, kernel_regularizer=reg))
    model.add(Dropout(dropout))
    model.add(Dense(1, activation = activation_2))
    
    
    # Define optimizer. 
    if optimizer =='sgd':
        optimizer = SGD(lr=lr)
    elif optimizer == 'adam':
        optimizer = Adam(lr=lr)
    elif optimizer == 'rmspop':
        optimizer = RMSprop(lr=lr)
        
    # Compile model
    model.compile(optimizer = optimizer , loss= 'binary_crossentropy', metrics = ['accuracy'])
    

    return model

In [8]:
# Definie paramter grid values for random grid search.

# Optimizer options
optimizer = ['rmsprop','sgd','adam']

# Learning algorithm parameters
lr=[1e-2, 1e-3, 1e-4, 1e-5]


# Activation
activation_1 =['relu', 'sigmoid', 'tanh']
activation_2 =['relu', 'sigmoid', 'tanh']

# Neurons in each layer
nn=[128,256,512,1024]


# Dropout and regularisation
dropout = [0, 0.1, 0.2, 0.3, 0.4 ,0.5]

l1 = [0, 0.01, 0.003, 0.001,0.0001]
l2 = [0, 0.01, 0.003, 0.001,0.0001]

# Dictionary summary
param_grid = dict(
                  nn=nn,
                  activation_1=activation_1, activation_2=activation_2, l1=l1, l2=l2, lr=lr, dropout =dropout, 
                  optimizer=optimizer
                 )

In [9]:
# Model class to use in the scikit random search CV 
# Grid search only on 10 epochs -- this is all the Kaggle kernel can handle in the allowed time
model = KerasClassifier(build_fn=create_model, epochs=10, batch_size=128, verbose=1)

In [10]:
# Define grid search
# We use 3-fold cross validation and will be using the entire dataset for the gridsearch
grid = RandomizedSearchCV(estimator=model, cv=KFold(3), param_distributions=param_grid, 
                          verbose=10,  n_iter=10, n_jobs=1)

In [11]:
# Define where weights will be stored and callbacks for training. 
file_path = 'weights-aerial-cactus.h5'

callbacks = [
        ModelCheckpoint(file_path, monitor = 'val_acc', verbose = 1, save_best_only = True, mode = 'max'),
        ReduceLROnPlateau(monitor = 'val_loss', factor = 0.2, patience = 3, verbose = 1, mode = 'min', min_lr = 0.00001),
        EarlyStopping(monitor = 'val_loss', min_delta = 1e-10, patience = 15, verbose = 1, restore_best_weights = True)
        ]

training_path = train_dir
test_path = test_dir

In [12]:
# Perform grid search
grid_result = grid.fit(X_tr, Y_tr, callbacks = callbacks)

Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] optimizer=adam, nn=512, lr=0.001, l2=0.01, l1=0, dropout=0.5, activation_2=sigmoid, activation_1=relu 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Epoch 1/10
Epoch 2/10
 1664/11666 [===>..........................] - ETA: 1s - loss: 0.3387 - acc: 0.9303



Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[CV]  optimizer=adam, nn=512, lr=0.001, l2=0.01, l1=0, dropout=0.5, activation_2=sigmoid, activation_1=relu, score=0.957, total=  17.3s
[CV] optimizer=adam, nn=512, lr=0.001, l2=0.01, l1=0, dropout=0.5, activation_2=sigmoid, activation_1=relu 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   17.3s remaining:    0.0s


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[CV]  optimizer=adam, nn=512, lr=0.001, l2=0.01, l1=0, dropout=0.5, activation_2=sigmoid, activation_1=relu, score=0.957, total=  13.8s
[CV] optimizer=adam, nn=512, lr=0.001, l2=0.01, l1=0, dropout=0.5, activation_2=sigmoid, activation_1=relu 


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   31.1s remaining:    0.0s


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[CV]  optimizer=adam, nn=512, lr=0.001, l2=0.01, l1=0, dropout=0.5, activation_2=sigmoid, activation_1=relu, score=0.957, total=  13.6s
[CV] optimizer=sgd, nn=1024, lr=1e-05, l2=0, l1=0.003, dropout=0.5, activation_2=sigmoid, activation_1=relu 


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:   44.7s remaining:    0.0s


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[CV]  optimizer=sgd, nn=1024, lr=1e-05, l2=0, l1=0.003, dropout=0.5, activation_2=sigmoid, activation_1=relu, score=0.744, total=  13.5s
[CV] optimizer=sgd, nn=1024, lr=1e-05, l2=0, l1=0.003, dropout=0.5, activation_2=sigmoid, activation_1=relu 


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:   58.2s remaining:    0.0s


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[CV]  optimizer=sgd, nn=1024, lr=1e-05, l2=0, l1=0.003, dropout=0.5, activation_2=sigmoid, activation_1=relu, score=0.751, total=  13.4s
[CV] optimizer=sgd, nn=1024, lr=1e-05, l2=0, l1=0.003, dropout=0.5, activation_2=sigmoid, activation_1=relu 


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:  1.2min remaining:    0.0s


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[CV]  optimizer=sgd, nn=1024, lr=1e-05, l2=0, l1=0.003, dropout=0.5, activation_2=sigmoid, activation_1=relu, score=0.754, total=  13.5s
[CV] optimizer=sgd, nn=1024, lr=0.01, l2=0.01, l1=0, dropout=0.3, activation_2=relu, activation_1=tanh 


[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:  1.4min remaining:    0.0s


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[CV]  optimizer=sgd, nn=1024, lr=0.01, l2=0.01, l1=0, dropout=0.3, activation_2=relu, activation_1=tanh, score=0.000, total=  13.7s
[CV] optimizer=sgd, nn=1024, lr=0.01, l2=0.01, l1=0, dropout=0.3, activation_2=relu, activation_1=tanh 


[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:  1.6min remaining:    0.0s


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[CV]  optimizer=sgd, nn=1024, lr=0.01, l2=0.01, l1=0, dropout=0.3, activation_2=relu, activation_1=tanh, score=0.000, total=  13.6s
[CV] optimizer=sgd, nn=1024, lr=0.01, l2=0.01, l1=0, dropout=0.3, activation_2=relu, activation_1=tanh 


[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:  1.9min remaining:    0.0s


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[CV]  optimizer=sgd, nn=1024, lr=0.01, l2=0.01, l1=0, dropout=0.3, activation_2=relu, activation_1=tanh, score=0.000, total=  13.7s
[CV] optimizer=sgd, nn=512, lr=0.0001, l2=0.0001, l1=0.0001, dropout=0.4, activation_2=tanh, activation_1=tanh 


[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:  2.1min remaining:    0.0s


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[CV]  optimizer=sgd, nn=512, lr=0.0001, l2=0.0001, l1=0.0001, dropout=0.4, activation_2=tanh, activation_1=tanh, score=0.754, total=  13.9s
[CV] optimizer=sgd, nn=512, lr=0.0001, l2=0.0001, l1=0.0001, dropout=0.4, activation_2=tanh, activation_1=tanh 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[CV]  optimizer=sgd, nn=512, lr=0.0001, l2=0.0001, l1=0.0001, dropout=0.4, activation_2=tanh, activation_1=tanh, score=0.752, total=  13.9s
[CV] optimizer=sgd, nn=512, lr=0.0001, l2=0.0001, l1=0.0001, dropout=0.4, activation_2=tanh, activation_1=tanh 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[CV]  optimizer=sgd, nn=512, lr=0.0001, l2=0.0001, l1=0.0001, dropout=0.4, activation_2=tanh, activation_1=tanh, score=0.756, total=  13.9s
[CV] optimizer=adam, nn

In [13]:
# Print parameters of model with best results
print(grid_result.best_params_)

{'optimizer': 'adam', 'nn': 512, 'lr': 0.001, 'l2': 0.01, 'l1': 0, 'dropout': 0.5, 'activation_2': 'sigmoid', 'activation_1': 'relu'}


In [14]:
# Save best model .
best_model = grid_result.best_estimator_

### Testing model and prepare submission

In [16]:
# Prediction
test_predictions = best_model.predict(X_tst)



In [17]:
# create predictions data frame
sub_df = pd.DataFrame(test_predictions, columns=['has_cactus'])

# we predict a cactus if according to the model there is over 75% chance of there being a cactus
sub_df['has_cactus'] = sub_df['has_cactus'].apply(lambda x: 1 if x > 0.75 else 0)

In [18]:
# create id column and rearrange
sub_df['id'] = ''
cols = sub_df.columns.tolist()
cols = cols[-1:] + cols[:-1]
sub_df=sub_df[cols]

In [19]:
sub_df

Unnamed: 0,id,has_cactus
0,,1
1,,1
2,,1
3,,1
4,,1
5,,1
6,,0
7,,1
8,,1
9,,0


In [20]:
for i, img in enumerate(Test_imgs):
    sub_df.set_value(i,'id',img)

  


In [21]:
sub_df.head()

Unnamed: 0,id,has_cactus
0,6a571b6df250e9575fb82f8904c325a1.jpg,1
1,2edb0bf826248b088d57e22799464c41.jpg,1
2,ea9422f63363a362ba6f482617006e76.jpg,1
3,1021509e308bf12f71a01cac2ddca97f.jpg,1
4,5eacbb413e5cd4e73cb7b1936758abf1.jpg,1


In [22]:
sub_df.to_csv('submission.csv',index=False)