## Random Grid Search Keras and Tensorflow model for Aerial Cactus Detection in Kaggle

Note : Not all the cells have been run since we did not want to run the grid search again. (It takes over 10 hours to run) However, except for the grid search code, the outputs will be exactly the same as the Final notebook. 

In [2]:
#Importing tensorflow and keras packages for NN

import tensorflow as tf
from keras.models import Sequential, Model, load_model
from keras.layers import Conv2D, Dense, Flatten, BatchNormalization, Dropout, LeakyReLU, Flatten
from keras import regularizers
from keras.optimizers import Adam
from keras.optimizers import SGD
from keras.wrappers.scikit_learn import KerasClassifier
from keras.constraints import maxnorm
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping

#Importing the scikitlearn packages for ML metrics and grid search
from sklearn.metrics import confusion_matrix, roc_auc_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV,KFold
from sklearn.metrics import roc_auc_score


#Importing tqdm for progress bars
from tqdm import tqdm, tqdm_notebook

#Importing pandas and numpy for array manipulation and cv to read in the images as arrays
import numpy as np
import pandas as pd
import cv2 as cv

#For paths
import os

In [3]:
#Setting seed for reproducibility
seed = 12345
np.random.seed(seed)

In [None]:
#Navigating to directory with data
os.listdir('../input')

In [4]:
# Navigating to directory with data
data_dir = '/Users/netinupur/Desktop/machine-learning-project/aerial-cactus-identification'
os.listdir(data_dir)

['.DS_Store',
 'test',
 'weights-aerial-cactus.h5',
 'test.zip',
 'train',
 'train.csv',
 'train.zip',
 'sample_submission.csv']

In [12]:
#Reading in training labels
train_data = pd.read_csv('../input/train.csv')

In [5]:
# Reading in training labels
train_data = pd.read_csv(data_dir+'/train.csv')

In [6]:
training_path = data_dir+'/train/'
test_path = data_dir+ '/test/'

In [13]:
train_data.head()

Unnamed: 0,id,has_cactus
0,0004be2cfeaba1c0361d39e2b000257b.jpg,1
1,000c8a36845c0208e833c79c1bffedd1.jpg,1
2,000d1e9a533f62e55c289303b072733d.jpg,1
3,0011485b40695e9138e92d0b3fb55128.jpg,1
4,0014d7a11e90b62848904c1418fc8cf2.jpg,1


In [7]:
# Convert training images to numpy arrays 

images_train = []
labels_train = []

images = train_data['id'].values
for image_id in tqdm_notebook(images):     #<- tqdm_notebook adds a progress bar in jupyter notebook
    
    image = np.array(cv.imread(training_path + image_id))
    label = train_data[train_data['id'] == image_id]['has_cactus'].values[0]
    
    images_train.append(image)             #<- Add original image
    labels_train.append(label)
    
    # Data Augmentation
    images_train.append(np.flip(image))    #<- Add flipped up down and left-right image
    labels_train.append(label)
    
    images_train.append(np.flipud(image))  #<- Add flipped up down image
    labels_train.append(label)
    
    images_train.append(np.fliplr(image))  #<- Add flipped left right image
    labels_train.append(label)
    
    
images_train = np.asarray(images_train)    #<- Convert combined list to np array
images_train = images_train.astype('float32')
images_train /= 255.                       #<- Normalize          
  
labels_train = np.asarray(labels_train)     # Convert training images to numpy arrays 

HBox(children=(IntProgress(value=0, max=17500), HTML(value='')))




In [8]:
# Convert test images to numpy arrays 

test_images_names = []

for filename in os.listdir(test_path):
    test_images_names.append(filename)
    
test_images_names.sort()

images_test = []

for image_id in tqdm_notebook(test_images_names):
    images_test.append(np.array(cv.imread(test_path + image_id)))
    
images_test = np.asarray(images_test)
images_test = images_test.astype('float32')
images_test /= 255

HBox(children=(IntProgress(value=0, max=4000), HTML(value='')))




In [9]:
#define AUC as metric since Kaggle competition is evaluated on AUC
def auroc(y_true, y_pred):
    return tf.py_func(roc_auc_score, (y_true, y_pred), tf.double)

In [10]:
def create_model(optimizer='adam', filters_1=2,filters_2=2,filters_3=2,filters_4=2,kernel_1=1,kernel_2=1, kernel_3=1,kernel_4=1,
                 nn1=300, nn2=100, nn3 = 50, lr=0.01, l1=0.01, l2=0.01,
                activation = 'relu', dropout_1=0, dropout_2=0, dropout_3=0, dropout_4=0):  #Set default values for hyperparameters
    
    #Apply l1 L2 regularization to the NN layers
    reg = regularizers.l1_l2(l1=l1, l2=l2)
    
    model = Sequential()
    
    #Our input images are 32*32 pixels and have 3 color channels 
    model.add(Conv2D(filters = filters_1, kernel_size = 3, activation = activation, input_shape = (32, 32, 3)))
    
    model.add(Conv2D(filters = filters_2, kernel_size = 3, activation = activation))
    #Normalizing intermediate layers imrpoves training speed
    model.add(BatchNormalization())
    #Dropout reduces training accuracy but improves test and validation accuracy
    model.add(Dropout(dropout_1))
    
    model.add(Conv2D(filters = filters_3, kernel_size = 1, activation = activation))
    model.add(Conv2D(filters = filters_4, kernel_size = 1, activation = activation))
    model.add(BatchNormalization())
    model.add(Dropout(dropout_2))
   
    #Output from convolutional layers converted to a flat array 
    model.add(Flatten())
    
    model.add(Dense(nn1, activation = activation,kernel_regularizer=reg))
    model.add(BatchNormalization())
    model.add(Dropout(dropout_3))
    
    model.add(Dense(nn2, activation = activation,kernel_regularizer=reg))
    model.add(BatchNormalization())
    model.add(Dropout(dropout_4))
    
    model.add(Dense(nn3, activation = 'tanh', kernel_regularizer=reg))
    
    #Output layer
    model.add(Dense(1, activation = 'sigmoid'))
    
    #Define optimizer 
    if optimizer =='sgd':
        optimizer = SGD(lr=lr)
    elif optimizer == 'adam':
        optimizer = Adam(lr=lr)
    elif optimizer == 'rmspop':
        optimizer = RMSprop(lr=lr)
        
    #Compile model
    #We use mean_squared_error instead of the more commonly used binary_crossentropy 
    #because it significantly improved the accuracy
    model.compile(optimizer = optimizer , loss= "mean_squared_error", metrics = ['accuracy',auroc])
    

    return model

In [11]:
# Define parameter grid values for random grid search.

# optimizer options
optimizer = ['rmsprop','sgd','adam']

# learning algorithm parameters
lr=[1e-2, 1e-3, 1e-4]


# activation
activation=['relu', 'sigmoid', 'tanh']

# neurons in each layer
nn1=[300,700,1400,1800,2100]
nn2=[100,400,800,1000]
nn3=[50,150,300,500]

# filters in each layer 
filters_1=[2,4,16,32,64]
filters_2=[2,4,16,32,64]
filters_3=[2,4,16,32,64]
filters_4=[2,4,16,32,64]

# dropout and regularisation
dropout_1 = [0, 0.1, 0.2, 0.3, 0.4 ,0.5]
dropout_2 = [0, 0.1, 0.2, 0.3, 0.4 ,0.5]
dropout_3 = [0, 0.1, 0.2, 0.3, 0.4 ,0.5]
dropout_4 = [0, 0.1, 0.2, 0.3, 0.4 ,0.5]


l1 = [0, 0.01, 0.003, 0.001,0.0001]
l2 = [0, 0.01, 0.003, 0.001,0.0001]

# dictionary summary
param_grid = dict(
                    nn1=nn1, nn2=nn2, nn3=nn3,filters_1=filters_1,filters_2=filters_2,filters_3=filters_3,filters_4=filters_4,
                    activation=activation, l1=l1, l2=l2, lr=lr, dropout_1=dropout_1, dropout_2=dropout_2, dropout_3=dropout_3, dropout_4=dropout_4, 
                  optimizer=optimizer
                 )

In [12]:
# Define where weights will be stored and callbacks for training. 
file_path = 'weights-aerial-cactus.h5'

callbacks = [
        ModelCheckpoint(file_path, monitor = 'val_acc', verbose = 1, save_best_only = True, mode = 'max'),
        #ReduceLROnPlateau(monitor = 'val_loss', factor = 0.2, patience = 3, verbose = 1, mode = 'min', min_lr = 0.00001),
        EarlyStopping(monitor = 'val_loss', min_delta = 1e-10, patience = 15, verbose = 1, restore_best_weights = True)
        ]


In [13]:
# model class to use in the scikit random search CV 
# Grid search on all 80 epochs on entire data 
model = KerasClassifier(build_fn=create_model, epochs=80, batch_size=128, verbose=1)

In [14]:
# Define grid search
# We use 3-fold cross validation and will be using the entire dataset for the gridsearch
grid = RandomizedSearchCV(estimator=model, cv=KFold(3), param_distributions=param_grid, 
                          verbose=10,  n_iter=10, n_jobs=1)

In [60]:
# Run the grid search on all 80 epochs 
grid_result_2 = grid.fit(images_train, labels_train,callbacks = callbacks )

Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] optimizer=rmsprop, nn3=300, nn2=400, nn1=300, lr=0.001, l2=0.01, l1=0.01, filters_4=4, filters_3=4, filters_2=4, filters_1=16, dropout_4=0.2, dropout_3=0.2, dropout_2=0.2, dropout_1=0.2, activation=tanh 
Epoch 1/1




[CV]  optimizer=rmsprop, nn3=300, nn2=400, nn1=300, lr=0.001, l2=0.01, l1=0.01, filters_4=4, filters_3=4, filters_2=4, filters_1=16, dropout_4=0.2, dropout_3=0.2, dropout_2=0.2, dropout_1=0.2, activation=tanh, score=0.7456929802107833, total= 3.4min
[CV] optimizer=rmsprop, nn3=300, nn2=400, nn1=300, lr=0.001, l2=0.01, l1=0.01, filters_4=4, filters_3=4, filters_2=4, filters_1=16, dropout_4=0.2, dropout_3=0.2, dropout_2=0.2, dropout_1=0.2, activation=tanh 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  3.8min remaining:    0.0s


Epoch 1/1
[CV]  optimizer=rmsprop, nn3=300, nn2=400, nn1=300, lr=0.001, l2=0.01, l1=0.01, filters_4=4, filters_3=4, filters_2=4, filters_1=16, dropout_4=0.2, dropout_3=0.2, dropout_2=0.2, dropout_1=0.2, activation=tanh, score=0.7506535807296763, total= 1.9min
[CV] optimizer=rmsprop, nn3=300, nn2=400, nn1=300, lr=0.001, l2=0.01, l1=0.01, filters_4=4, filters_3=4, filters_2=4, filters_1=16, dropout_4=0.2, dropout_3=0.2, dropout_2=0.2, dropout_1=0.2, activation=tanh 


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:  6.1min remaining:    0.0s


Epoch 1/1
[CV]  optimizer=rmsprop, nn3=300, nn2=400, nn1=300, lr=0.001, l2=0.01, l1=0.01, filters_4=4, filters_3=4, filters_2=4, filters_1=16, dropout_4=0.2, dropout_3=0.2, dropout_2=0.2, dropout_1=0.2, activation=tanh, score=0.75553936487106, total= 1.8min
[CV] optimizer=rmsprop, nn3=50, nn2=400, nn1=2100, lr=0.01, l2=0.0001, l1=0.003, filters_4=4, filters_3=64, filters_2=2, filters_1=2, dropout_4=0.3, dropout_3=0.3, dropout_2=0.1, dropout_1=0.2, activation=sigmoid 


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:  8.3min remaining:    0.0s


Epoch 1/1
[CV]  optimizer=rmsprop, nn3=50, nn2=400, nn1=2100, lr=0.01, l2=0.0001, l1=0.003, filters_4=4, filters_3=64, filters_2=2, filters_1=2, dropout_4=0.3, dropout_3=0.3, dropout_2=0.1, dropout_1=0.2, activation=sigmoid, score=0.7456929802107833, total= 4.0min
[CV] optimizer=rmsprop, nn3=50, nn2=400, nn1=2100, lr=0.01, l2=0.0001, l1=0.003, filters_4=4, filters_3=64, filters_2=2, filters_1=2, dropout_4=0.3, dropout_3=0.3, dropout_2=0.1, dropout_1=0.2, activation=sigmoid 


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed: 12.8min remaining:    0.0s


Epoch 1/1
[CV]  optimizer=rmsprop, nn3=50, nn2=400, nn1=2100, lr=0.01, l2=0.0001, l1=0.003, filters_4=4, filters_3=64, filters_2=2, filters_1=2, dropout_4=0.3, dropout_3=0.3, dropout_2=0.1, dropout_1=0.2, activation=sigmoid, score=0.24934641922306514, total= 3.5min
[CV] optimizer=rmsprop, nn3=50, nn2=400, nn1=2100, lr=0.01, l2=0.0001, l1=0.003, filters_4=4, filters_3=64, filters_2=2, filters_1=2, dropout_4=0.3, dropout_3=0.3, dropout_2=0.1, dropout_1=0.2, activation=sigmoid 


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed: 16.7min remaining:    0.0s


Epoch 1/1
[CV]  optimizer=rmsprop, nn3=50, nn2=400, nn1=2100, lr=0.01, l2=0.0001, l1=0.003, filters_4=4, filters_3=64, filters_2=2, filters_1=2, dropout_4=0.3, dropout_3=0.3, dropout_2=0.1, dropout_1=0.2, activation=sigmoid, score=0.75553936487106, total= 3.1min
[CV] optimizer=adam, nn3=50, nn2=800, nn1=2100, lr=0.01, l2=0.001, l1=0.003, filters_4=64, filters_3=2, filters_2=32, filters_1=2, dropout_4=0, dropout_3=0.2, dropout_2=0.1, dropout_1=0.3, activation=sigmoid 


[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed: 20.4min remaining:    0.0s


Epoch 1/1
[CV]  optimizer=adam, nn3=50, nn2=800, nn1=2100, lr=0.01, l2=0.001, l1=0.003, filters_4=64, filters_3=2, filters_2=32, filters_1=2, dropout_4=0, dropout_3=0.2, dropout_2=0.1, dropout_1=0.3, activation=sigmoid, score=0.7456929802107833, total=73.2min
[CV] optimizer=adam, nn3=50, nn2=800, nn1=2100, lr=0.01, l2=0.001, l1=0.003, filters_4=64, filters_3=2, filters_2=32, filters_1=2, dropout_4=0, dropout_3=0.2, dropout_2=0.1, dropout_1=0.3, activation=sigmoid 


[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed: 134.8min remaining:    0.0s


Epoch 1/1
[CV]  optimizer=adam, nn3=50, nn2=800, nn1=2100, lr=0.01, l2=0.001, l1=0.003, filters_4=64, filters_3=2, filters_2=32, filters_1=2, dropout_4=0, dropout_3=0.2, dropout_2=0.1, dropout_1=0.3, activation=sigmoid, score=0.7506535807296763, total=114.5min
[CV] optimizer=adam, nn3=50, nn2=800, nn1=2100, lr=0.01, l2=0.001, l1=0.003, filters_4=64, filters_3=2, filters_2=32, filters_1=2, dropout_4=0, dropout_3=0.2, dropout_2=0.1, dropout_1=0.3, activation=sigmoid 


[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed: 254.5min remaining:    0.0s


Epoch 1/1
[CV]  optimizer=adam, nn3=50, nn2=800, nn1=2100, lr=0.01, l2=0.001, l1=0.003, filters_4=64, filters_3=2, filters_2=32, filters_1=2, dropout_4=0, dropout_3=0.2, dropout_2=0.1, dropout_1=0.3, activation=sigmoid, score=0.75553936487106, total=70.2min
[CV] optimizer=adam, nn3=300, nn2=400, nn1=1400, lr=0.001, l2=0.0001, l1=0.01, filters_4=16, filters_3=16, filters_2=16, filters_1=2, dropout_4=0.3, dropout_3=0.1, dropout_2=0.1, dropout_1=0.4, activation=relu 


[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed: 329.5min remaining:    0.0s


Epoch 1/1
[CV]  optimizer=adam, nn3=300, nn2=400, nn1=1400, lr=0.001, l2=0.0001, l1=0.01, filters_4=16, filters_3=16, filters_2=16, filters_1=2, dropout_4=0.3, dropout_3=0.1, dropout_2=0.1, dropout_1=0.4, activation=relu, score=0.7323647895416788, total= 5.3min
[CV] optimizer=adam, nn3=300, nn2=400, nn1=1400, lr=0.001, l2=0.0001, l1=0.01, filters_4=16, filters_3=16, filters_2=16, filters_1=2, dropout_4=0.3, dropout_3=0.1, dropout_2=0.1, dropout_1=0.4, activation=relu 
Epoch 1/1
[CV]  optimizer=adam, nn3=300, nn2=400, nn1=1400, lr=0.001, l2=0.0001, l1=0.01, filters_4=16, filters_3=16, filters_2=16, filters_1=2, dropout_4=0.3, dropout_3=0.1, dropout_2=0.1, dropout_1=0.4, activation=relu, score=0.7506535807296763, total= 6.8min
[CV] optimizer=adam, nn3=300, nn2=400, nn1=1400, lr=0.001, l2=0.0001, l1=0.01, filters_4=16, filters_3=16, filters_2=16, filters_1=2, dropout_4=0.3, dropout_3=0.1, dropout_2=0.1, dropout_1=0.4, activation=relu 
Epoch 1/1
[CV]  optimizer=adam, nn3=300, nn2=400, nn1=

Epoch 1/1
[CV]  optimizer=adam, nn3=150, nn2=100, nn1=2100, lr=0.001, l2=0.001, l1=0.003, filters_4=32, filters_3=32, filters_2=32, filters_1=16, dropout_4=0.2, dropout_3=0.1, dropout_2=0.2, dropout_1=0, activation=tanh, score=0.7467215222524393, total=19.3min
[CV] optimizer=adam, nn3=150, nn2=100, nn1=2100, lr=0.001, l2=0.001, l1=0.003, filters_4=32, filters_3=32, filters_2=32, filters_1=16, dropout_4=0.2, dropout_3=0.1, dropout_2=0.2, dropout_1=0, activation=tanh 
Epoch 1/1
[CV]  optimizer=adam, nn3=150, nn2=100, nn1=2100, lr=0.001, l2=0.001, l1=0.003, filters_4=32, filters_3=32, filters_2=32, filters_1=16, dropout_4=0.2, dropout_3=0.1, dropout_2=0.2, dropout_1=0, activation=tanh, score=0.7612823039971515, total=20.5min
[CV] optimizer=adam, nn3=150, nn2=100, nn1=2100, lr=0.001, l2=0.001, l1=0.003, filters_4=32, filters_3=32, filters_2=32, filters_1=16, dropout_4=0.2, dropout_3=0.1, dropout_2=0.2, dropout_1=0, activation=tanh 
Epoch 1/1
[CV]  optimizer=adam, nn3=150, nn2=100, nn1=2100

[Parallel(n_jobs=1)]: Done  30 out of  30 | elapsed: 812.8min finished


Epoch 1/1


In [None]:
cv_results_df = pd.DataFrame(grid_result_2.cv_results_)

In [None]:
cv_results_df.head()

In [63]:
# Print parameters of model with best results
print(grid_result.best_params_)

{'optimizer': 'rmsprop', 'nn3': 150, 'nn2': 100, 'nn1': 300, 'lr': 0.01, 'l2': 0.0001, 'l1': 0, 'filters_4': 32, 'filters_3': 4, 'filters_2': 2, 'filters_1': 16, 'dropout_4': 0.2, 'dropout_3': 0.2, 'dropout_2': 0, 'dropout_1': 0.4, 'activation': 'relu'}


In [None]:
# Save best model .
best_model = grid_result.best_estimator_

In [None]:
# Predict on test data 
predictions = best_model.predict(images_test, verbose = 1)

In [22]:
predictions

array([[9.9924803e-01],
       [9.9927998e-01],
       [8.6081028e-04],
       ...,
       [9.9926019e-01],
       [9.9927843e-01],
       [9.9926466e-01]], dtype=float32)

In [None]:
# Generate submission file 

test_df = pd.read_csv('../input/sample_submission.csv')
X_test = []
images_test = test_df['id'].values

for img_id in tqdm_notebook(images_test):
    X_test.append(cv.imread(test_path + img_id))
    
X_test = np.asarray(X_test)
X_test = X_test.astype('float32')
X_test /= 255

y_test_pred = best_model.predict_proba(X_test)

test_df['has_cactus'] = y_test_pred
test_df.to_csv('aerial-cactus-submission.csv', index = False)