In [1]:
pip install split-folders

Note: you may need to restart the kernel to use updated packages.


In [22]:
import pandas as pd 
import splitfolders

from keras.preprocessing.image import ImageDataGenerator

from keras import models
from keras import layers 
from keras import optimizers
from sklearn.dummy import DummyClassifier
from sklearn.metrics import recall_score
from keras import metrics
from tensorflow.keras.wrappers import scikit_learn
import tensorflow as tf
from tensorflow.keras.layers import Dense
from sklearn.model_selection import GridSearchCV
import numpy as np

In [None]:
# One-off split of the "Data" folder into "Data_Split" using the ratio
# (.64, .16, .2) with a fixed seed of 42.
# NOTE(review): presumably left commented out because it only needs to run once
# and is called with move=True — confirm before re-enabling.
#splitfolders.ratio("Data", output="Data_Split",
#    seed=42, ratio=(.64, .16, .2), group_prefix=None, move=True)

### Pre-processing

In [3]:
# One independent generator per split; the only preprocessing applied is
# multiplying every pixel value by 1/255.
train_datagen, val_datagen, test_datagen = (
    ImageDataGenerator(rescale=1.0 / 255) for _ in range(3)
)

In [4]:
# Read the training images from disk, resized to 150x150 and loaded as
# single-channel (grayscale) arrays. Labels are binary because the models
# are trained with binary_crossentropy. batch_size equals the 3747 images
# the loader reports, so one batch contains the whole split.
train_generator = train_datagen.flow_from_directory(
    directory='Data_Split/train',
    target_size=(150, 150),
    color_mode='grayscale',
    class_mode='binary',
    batch_size=3747,
)


Found 3747 images belonging to 2 classes.


In [5]:
# Validation split: same image size / colour mode / label mode as training.
# batch_size equals the 936 images the loader reports for this folder.
validation_generator = val_datagen.flow_from_directory(
    directory='Data_Split/val',
    target_size=(150, 150),
    color_mode='grayscale',
    class_mode='binary',
    batch_size=936,
)

Found 936 images belonging to 2 classes.


In [6]:
# Held-out test split: same image size / colour mode / label mode as training.
# batch_size equals the 1173 images the loader reports for this folder.
test_generator = test_datagen.flow_from_directory(
    directory='Data_Split/test',
    target_size=(150, 150),
    color_mode='grayscale',
    class_mode='binary',
    batch_size=1173,
)

Found 1173 images belonging to 2 classes.


In [7]:
# Creating the augmented data
#
# Random rotations, shifts, shear, zoom and flips are applied on the fly to
# the training images. rescale=1./255 is included so the augmented images are
# scaled the same way as the output of the train/val/test generators;
# without it a model fed from this generator would see unscaled pixel values
# while being validated on rescaled ones.

aug_train_images = ImageDataGenerator(rescale=1./255,
                                   rotation_range=30, 
                                   width_shift_range=0.25, 
                                   height_shift_range=0.25, 
                                   shear_range=0.25, 
                                   zoom_range=0.25, 
                                   horizontal_flip=True,
                                   vertical_flip=True)

# Same directory, image size, colour mode and label mode as train_generator.
train_aug = aug_train_images.flow_from_directory('Data_Split/train',
                                                  target_size=(150, 150),
                                                  batch_size=3747,
                                                  color_mode='grayscale',
                                                  class_mode='binary')

Found 3747 images belonging to 2 classes.


In [8]:
# Draw one batch from each generator. Each generator's batch_size was set to
# the size of its split, so every call yields that split's images and labels
# as in-memory arrays.
(train_data, train_labels) = next(train_generator)
(test_data, test_labels) = next(test_generator)
(val_data, val_labels) = next(validation_generator)

In [None]:
# Collapse every sample into a single feature vector (one row per image)
# so the arrays can be fed to the dummy classifier and the dense network.
train_data = train_data.reshape(len(train_data), -1)
test_data = test_data.reshape(len(test_data), -1)
val_data = val_data.reshape(len(val_data), -1)
train_data.shape

#### Dummy (baseline model)

In [31]:
# Baseline classifier: always predicts the most frequent training label.
dummy_model = DummyClassifier(strategy='most_frequent')
dummy_model.fit(X=train_data, y=train_labels)

DummyClassifier(strategy='most_frequent')

In [32]:
# Baseline predictions for the validation set; used for the recall score below.
y_preds_dummy = dummy_model.predict(val_data)

In [33]:
# Validation accuracy and recall of the baseline, printed one per line.
dummy_acc = dummy_model.score(X=val_data, y=val_labels)
dummy_rec = recall_score(y_true=val_labels, y_pred=y_preds_dummy)
print(dummy_acc, dummy_rec, sep='\n')

0.7297008547008547
1.0


In [35]:
# Inspect the current shape of the training array.
train_data.shape

(3747, 150, 150, 1)

(3747, 22500)

#### Simple Model 1

In [46]:
# Start from an empty Sequential model; its layers are added in the next cell.
simple_model = models.Sequential()

In [47]:
# One hidden layer of 12 ReLU units over the 22500-element input vectors,
# followed by a single sigmoid unit for the binary output.
simple_model.add(layers.Dense(units=12, activation='relu', input_shape=(22500,)))
simple_model.add(layers.Dense(units=1, activation='sigmoid'))

In [48]:
# SGD optimizer with binary cross-entropy loss; accuracy, precision and recall
# are tracked during training.
simple_model.compile(
    optimizer='SGD',
    loss='binary_crossentropy',
    metrics=['accuracy', metrics.Precision(), metrics.Recall()],
)

In [49]:
# Train for 10 epochs in mini-batches of 32, passing the validation arrays
# so validation metrics are reported alongside the training ones.
simple_model_fit = simple_model.fit(
    x=train_data,
    y=train_labels,
    batch_size=32,
    epochs=10,
    verbose=1,
    validation_data=(val_data, val_labels),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


##### Simple Model GridSearch

In [69]:
def create_model():
    """Return a freshly compiled one-hidden-layer dense classifier.

    The network takes 22500-element input vectors, passes them through 12
    ReLU units and ends in a single sigmoid unit. It is compiled with the
    SGD optimizer, binary cross-entropy loss, and accuracy, precision and
    recall metrics.
    """
    net = models.Sequential([
        Dense(units=12, activation='relu', input_shape=(22500,)),
        Dense(units=1, activation='sigmoid'),
    ])
    net.compile(
        optimizer='SGD',
        loss='binary_crossentropy',
        metrics=['accuracy', metrics.Precision(), metrics.Recall()],
    )
    return net
  

In [70]:
# Wrap the model factory so scikit-learn can drive it, and define the
# batch-size / epoch combinations to search over.
keras_model = scikit_learn.KerasClassifier(create_model, verbose=1)
batch_size = [10, 20]
epochs = [10, 25]
param_grid = {'batch_size': batch_size, 'epochs': epochs}


In [None]:
# Seed TensorFlow's global random generator with a fixed value for repeatable runs.
# NOTE(review): this cell has no execution count and sits after earlier
# training cells — consider moving the seeding next to the imports.
seed = 42
tf.random.set_seed(seed)

In [71]:
# 5-fold cross-validated search over param_grid using the wrapped Keras model.
grid = GridSearchCV(
    estimator=keras_model,
    param_grid=param_grid,
    cv=5,
)
grid_result = grid.fit(X=train_data, y=train_labels)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 1

Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch

Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25


Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 2

### Convolutional Neural Network

In [87]:
# Draw a new batch from each generator, replacing the flattened arrays with
# image-shaped ones for the convolutional models below.
(train_data, train_labels) = next(train_generator)
(test_data, test_labels) = next(test_generator)
(val_data, val_labels) = next(validation_generator)

In [88]:
# Two convolution + max-pooling stages (64 then 32 filters) on 150x150x1
# inputs, flattened into a 12-unit ReLU layer and a single sigmoid output.
cnn_model = models.Sequential([
    layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu',
                  input_shape=(150, 150, 1)),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    Dense(units=12, activation='relu'),
    Dense(units=1, activation='sigmoid'),
])

In [89]:
# SGD optimizer with binary cross-entropy loss; accuracy, precision and recall
# are tracked during training.
cnn_model.compile(
    optimizer='SGD',
    loss='binary_crossentropy',
    metrics=['accuracy', metrics.Precision(), metrics.Recall()],
)

In [90]:
# Train for 10 epochs in mini-batches of 32, passing the validation arrays
# so validation metrics are reported alongside the training ones.
cnn_model_fit = cnn_model.fit(
    x=train_data,
    y=train_labels,
    batch_size=32,
    epochs=10,
    verbose=1,
    validation_data=(val_data, val_labels),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10

KeyboardInterrupt: 

In [80]:
def create_model():
    """Build and compile the CNN used as the `KerasClassifier` build function.

    Architecture: two Conv2D + MaxPooling2D stages (64 then 32 filters, 3x3
    kernels, 2x2 pooling) on 150x150x1 inputs, flattened into a 12-unit ReLU
    layer and a single sigmoid output unit.

    Returns:
        The Sequential model compiled with the SGD optimizer, binary
        cross-entropy loss, and accuracy / precision / recall metrics.
    """
    # The network is bound to `model` so that the compile and return
    # statements below act on the object built here; they previously
    # referenced a name that was never assigned inside this function.
    model = models.Sequential()
    model.add(layers.Conv2D(64, (3, 3), activation='relu', input_shape=(150,150,1)))
    model.add(layers.MaxPooling2D((2,2)))
    model.add(layers.Conv2D(32, (3,3), activation='relu'))
    model.add(layers.MaxPooling2D((2,2)))
    model.add(layers.Flatten())
    model.add(Dense(12, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    # Compile model
    model.compile(optimizer='SGD', loss='binary_crossentropy',
                  metrics=['accuracy', metrics.Precision(), metrics.Recall()])
    return model

In [81]:
# Wrap the model factory for scikit-learn and define the batch-size / epoch
# combinations to search over.
# NOTE(review): this rebinds `cnn_model`, which an earlier cell uses for a
# Sequential model — a distinct name would be clearer.
cnn_model = scikit_learn.KerasClassifier(create_model, verbose=1)
batch_size = [10, 20]
epochs = [10, 25]
param_grid = {'batch_size': batch_size, 'epochs': epochs}

In [None]:
# 5-fold cross-validated search over param_grid using the wrapped CNN.
grid = GridSearchCV(
    estimator=cnn_model,
    param_grid=param_grid,
    cv=5,
)
grid_result = grid.fit(X=train_data, y=train_labels)

In [9]:
# CNN variant 1: the two-stage conv/pool network with a 12-unit dense layer,
# compiled with the 'adam' optimizer.
cnn1_model = models.Sequential([
    layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu',
                  input_shape=(150, 150, 1)),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    Dense(units=12, activation='relu'),
    Dense(units=1, activation='sigmoid'),
])

# Compile model
cnn1_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy', metrics.Precision(), metrics.Recall()],
)

cnn1_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 148, 148, 64)      640       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 74, 74, 64)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 72, 72, 32)        18464     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 36, 36, 32)       0         
 2D)                                                             
                                                                 
 flatten (Flatten)           (None, 41472)             0         
                                                                 
 dense (Dense)               (None, 12)                4

In [10]:
# Train for 10 epochs in mini-batches of 32, passing the validation arrays
# so validation metrics are reported alongside the training ones.
cnn1_model_fit = cnn1_model.fit(
    x=train_data,
    y=train_labels,
    batch_size=32,
    epochs=10,
    verbose=1,
    validation_data=(val_data, val_labels),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [11]:
# CNN variant 2: the two-stage conv/pool network with a 12-unit dense layer,
# compiled with the 'rmsprop' optimizer.
cnn2_model = models.Sequential([
    layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu',
                  input_shape=(150, 150, 1)),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    Dense(units=12, activation='relu'),
    Dense(units=1, activation='sigmoid'),
])

# Compile model
cnn2_model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy', metrics.Precision(), metrics.Recall()],
)

cnn2_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_2 (Conv2D)           (None, 148, 148, 64)      640       
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 74, 74, 64)       0         
 2D)                                                             
                                                                 
 conv2d_3 (Conv2D)           (None, 72, 72, 32)        18464     
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 36, 36, 32)       0         
 2D)                                                             
                                                                 
 flatten_1 (Flatten)         (None, 41472)             0         
                                                                 
 dense_2 (Dense)             (None, 12)               

In [12]:
# Train for 10 epochs in mini-batches of 32, passing the validation arrays
# so validation metrics are reported alongside the training ones.
cnn2_model_fit = cnn2_model.fit(
    x=train_data,
    y=train_labels,
    batch_size=32,
    epochs=10,
    verbose=1,
    validation_data=(val_data, val_labels),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [13]:
# CNN variant 3: the two-stage conv/pool network with a 12-unit dense layer,
# compiled with the 'adadelta' optimizer.
cnn3_model = models.Sequential([
    layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu',
                  input_shape=(150, 150, 1)),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    Dense(units=12, activation='relu'),
    Dense(units=1, activation='sigmoid'),
])

# Compile model
cnn3_model.compile(
    optimizer='adadelta',
    loss='binary_crossentropy',
    metrics=['accuracy', metrics.Precision(), metrics.Recall()],
)

cnn3_model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_4 (Conv2D)           (None, 148, 148, 64)      640       
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 74, 74, 64)       0         
 2D)                                                             
                                                                 
 conv2d_5 (Conv2D)           (None, 72, 72, 32)        18464     
                                                                 
 max_pooling2d_5 (MaxPooling  (None, 36, 36, 32)       0         
 2D)                                                             
                                                                 
 flatten_2 (Flatten)         (None, 41472)             0         
                                                                 
 dense_4 (Dense)             (None, 12)               

In [14]:
# Train for 10 epochs in mini-batches of 32, passing the validation arrays
# so validation metrics are reported alongside the training ones.
cnn3_model_fit = cnn3_model.fit(
    x=train_data,
    y=train_labels,
    batch_size=32,
    epochs=10,
    verbose=1,
    validation_data=(val_data, val_labels),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [18]:
# CNN variant 4: the two-stage conv/pool network with the dense layer widened
# to 16 units, compiled with the "adam" optimizer.
cnn4_model = models.Sequential([
    layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu',
                  input_shape=(150, 150, 1)),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    Dense(units=16, activation='relu'),
    Dense(units=1, activation='sigmoid'),
])

cnn4_model.compile(
    optimizer="adam",
    loss='binary_crossentropy',
    metrics=['accuracy', metrics.Precision(), metrics.Recall()],
)

cnn4_model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_8 (Conv2D)           (None, 148, 148, 64)      640       
                                                                 
 max_pooling2d_8 (MaxPooling  (None, 74, 74, 64)       0         
 2D)                                                             
                                                                 
 conv2d_9 (Conv2D)           (None, 72, 72, 32)        18464     
                                                                 
 max_pooling2d_9 (MaxPooling  (None, 36, 36, 32)       0         
 2D)                                                             
                                                                 
 flatten_4 (Flatten)         (None, 41472)             0         
                                                                 
 dense_8 (Dense)             (None, 16)               

In [19]:
# Train for 10 epochs in mini-batches of 32, passing the validation arrays
# so validation metrics are reported alongside the training ones.
cnn4_model_fit = cnn4_model.fit(
    x=train_data,
    y=train_labels,
    batch_size=32,
    epochs=10,
    verbose=1,
    validation_data=(val_data, val_labels),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [20]:
# CNN variant 5: same layers and compile settings as cnn4_model (16-unit dense
# layer, "adam"); the training call that follows uses more epochs.
cnn5_model = models.Sequential([
    layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu',
                  input_shape=(150, 150, 1)),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    Dense(units=16, activation='relu'),
    Dense(units=1, activation='sigmoid'),
])

cnn5_model.compile(
    optimizer="adam",
    loss='binary_crossentropy',
    metrics=['accuracy', metrics.Precision(), metrics.Recall()],
)

cnn5_model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_10 (Conv2D)          (None, 148, 148, 64)      640       
                                                                 
 max_pooling2d_10 (MaxPoolin  (None, 74, 74, 64)       0         
 g2D)                                                            
                                                                 
 conv2d_11 (Conv2D)          (None, 72, 72, 32)        18464     
                                                                 
 max_pooling2d_11 (MaxPoolin  (None, 36, 36, 32)       0         
 g2D)                                                            
                                                                 
 flatten_5 (Flatten)         (None, 41472)             0         
                                                                 
 dense_10 (Dense)            (None, 16)               

In [21]:
# Train for 25 epochs in mini-batches of 32, passing the validation arrays
# so validation metrics are reported alongside the training ones.
cnn5_model_fit = cnn5_model.fit(
    x=train_data,
    y=train_labels,
    batch_size=32,
    epochs=25,
    verbose=1,
    validation_data=(val_data, val_labels),
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [None]:
# CNN variant 6: the two-stage conv/pool network followed by two 16-unit
# ReLU dense layers.
cnn6_model = models.Sequential()
cnn6_model.add(layers.Conv2D(64, (3, 3), activation='relu', 
                            input_shape=(150,150,1)))
cnn6_model.add(layers.MaxPooling2D((2,2)))
cnn6_model.add(layers.Conv2D(32, (3,3), activation='relu'))       
cnn6_model.add(layers.MaxPooling2D((2,2)))
cnn6_model.add(layers.Flatten())
cnn6_model.add(Dense(16, activation='relu'))
cnn6_model.add(Dense(16, activation='relu'))
# The output layer has a single sigmoid unit: the generators use
# class_mode='binary', so each sample has one 0/1 label, and the loss is
# binary_crossentropy. A 4-unit output would not match the label shape.
cnn6_model.add(Dense(1, activation='sigmoid')) 

cnn6_model.compile(optimizer="adam",
                        loss='binary_crossentropy',
                        metrics=['accuracy', metrics.Precision(), metrics.Recall()])

cnn6_model.summary()

# Train for 10 epochs in mini-batches of 32, passing the validation arrays
# so validation metrics are reported alongside the training ones.
cnn6_model_fit = cnn6_model.fit(train_data, 
                                train_labels, 
                                epochs=10, 
                                batch_size=32, 
                                validation_data= (val_data, val_labels), verbose=1)