In [None]:
# Mount Google Drive into the runtime's filesystem; the dataset path used
# later in this notebook lives under /content/drive/MyDrive.
# NOTE(review): `google.colab` is presumably only importable inside a Colab
# runtime, so this cell will fail elsewhere — confirm before running locally.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

import tensorflow as tf
# Globally switches tf.function-wrapped code to eager execution.
# NOTE(review): the TensorFlow docs describe this switch as a debugging aid;
# it presumably slows model.fit/evaluate noticeably. None of the visible cells
# appears to require eager mode — confirm and consider removing.
tf.config.run_functions_eagerly(True)

from tensorflow.keras.utils import image_dataset_from_directory

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dropout, Dense, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam

# Initialize rng
# Seeded NumPy generator. In the visible cells it is only used to draw the
# integer `seed=` values handed to image_dataset_from_directory.
# NOTE(review): no tf.random.set_seed(...) call is visible in this notebook, so
# weight initialisation and dropout are presumably not covered by this seed —
# confirm if full reproducibility is expected.
rng = np.random.default_rng(2022)

# AUC metric instance, created without an explicit `name=`.
# In the visible cells it is only referenced by a commented-out
# model.compile(...) call.
auc = tf.keras.metrics.AUC()

In [None]:
# plot diagnostic learning curves
def _resolve_metric_key(recorded, aliases):
  """Return the key in `recorded` that corresponds to one of `aliases`.

  An exact match wins. Otherwise a key made of an alias plus a numeric suffix
  ('auc' -> 'auc_1', 'auc_2', ...) is accepted, which is the form the original
  code relied on. Returns None when nothing matches.
  """
  for alias in aliases:
    if alias in recorded:
      return alias
  for alias in aliases:
    prefix = alias + '_'
    for key in recorded:
      if key.startswith(prefix) and key[len(prefix):].isdigit():
        return key
  return None


def summarize_diagnostics(history):
  """Plot loss, AUC and accuracy learning curves side by side.

  Args:
    history: object exposing a `.history` mapping from metric name to a
      per-epoch sequence of values (e.g. what `model.fit` returns).

  The metric keys are looked up instead of being hard-coded, so the function
  works whether accuracy was recorded as 'accuracy' or 'acc' and whether the
  AUC metric was recorded as 'auc' or 'auc_<n>'. A panel whose metric was not
  recorded is labelled "(not recorded)" instead of raising KeyError, and the
  validation curve is only drawn when a matching 'val_' entry exists.
  """
  recorded = history.history
  # (panel title, y-axis label, accepted history keys in priority order)
  panels = (
      ('Loss Curves', 'Loss', ('loss',)),
      ('Classification AUC', 'AUROC', ('auc',)),
      ('Classification accuracy', 'Accuracy', ('accuracy', 'acc')),
  )

  fig, ax = plt.subplots(1, 3, figsize=(30, 10))
  for axis, (title, ylabel, aliases) in zip(ax, panels):
    axis.set_xlabel('Epochs', fontsize=15)
    axis.set_ylabel(ylabel, fontsize=15)

    train_key = _resolve_metric_key(recorded, aliases)
    if train_key is None:
      # Metric was not part of model.compile(metrics=...): leave the panel empty.
      axis.set_title(title + ' (not recorded)', fontsize=20)
      continue

    axis.set_title(title, fontsize=20)
    axis.plot(recorded[train_key], label='train')
    val_key = 'val_' + train_key
    if val_key in recorded:
      axis.plot(recorded[val_key], label='val')
    axis.legend(fontsize=15)

In [None]:
from keras import backend as K

def recall_m(y_true, y_pred):
    """Recall computed over the tensors passed in.

    Both the labels and the label*prediction product are clipped to [0, 1]
    and rounded to 0/1 before being summed, so a prediction only counts as a
    hit when its rounded value is 1 at a position where the label is 1.

    Args:
        y_true: ground-truth tensor.
        y_pred: prediction tensor, multiplied element-wise with `y_true`.

    Returns:
        hits / (positives in y_true + K.epsilon()); the epsilon keeps the
        division defined when there are no positive labels.
    """
    actual_positive_count = K.sum(K.round(K.clip(y_true, 0, 1)))
    hit_count = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    return hit_count / (actual_positive_count + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision computed over the tensors passed in.

    Predictions and the label*prediction product are clipped to [0, 1] and
    rounded to 0/1 before being summed, so a position counts as "predicted
    positive" only when its rounded prediction is 1.

    Args:
        y_true: ground-truth tensor.
        y_pred: prediction tensor, multiplied element-wise with `y_true`.

    Returns:
        hits / (predicted positives + K.epsilon()); the epsilon keeps the
        division defined when nothing is predicted positive.
    """
    flagged_count = K.sum(K.round(K.clip(y_pred, 0, 1)))
    hit_count = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    return hit_count / (flagged_count + K.epsilon())

def f1_m(y_true, y_pred):
    """F1 score built from `precision_m` and `recall_m`.

    Args:
        y_true: ground-truth tensor, forwarded unchanged to both helpers.
        y_pred: prediction tensor, forwarded unchanged to both helpers.

    Returns:
        2 * (p * r) / (p + r + K.epsilon()), where p and r are the helper
        results; the epsilon keeps the division defined when both are zero.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    ratio = (p * r) / (p + r + K.epsilon())
    return 2 * ratio

In [None]:
batch_size = 32 # This is a tunable hyperparameter
shape = (128, 128) # note we are reducing the size of the image
# Note: you will use 'grayscale' images for your own model
# but you might need to switch to 'rgb' for pretrained models because they are trained on ImageNet which has only RGB images
data_dir = '/content/drive/MyDrive/Final_Project/Dataset'


def load_split(subdir):
  """Load the images under `data_dir/subdir` as a batched dataset.

  Every split shares the same settings: images resized to `shape`, read as
  grayscale, labels in "categorical" mode, batches of `batch_size`.
  Each call draws one fresh integer from `rng` for the `seed` argument.

  Args:
    subdir: path of the split relative to `data_dir`.

  Returns:
    Whatever `tf.keras.utils.image_dataset_from_directory` returns for
    that directory.
  """
  return tf.keras.utils.image_dataset_from_directory(
      os.path.join(data_dir, subdir),
      seed=rng.integers(500000),
      image_size=shape,
      label_mode="categorical",
      color_mode='grayscale',
      batch_size=batch_size)


# Keep this call order: each load consumes one draw from `rng`, so reordering
# would hand different seeds to the splits than before.
train_ds = load_split('train/train')
train_ds_new = load_split('train_new')
val_ds = load_split('validation/validation')
test_ds = load_split('test_new')



Found 20000 files belonging to 4 classes.




Found 16000 files belonging to 4 classes.
Found 4000 files belonging to 4 classes.
Found 4000 files belonging to 4 classes.


In [None]:
# for images, labels in train_ds.take(1):
#   for i in range(9):
#     ax = plt.subplot(3, 3, i + 1)
#     plt.imshow(images[i].numpy().astype("uint8"))
#     plt.title(train_ds.class_names[labels[i]])
#     plt.axis("off")

In [None]:
# Peek at a single batch to confirm the image / label tensor shapes.
for x, y in train_ds.take(1):
  print(x.shape, y.shape, sep='\n')

(32, 128, 128, 1)
(32, 4)


In [None]:
#Example from HW2, just checking how it handles categorical data
# One sample CNN model

def conv_stack(tensor, kernel_size, n_convs, n_filters, dropout_rate=None):
  """Apply `n_convs` same-padded ReLU convolutions, then 2x2 max-pooling.

  Args:
    tensor: Keras tensor to build on.
    kernel_size: kernel size used by every Conv2D in the stack.
    n_convs: number of consecutive Conv2D layers.
    n_filters: number of filters in each Conv2D.
    dropout_rate: when given, a Dropout layer with this rate follows the
      pooling layer; when None, no Dropout is added.

  Returns:
    The output tensor of the last layer added.
  """
  for _ in range(n_convs):
    tensor = Conv2D(n_filters, kernel_size=kernel_size, activation='relu', padding='same')(tensor)
  tensor = MaxPooling2D((2, 2))(tensor)
  if dropout_rate is not None:
    tensor = Dropout(dropout_rate)(tensor)
  return tensor


# Input size follows the dataset configuration: `shape` pixels plus the single
# channel produced by color_mode='grayscale'.
inputs = Input(shape=(*shape, 1))
filters = 32
n_layers = 2 # number of layers in a stack of conv layers for a given input size

# Layers are created in the same order as before, so auto-generated layer
# names (conv2d, conv2d_1, ...) are unchanged.
layer = conv_stack(inputs, (7, 7), n_layers, filters, dropout_rate=0.2)
layer = conv_stack(layer, (5, 5), n_layers, filters, dropout_rate=0.2)
# The last stack has no Dropout of its own; one is applied after Flatten instead.
layer = conv_stack(layer, (5, 5), n_layers, filters)

layer = Flatten()(layer)

layer = Dropout(0.2)(layer)

# 4 output units: the dataset loaders report 4 classes.
outputs = Dense(4, activation='softmax')(layer)

model = Model(inputs, outputs)

model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 128, 128, 1)]     0         
                                                                 
 conv2d (Conv2D)             (None, 128, 128, 32)      1600      
                                                                 
 conv2d_1 (Conv2D)           (None, 128, 128, 32)      50208     
                                                                 
 max_pooling2d (MaxPooling2  (None, 64, 64, 32)        0         
 D)                                                              
                                                                 
 dropout (Dropout)           (None, 64, 64, 32)        0         
                                                                 
 conv2d_2 (Conv2D)           (None, 64, 64, 32)        25632     
                                                             

In [None]:
# Early stopping on validation loss: give up after 10 epochs without
# improvement and roll the model back to its best weights.
es = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=10,
    restore_best_weights=True,
    verbose=1,
)

# Adam with a fixed learning rate.
opt = Adam(learning_rate=5e-4)

# Compile with categorical cross-entropy. The evaluation cell unpacks
# model.evaluate() in this metric order: acc, f1, precision, recall.
model.compile(
    loss='categorical_crossentropy',
    optimizer=opt,
    metrics=['acc', f1_m, precision_m, recall_m],
)
# Alternative metric set (accuracy + the AUC instance created at the top):
# model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy', auc])



In [None]:
# Train on the `train_ds_new` split and validate on `val_ds`.
# `batch_size` is deliberately not passed here: both datasets were already
# batched when they were created, and the Keras docs say not to specify it
# for dataset inputs.
history = model.fit(train_ds_new,
                    epochs=50,
                    validation_data=val_ds,
                    callbacks=[es],
                    verbose=1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 25: early stopping


In [None]:
# Loss and accuracy curves
# summarize_diagnostics(history)

In [None]:
# Evaluate the trained model on the `test_ds` split.
# The five values are unpacked in the order the metrics were given to
# model.compile: loss first, then acc, f1, precision, recall.
# (With the alternative ['accuracy', auc] compile, the unpacking would be
#  `_, test_acc, test_auc = model.evaluate(test_ds, verbose=1)` instead.)
loss, accuracy, f1_score, precision, recall = model.evaluate(test_ds, verbose=1)

for label, value in (
    ('loss:', loss),
    ('Accuracy:', accuracy),
    ('f1_score:', f1_score),
    ('precision:', precision),
    ('recall:', recall),
):
  print(label, value)

loss: 0.36208575963974
Accuracy: 0.8797500133514404
f1_score: 0.8790058493614197
precision: 0.8920711278915405
recall: 0.8667500019073486


In [None]:


#   # compile the model
# model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc',f1_m,precision_m, recall_m])

# # fit the model
# history = model.fit(Xtrain, ytrain, validation_split=0.3, epochs=10, verbose=0)

# # evaluate the model
# loss, accuracy, f1_score, precision, recall = model.evaluate(Xtest, ytest, verbose=0)