In [None]:
import os
import random
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img

# Set constant
from datetime import datetime
# Timestamp used to tag saved artifacts. It deliberately contains no ':' so
# that filenames built from it are valid on every platform (Windows rejects
# colons in file names).
NOW = datetime.now().strftime("%m%d%Y_%H%M%S")

# Prepare data

In [None]:
# os.listdir returns entries in arbitrary, filesystem-dependent order. Sorting
# keeps the row order of this frame (and therefore any seeded split made from
# it) identical from run to run and from machine to machine.
training_data = sorted(os.listdir('./data/training'))

# The text before the first '.' of each filename is taken as the class:
# 0 for 'cat', 1 for anything else.
labels = [0 if filename.split('.')[0] == 'cat' else 1 for filename in training_data]

# One row per training file: its name and its integer label.
df = pd.DataFrame({
    'name': training_data,
    'labels': labels
})

In [None]:
# Unlabelled images to predict on: one row per entry of the testing directory.
testing_data = os.listdir('./data/testing')
test_df = pd.DataFrame({'name': testing_data})

# Row count of the test frame. The identifier's spelling is left untouched so
# that any existing reference to it keeps working.
number_of_test_sampless = len(test_df)

### Change labels from int to string to fit requirements for ImageDataGenerator

In [None]:
# Swap the integer codes for their class names; values that are already
# strings are left alone, so re-running this cell is harmless.
LABEL_NAMES = {0: 'cat', 1: 'dog'}
df['labels'] = df['labels'].replace(LABEL_NAMES)


### Split training data into 80% training and 20% validation

In [None]:
# Seeded split of the labelled frame; each part gets a fresh 0..n-1 index.
train_df, validate_df = (
    part.reset_index(drop=True)
    for part in train_test_split(df, test_size=0.20, random_state=42)
)

# Data Generators

## Training

In [None]:
# Pixel rescaling plus the random augmentations used for training images only.
augmentation_options = dict(
    rescale=1./255,
    rotation_range=10,
    shear_range=0.1,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
)
train_datagenerator = ImageDataGenerator(**augmentation_options)

# Batches of (image, one-hot label) read from the training directory.
train_generator = train_datagenerator.flow_from_dataframe(
    dataframe=train_df,
    directory="./data/training/",
    x_col='name',
    y_col='labels',
    class_mode='categorical',
    target_size=(160, 160),
    batch_size=16,
)

## Validation

In [None]:
# Validation images are only rescaled to [0, 1]; no augmentation is applied.
validation_datagenerator = ImageDataGenerator(rescale=1./255)

# The validation rows come from the same directory as the training rows.
validation_generator = validation_datagenerator.flow_from_dataframe(
    dataframe=validate_df,
    directory="./data/training/",
    x_col='name',
    y_col='labels',
    class_mode='categorical',
    target_size=(160, 160),
    batch_size=16,
)

## Testing

In [None]:
# Test images are only rescaled to [0, 1].
testing_datagenerator = ImageDataGenerator(rescale=1./255)

# No labels are available (y_col/class_mode are None) and shuffling is off, so
# batches follow the row order of test_df.
testing_generator = testing_datagenerator.flow_from_dataframe(
    dataframe=test_df,
    directory="./data/testing/",
    x_col='name',
    y_col=None,
    class_mode=None,
    target_size=(160, 160),
    batch_size=16,
    shuffle=False,
)

## Visualize example of generated training data

In [None]:
# Pick a single random training row and wrap it in a generator that uses the
# training augmentation settings.
sample_df = train_df.sample(n=1).reset_index(drop=True)

sample_generator = train_datagenerator.flow_from_dataframe(
    dataframe=sample_df,
    directory="./data/training/",
    x_col='name',
    y_col='labels',
    class_mode='categorical',
    target_size=(160, 160),
)

In [None]:
# Fill a 4x4 grid with successive draws from the single-image sample generator.
GRID_SIDE = 4
plt.figure(figsize=(12, 12))
for cell in range(1, GRID_SIDE * GRID_SIDE + 1):
    plt.subplot(GRID_SIDE, GRID_SIDE, cell)
    # Take exactly one batch per grid cell and show its first image.
    augmented_batch, _ = next(iter(sample_generator))
    plt.imshow(augmented_batch[0])
    plt.xticks([])
    plt.yticks([])
plt.tight_layout()
plt.show()

In [None]:
# Row counts of the two splits, plus a standalone batch_size value.
# NOTE(review): no visible cell reads these names, and 15 differs from the
# batch_size=16 passed to the generators; confirm whether this cell is needed.
total_train, total_validate = len(train_df), len(validate_df)
batch_size = 15

# Training: fit model on training data

In [None]:
# Number of rows in each split.
TOTAL_TRAINING_SIZE = train_df.shape[0]
TOTAL_VALIDATION_SIZE = validate_df.shape[0]

# Kept equal to the batch_size given to the flow_from_dataframe generators (16)
# so that any step count derived from it matches the batches those generators
# actually yield. A larger value here would cover only part of the data.
BATCH_SIZE = 16

# Upper bound on the number of training epochs.
EPOCHS = 10

## Introduce early stopping and handle plateauing

In [None]:
# End training after 5 epochs without improvement of the callback's default
# monitored quantity.
early_stopping = EarlyStopping(verbose=True, patience=5)

# Halve the learning rate after 2 epochs without val_accuracy improvement,
# never going below 1e-5.
plateau_settings = dict(
    monitor='val_accuracy',
    factor=0.5,
    patience=2,
    min_lr=1e-5,
    verbose=True,
)
reduce_learning_rate = ReduceLROnPlateau(**plateau_settings)

# Using Pretrained MobileNetV2

In [None]:
# Configure GPU
import tensorflow as tf

# Enable on-demand memory growth for every GPU TensorFlow can see, instead of
# reserving device memory up front.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

In [None]:
from models.pretrained_mobilenet import PretrainedMobileNetV2

# Build the project's MobileNetV2 wrapper for 160x160 RGB inputs (the same
# target_size the generators use) and take its underlying model.
INPUT_SHAPE = (160, 160, 3)
mobilenetv2_model = PretrainedMobileNetV2(INPUT_SHAPE).model

# Print the layer-by-layer overview.
mobilenetv2_model.summary()

In [None]:
# The generators know their own batch size, so let them report how many
# batches make up one full pass (len(generator)). Deriving the step counts from
# a separately maintained constant truncates every epoch whenever that constant
# and the generators' batch_size disagree.
mobilenetv2_model_history = mobilenetv2_model.fit(
    x=train_generator,
    epochs=EPOCHS,
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
    steps_per_epoch=len(train_generator),
    callbacks=[early_stopping, reduce_learning_rate])

In [None]:
# Persist the weights alone and the full model side by side, both tagged with
# the run timestamp.
weights_path = f"pretrained_mobilenetv2_WEIGHTS_{NOW}.h5"
model_path = f"pretrained_mobilenetv2_{NOW}.h5"

mobilenetv2_model.save_weights(weights_path)
mobilenetv2_model.save(model_path)

In [None]:
# https://www.tensorflow.org/tutorials/images/classification#visualize_training_results
def tf_plot(history):
    """Plot training vs. validation accuracy and loss, one point per epoch.

    Args:
        history: object whose ``history`` attribute maps 'accuracy',
            'val_accuracy', 'loss' and 'val_loss' to per-epoch sequences
            (for example the value returned by ``model.fit``).
    """
    acc = history.history['accuracy']
    val_acc = history.history['val_accuracy']

    loss = history.history['loss']
    val_loss = history.history['val_loss']

    # Use the number of epochs actually recorded, not the configured maximum:
    # early stopping can end training sooner, and plotting the metrics against
    # a longer x-range raises a length-mismatch error.
    epochs_range = range(len(acc))

    plt.figure(figsize=(16, 9))

    # Left panel: accuracy curves.
    plt.subplot(1, 2, 1)
    plt.plot(epochs_range, acc, label='Training Accuracy')
    plt.plot(epochs_range, val_acc, label='Validation Accuracy')
    plt.legend(loc='lower right')
    plt.title('Training and Validation Accuracy')

    # Right panel: loss curves.
    plt.subplot(1, 2, 2)
    plt.plot(epochs_range, loss, label='Training Loss')
    plt.plot(epochs_range, val_loss, label='Validation Loss')
    plt.legend(loc='upper right')
    plt.title('Training and Validation Loss')
    plt.show()

In [None]:
# Plot the accuracy and loss curves recorded while fitting the MobileNetV2 model.
tf_plot(mobilenetv2_model_history)

# Testing: make predictions with trained model

In [None]:
# Run one step per batch the generator can produce, so that every test image
# receives a prediction whatever the generator's batch size is. len() also
# yields the integer step count Keras expects.
predictions = mobilenetv2_model.predict(testing_generator, steps=len(testing_generator))

In [None]:
# Inspect the raw model output for the first test sample.
predictions[0]

In [None]:
def plot_image(i, predictions_array, labels, img):
    """Show one image captioned with its highest-scoring label.

    Args:
        i: position of the image to show within ``img``.
        predictions_array: per-class scores for that image.
        labels: label names, looked up by the index of the highest score.
        img: indexable collection of images.
    """
    picture = img[i]
    best_class = np.argmax(predictions_array)
    # The caption is "<label> (<top score times 100>)".
    caption = f"{labels[best_class]} ({100*np.max(predictions_array)})"

    # Bare image: no grid and no axis ticks.
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    plt.imshow(picture)

    plt.xlabel(caption)
    plt.show()

In [None]:
# Position i must name the class that output index i stands for. The labelled
# generators were built from the strings 'cat' and 'dog' with
# class_mode='categorical' and no explicit `classes`, in which case Keras
# assigns indices in alphanumeric order: 0 -> cat, 1 -> dog. This assumes the
# model's outputs follow those class indices.
test_labels = ['Cat', 'Dog']

# Collect the first images of the unshuffled test generator in dataset order,
# so that test_images[i] is the image predictions[i] was computed for. Indexing
# the generator returns a whole batch, so every image of each batch is kept
# rather than only the first one.
NUM_PREVIEW_IMAGES = 20
test_images = []
for batch_index in range(len(testing_generator)):
    test_images.extend(testing_generator[batch_index])
    if len(test_images) >= NUM_PREVIEW_IMAGES:
        break
test_images = test_images[:NUM_PREVIEW_IMAGES]

In [None]:
# Show the first num_rows * num_cols test images, one figure each, captioned
# with the predicted label.
num_rows, num_cols = 5, 3
num_images = num_rows * num_cols

for image_index in range(num_images):
    prediction = predictions[image_index]
    plot_image(image_index, prediction, test_labels, test_images)
