In [None]:
import math
from dataclasses import dataclass

import graphviz
import matplotlib.pyplot as plt
import numpy as np
import pydot
import tensorflow as tf
from IPython.display import SVG
from keras import models
from keras.layers import (Activation, Add, AveragePooling2D,
                          BatchNormalization, Concatenate, Conv2D, Dense,
                          Flatten, GlobalAveragePooling2D, Input, MaxPooling2D,
                          Softmax)
from keras.utils import image_dataset_from_directory
from sklearn.metrics import f1_score, confusion_matrix, ConfusionMatrixDisplay
from tensorflow.keras.utils import model_to_dot
from tqdm import tqdm

# Report the visible GPUs and let TensorFlow allocate GPU memory on demand.
gpus = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpus))
# Iterate instead of indexing [0]: on a CPU-only machine the list is empty and
# indexing raised IndexError. Applying the setting to every GPU also keeps the
# memory-growth option consistent across devices.
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

# Part 1

In [None]:
def f(x, y):
    """Objective function: x^2 + y^2 + x*(y + 2) + cos(3x)."""
    squares = x**2 + y**2
    cross_term = x*(y + 2)
    return squares + cross_term + math.cos(3*x)

def dfdx(x, y):
    """Partial derivative of ``f`` with respect to x: 2x + y + 2 - 3*sin(3x)."""
    linear_part = 2*x + y + 2
    oscillation = 3*math.sin(3*x)
    return linear_part - oscillation

def dfdy(x, y):
    """Partial derivative of ``f`` with respect to y: x + 2y."""
    return x + 2*y

In [None]:
@dataclass
class GDPoint:
    """One recorded gradient-descent sample: a position and the value stored for it."""
    # The fields hold floats once the first update step has run, so they are
    # annotated as float rather than int.
    x: float  # x-coordinate of the sample
    y: float  # y-coordinate of the sample
    z: float  # function value recorded for (x, y)

In [None]:
def gradient_descent(x, y, lr, steps=20):
    """Run fixed-step gradient descent on ``f`` starting from ``(x, y)``.

    Args:
        x, y: Starting coordinates.
        lr: Step size multiplied with each partial derivative.
        steps: Number of update steps to perform.

    Returns:
        ``(x, y, trajectory)`` where ``x, y`` is the position after the last
        update and ``trajectory`` holds one ``GDPoint`` per step, recorded
        *before* that step's update. The returned position itself is therefore
        not part of the trajectory.
    """
    trajectory = []
    for _step in range(steps):
        # Record the current position and its function value first.
        trajectory.append(GDPoint(x, y, f(x, y)))
        # Both partials are evaluated at the same (pre-update) position.
        grad_x, grad_y = dfdx(x, y), dfdy(x, y)
        x -= grad_x * lr
        y -= grad_y * lr
    return x, y, trajectory

In [None]:
# Run gradient descent from every integer start point in [-10, 10) x [-10, 10)
# and keep each trajectory. x_final, y_final and values keep the outcome of the
# last start point after the loops finish.
results = []
for x_init in range(-10, 10):
    for y_init in range(-10, 10):
        x_final, y_final, values = gradient_descent(x_init, y_init, lr=0.1, steps=100)
        results.append(values)

In [None]:
# Outcome of the last start point only. values[-1] is the point recorded before
# the final update, so it does not coincide with (x_final, y_final).
print(f'{x_final=}, {y_final=}, {values[-1]=}')
fig = plt.figure()
ax = fig.add_subplot()
for result in results:
    # Only the tail of each trajectory (index 95 onwards) is drawn.
    zs = [d.z for d in result[95:]]
    ax.plot(zs)
# "\\in" yields the same literal backslash for mathtext as before, without
# relying on the invalid "\i" escape sequence in a normal string.
ax.set_title("Gradient descent with all integer start values $x, y \\in [-10, 10)$\n for iteration 95 to 100")
ax.set_ylabel("Function value")
ax.set_xlabel("Iteration")
plt.xticks(ticks=range(5), labels=range(96, 101))
# Explicit show() so the cell does not end with the repr of the xticks call.
plt.show()


# Part 2

In [None]:
def _vggConvBN(x, filters, repetitions, L2):
    """Apply ``repetitions`` pairs of 3x3 ReLU Conv2D (L2-regularised) + BatchNormalization."""
    for _ in range(repetitions):
        x = Conv2D(filters, kernel_size=3, padding='same', activation='relu',
                   kernel_regularizer=tf.keras.regularizers.l2(L2))(x)
        x = BatchNormalization()(x)
    return x


def getVGG19():
    """Build and compile a VGG19-style classifier for 32x32 RGB inputs and 10 classes.

    Five convolutional stages (2, 2, 4, 4, 4 conv layers with 64, 128, 256, 512,
    512 filters), each followed by 2x2 max pooling, then three 4096-unit dense
    layers and a 10-way softmax. Every conv/dense kernel uses L2 regularisation.

    Returns:
        The compiled Keras model (Adam with AMSGrad, categorical cross-entropy,
        accuracy metric).
    """
    L2 = 0.0005
    # (filters, number of conv layers) per stage, in order.
    stages = ((64, 2), (128, 2), (256, 4), (512, 4), (512, 4))

    inputs = Input(shape=(32, 32, 3))
    x = inputs
    # The input shape is already fixed by Input(); the former input_shape=
    # argument on the first Conv2D was redundant and has been dropped.
    for filters, repetitions in stages:
        x = _vggConvBN(x, filters, repetitions, L2)
        x = MaxPooling2D(pool_size=2, strides=2, padding='same')(x)

    x = Flatten()(x)
    # NOTE(review): the reference VGG19 has two 4096-unit dense layers; three are
    # kept here to preserve the existing architecture. Confirm this is intended.
    for _ in range(3):
        x = Dense(4096, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(L2))(x)
        x = BatchNormalization()(x)
    x = Dense(10, activation='softmax', kernel_regularizer=tf.keras.regularizers.l2(L2))(x)

    model = models.Model(
        inputs=inputs,
        outputs=x
    )
    model.compile(optimizer=tf.keras.optimizers.Adam(amsgrad=True), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
#  https://towardsdatascience.com/understand-and-implement-resnet-50-with-tensorflow-2-0-1190b9b52691
def resnetIdentity(x, filters, L2):
    """Bottleneck residual block whose shortcut is the unmodified input.

    Args:
        x: Input tensor. Its channel count must equal ``filters[1]`` so that it
            can be added to the block output.
        filters: ``(f1, f2)``, the bottleneck width and the output channel count.
        L2: L2 regularisation factor applied to every convolution kernel.

    Returns:
        The output tensor after add, ReLU and batch normalisation.
    """
    x_skip = x
    f1, f2 = filters
    x = Conv2D(f1, kernel_size=1, padding='valid', activation='relu', kernel_regularizer=tf.keras.regularizers.l2(L2))(x)
    x = BatchNormalization()(x)
    x = Conv2D(f1, kernel_size=3, padding='same', activation='relu', kernel_regularizer=tf.keras.regularizers.l2(L2))(x)
    x = BatchNormalization()(x)
    # This conv previously had no regulariser, unlike the matching layer in
    # resnetConv; it now uses L2 like every other convolution in the block.
    x = Conv2D(f2, kernel_size=1, padding='valid', kernel_regularizer=tf.keras.regularizers.l2(L2))(x)
    x = Add()([x, x_skip])
    x = Activation(tf.keras.activations.relu)(x)
    x = BatchNormalization()(x)
    return x

def resnetConv(x, s, filters, L2):
    """Bottleneck residual block with a projected (1x1 conv) shortcut.

    Args:
        x: Input tensor.
        s: Stride used by the first 1x1 convolution and by the shortcut projection.
        filters: ``(f1, f2)``, the bottleneck width and the output channel count.
        L2: L2 regularisation factor applied to every convolution kernel.

    Returns:
        The output tensor after add, ReLU and batch normalisation.
    """
    def l2_reg():
        # A fresh regulariser instance per layer, as in the original code.
        return tf.keras.regularizers.l2(L2)

    bottleneck, out_channels = filters
    shortcut = x

    # Main path: 1x1 (strided) -> 3x3 -> 1x1.
    main = Conv2D(bottleneck, kernel_size=1, strides=s, padding='valid', activation='relu', kernel_regularizer=l2_reg())(x)
    main = BatchNormalization()(main)
    main = Conv2D(bottleneck, kernel_size=3, padding='same', activation='relu', kernel_regularizer=l2_reg())(main)
    main = BatchNormalization()(main)
    main = Conv2D(out_channels, kernel_size=1, padding='valid', kernel_regularizer=l2_reg())(main)

    # Shortcut path: project the input to the main path's shape.
    shortcut = Conv2D(out_channels, kernel_size=1, strides=s, padding='valid', kernel_regularizer=l2_reg())(shortcut)

    merged = Add()([main, shortcut])
    merged = Activation(tf.keras.activations.relu)(merged)
    return BatchNormalization()(merged)

def getResnet50():
    """Build and compile a ResNet50-style classifier for 32x32 RGB inputs and 10 classes.

    A 7x7 stem convolution and max pooling are followed by four stages, each one
    ``resnetConv`` block plus several ``resnetIdentity`` blocks, then average
    pooling and a 10-way softmax.

    Returns:
        The compiled Keras model (Adam with AMSGrad, categorical cross-entropy,
        accuracy metric).
    """
    L2 = 0.001
    # (stride of the projection block, (f1, f2), number of identity blocks)
    stages = (
        (1, (64, 256), 2),
        (2, (128, 512), 3),
        (2, (256, 1024), 5),
        (2, (512, 2048), 2),
    )

    inp = Input(shape=(32, 32, 3))
    x = Conv2D(64, kernel_size=7, strides=2, padding='same', activation='relu', kernel_regularizer=tf.keras.regularizers.l2(L2))(inp)
    x = BatchNormalization()(x)
    x = MaxPooling2D(pool_size=3, strides=2)(x)

    for stride, stage_filters, identity_blocks in stages:
        x = resnetConv(x, s=stride, filters=stage_filters, L2=L2)
        for _ in range(identity_blocks):
            x = resnetIdentity(x, filters=stage_filters, L2=L2)

    x = AveragePooling2D(pool_size=2, padding='same')(x)
    x = Flatten()(x)
    x = Dense(10, activation='softmax')(x)

    model = models.Model(inputs=inp, outputs=x)
    model.compile(optimizer=tf.keras.optimizers.Adam(amsgrad=True), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
#  https://towardsdatascience.com/creating-densenet-121-with-tensorflow-edbc08a956d8
def denseBlock(x, repetitions):
    """Stack ``repetitions`` densely connected units on top of ``x``.

    Each unit runs a 1x1 conv (128 filters) and a 3x3 conv (32 filters), each
    followed by batch normalisation, and concatenates the result in front of the
    running tensor, which then feeds the next unit.
    """
    features = x
    for _unit in range(repetitions):
        branch = Conv2D(128, kernel_size=1, strides=1, padding='same', activation='relu')(features)
        branch = BatchNormalization()(branch)
        branch = Conv2D(32, kernel_size=3, strides=1, padding='same', activation='relu')(branch)
        branch = BatchNormalization()(branch)
        # New features go first, matching the original concatenation order.
        features = Concatenate()([branch, features])
    return features

def getDenseNet():
    """Build and compile a DenseNet-style classifier for 32x32 RGB inputs and 10 classes.

    A 7x7 stem convolution and max pooling are followed by dense blocks of 6, 12,
    24 and 16 units. The first three blocks are each followed by a transition
    (1x1 conv, batch norm, 2x2 average pooling); the last feeds global average
    pooling and a 10-way softmax.

    Returns:
        The compiled Keras model (Adam with AMSGrad, categorical cross-entropy,
        accuracy metric).
    """
    # (units in the dense block, filters of the transition conv that follows it)
    blocks_with_transition = ((6, 128), (12, 256), (24, 512))

    inp = Input(shape=(32, 32, 3))
    x = Conv2D(64, kernel_size=7, strides=2, padding='same', activation='relu')(inp)
    x = BatchNormalization()(x)
    x = MaxPooling2D(pool_size=3, strides=2, padding='same')(x)

    for repetitions, transition_filters in blocks_with_transition:
        x = denseBlock(x, repetitions)
        x = Conv2D(transition_filters, kernel_size=1, strides=1, padding='same', activation='relu')(x)
        x = BatchNormalization()(x)
        x = AveragePooling2D(pool_size=2, strides=2, padding='same')(x)

    # The final dense block has no transition after it.
    x = denseBlock(x, 16)

    x = GlobalAveragePooling2D()(x)
    out = Dense(10, activation='softmax')(x)

    model = models.Model(inputs=inp, outputs=out)
    model.compile(optimizer=tf.keras.optimizers.Adam(amsgrad=True), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
def mlpconvLayer(x, L2):
    """Apply one "mlpconv" group: a 3x3 conv followed by two 1x1 convs.

    Each convolution uses ReLU, 'same' padding and an L2 kernel regulariser with
    factor ``L2``, and is followed by batch normalisation.
    """
    # (filters, kernel size) for the three convolutions, in order.
    for n_filters, k in ((192, 3), (160, 1), (96, 1)):
        x = Conv2D(n_filters, kernel_size=k, padding='same', activation='relu', kernel_regularizer=tf.keras.regularizers.l2(L2))(x)
        x = BatchNormalization()(x)
    return x

def getNiN():
    """Build and compile a Network-in-Network style classifier for 32x32 RGB inputs.

    A 3x3 stem convolution is followed by three ``mlpconvLayer`` groups with max
    pooling between them, a 1x1 conv down to 10 channels, global average pooling
    and a softmax.

    Returns:
        The compiled Keras model (Adam with AMSGrad, categorical cross-entropy,
        accuracy metric), configured like the other model builders in this file.
    """
    L2 = 0.0001
    inp = Input(shape=(32, 32, 3))
    # The input shape is already fixed by Input(); the former input_shape=
    # argument on this Conv2D was redundant and has been dropped.
    x = Conv2D(128, kernel_size=3, padding='same', activation='relu', kernel_regularizer=tf.keras.regularizers.l2(L2))(inp)
    x = BatchNormalization()(x)

    x = mlpconvLayer(x, L2)
    x = MaxPooling2D(pool_size=3, strides=2, padding='same')(x)
    x = mlpconvLayer(x, L2)
    x = MaxPooling2D(pool_size=3, strides=2, padding='same')(x)
    x = mlpconvLayer(x, L2)

    # One channel per class, averaged over the spatial dimensions below.
    x = Conv2D(10, kernel_size=1, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(L2))(x)
    x = BatchNormalization()(x)
    x = GlobalAveragePooling2D()(x)
    x = Flatten()(x)
    x = Softmax()(x)

    model = models.Model(
        inputs=inp,
        outputs=x
    )
    # The model used to be returned uncompiled, so the later NiN.fit(...) call
    # could not run. Compile it the same way as the other builders so that
    # training works and the 'accuracy' history key exists.
    model.compile(optimizer=tf.keras.optimizers.Adam(amsgrad=True), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
# Build the VGG19-style model and print its layer summary.
vgg19 = getVGG19()
vgg19.summary()

In [None]:
# Build the ResNet50-style model and print its layer summary.
resnet50 = getResnet50()
resnet50.summary()

In [None]:
# Build the DenseNet-style model and print its layer summary.
denseNet = getDenseNet()
denseNet.summary()

In [None]:
# Build the Network-in-Network model and print its layer summary.
NiN = getNiN()
NiN.summary()

## Plot models

In [None]:
def plotModel(model):
    """Return an inline ``SVG`` of the layer graph of ``model``.

    The graph is produced by ``model_to_dot`` (top-to-bottom layout, with shapes
    and layer activations shown, nested models not expanded) and rendered to SVG
    with the ``dot`` program.
    """
    dot_graph = model_to_dot(
        model,
        show_shapes=True,
        show_layer_activations=True,
        rankdir='TB',
        expand_nested=False,
        dpi=60,
        subgraph=False,
    )
    svg_data = dot_graph.create(prog='dot', format='svg')
    return SVG(svg_data)

In [None]:
# Render the VGG19 layer graph inline.
plotModel(vgg19)

In [None]:
# Render the ResNet50 layer graph inline.
plotModel(resnet50)

In [None]:
# Render the DenseNet layer graph inline.
plotModel(denseNet)

In [None]:
# Render the NiN layer graph inline.
plotModel(NiN)

## Load data

In [None]:
# Load the image folder as batched (image, one-hot label) datasets, resized to
# 32x32, with a seeded 80/20 training/validation split returned together.
training, validation = image_dataset_from_directory(
    '250000_Final',
    label_mode='categorical',
    image_size=(32, 32),
    batch_size=128,
    shuffle=True,
    seed=42,
    validation_split=0.2,
    subset='both',
)



In [None]:
# Show the class names attached to the loaded training dataset.
# NOTE(review): `training` is re-bound by .map() in a later cell; presumably the
# mapped dataset no longer exposes class_names, so re-running this cell after
# that one may fail. Verify.
training.class_names

In [None]:
# Preview the first nine images of one training batch in a 3x3 grid, each titled
# with the index of its one-hot label.
fig = plt.figure(figsize=(10, 10))
for images, labels in training.take(1):
    for i in range(9):
        ax = fig.add_subplot(3, 3, i + 1)
        ax.imshow(images[i].numpy().astype("uint8"))
        ax.set_title(np.argmax((labels[i])))
        ax.axis("off")

In [None]:
def process(images, labels):
    """Divide the images by 255 and pass the labels through unchanged."""
    scaled = images / 255.
    return scaled, labels

AUTOTUNE = tf.data.AUTOTUNE

# Rescale both splits with process().
# NOTE: this cell re-binds `training` and `validation`, so running it twice
# applies the rescaling twice. Re-run the loading cell first.
training = training.map(process, num_parallel_calls=AUTOTUNE)
validation = validation.map(process, num_parallel_calls=AUTOTUNE)

# Cache both splits; only the training split is shuffled (the datasets are
# already batched, so the shuffle buffer holds batches).
training = training.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
validation = validation.cache().prefetch(buffer_size=AUTOTUNE)

## Train models

In [None]:
# Write a checkpoint for VGG19 via ModelCheckpoint; {epoch:02d} is filled in by Keras.
checkpoint_path = "models/vgg19/cp-{epoch:02d}.ckpt"
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 verbose=1)
# `training` is a tf.data.Dataset that was batched at load time, so batch_size
# is not passed to fit(); Keras documents that it must not be given for datasets.
vgg19_history = vgg19.fit(training, epochs=10, validation_data=validation, verbose=1, callbacks=[cp_callback])

In [None]:
# Write a checkpoint for ResNet50 via ModelCheckpoint; {epoch:02d} is filled in by Keras.
checkpoint_path = "models/resnet50/cp-{epoch:02d}.ckpt"
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 verbose=1)
# `training` is a tf.data.Dataset that was batched at load time, so batch_size
# is not passed to fit(); Keras documents that it must not be given for datasets.
resnet50_history = resnet50.fit(training, epochs=10, validation_data=validation, verbose=1, callbacks=[cp_callback])

In [None]:
# Write a checkpoint for DenseNet via ModelCheckpoint; {epoch:02d} is filled in by Keras.
checkpoint_path = "models/densenet/cp-{epoch:02d}.ckpt"
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 verbose=1)
# `training` is a tf.data.Dataset that was batched at load time, so batch_size
# is not passed to fit(); Keras documents that it must not be given for datasets.
densenet_history = denseNet.fit(training, epochs=10, validation_data=validation, verbose=1, callbacks=[cp_callback])

In [None]:
# Write a checkpoint for NiN via ModelCheckpoint; {epoch:02d} is filled in by Keras.
checkpoint_path = "models/NiN/cp-{epoch:02d}.ckpt"
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 verbose=1)
# `training` is a tf.data.Dataset that was batched at load time, so batch_size
# is not passed to fit(); Keras documents that it must not be given for datasets.
nin_history = NiN.fit(training, epochs=10, validation_data=validation, verbose=1, callbacks=[cp_callback])

## Plot training history

In [None]:
# Loss per epoch for VGG19.
fig, ax = plt.subplots()
ax.plot(vgg19_history.history['loss'], label='Training loss')
ax.plot(vgg19_history.history['val_loss'], label='Validation loss')
ax.legend()
ax.set_title('Training and validation loss for VGG19')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
plt.show()

# Accuracy per epoch for VGG19, drawn in a second figure.
fig, ax = plt.subplots()
ax.plot(vgg19_history.history['accuracy'], label='Training accuracy')
ax.plot(vgg19_history.history['val_accuracy'], label='Validation accuracy')
ax.legend()
ax.set_title('Training and validation accuracy for VGG19')
ax.set_ylabel('Accuracy')
ax.set_xlabel('Epoch')


In [None]:
# Loss per epoch for ResNet50.
fig, ax = plt.subplots()
ax.plot(resnet50_history.history['loss'], label='Training loss')
ax.plot(resnet50_history.history['val_loss'], label='Validation loss')
ax.legend()
ax.set_title('Training and validation loss for ResNet50')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
plt.show()

# Accuracy per epoch for ResNet50, drawn in a second figure.
fig, ax = plt.subplots()
ax.plot(resnet50_history.history['accuracy'], label='Training accuracy')
ax.plot(resnet50_history.history['val_accuracy'], label='Validation accuracy')
ax.legend()
ax.set_title('Training and validation accuracy for ResNet50')
ax.set_ylabel('Accuracy')
ax.set_xlabel('Epoch')


In [None]:
# Loss per epoch for DenseNet121.
fig, ax = plt.subplots()
ax.plot(densenet_history.history['loss'], label='Training loss')
ax.plot(densenet_history.history['val_loss'], label='Validation loss')
ax.legend()
ax.set_title('Training and validation loss for DenseNet121')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
plt.show()

# Accuracy per epoch for DenseNet121, drawn in a second figure.
fig, ax = plt.subplots()
ax.plot(densenet_history.history['accuracy'], label='Training accuracy')
ax.plot(densenet_history.history['val_accuracy'], label='Validation accuracy')
ax.legend()
ax.set_title('Training and validation accuracy for DenseNet121')
ax.set_ylabel('Accuracy')
ax.set_xlabel('Epoch')


In [None]:
# Loss per epoch for NiN.
fig, ax = plt.subplots()
ax.plot(nin_history.history['loss'], label='Training loss')
ax.plot(nin_history.history['val_loss'], label='Validation loss')
ax.legend()
ax.set_title('Training and validation loss for NiN')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
plt.show()

# Accuracy per epoch for NiN, drawn in a second figure.
fig, ax = plt.subplots()
ax.plot(nin_history.history['accuracy'], label='Training accuracy')
ax.plot(nin_history.history['val_accuracy'], label='Validation accuracy')
ax.legend()
ax.set_title('Training and validation accuracy for NiN')
ax.set_ylabel('Accuracy')
ax.set_xlabel('Epoch')
# Fixed y ticks from 0.2 to 1.0 in steps of 0.1 on the accuracy axes.
plt.yticks([tick/10 for tick in range(2, 11)])


## Get F1-score and confusion matrix

In [None]:
# Predict class probabilities on the validation set with each model.
vgg19_probabilities = vgg19.predict(validation)
resnet50_probabilities = resnet50.predict(validation)
densenet_probabilities = denseNet.predict(validation)
NiN_probabilities = NiN.predict(validation)

# Reduce each probability row to the index of its most likely class.
vgg19_predictions = list(np.argmax(vgg19_probabilities, axis=1))
resnet50_predictions = list(np.argmax(resnet50_probabilities, axis=1))
densenet_predictions = list(np.argmax(densenet_probabilities, axis=1))
NiN_predictions = list(np.argmax(NiN_probabilities, axis=1))

In [None]:
# Collect the true class index of every validation sample, batch by batch.
# NOTE(review): this relies on `validation` yielding batches in the same order
# as during predict(). Confirm the dataset is not reshuffled between passes.
validation_ds = list(validation)
lables = []
for _images, batch_labels in validation_ds:
    lables.extend(np.argmax(batch_labels, axis=1))

In [None]:
# Micro-averaged F1 (as a percentage) and confusion matrix for VGG19.
vgg19_f1 = f1_score(lables, vgg19_predictions, average='micro')
print(f'{vgg19_f1 * 100:.2f}%')
vgg19_conf_mat = confusion_matrix(y_true=lables, y_pred=vgg19_predictions)
vgg19_conf_disp = ConfusionMatrixDisplay(confusion_matrix=vgg19_conf_mat)
vgg19_conf_disp.plot()

In [None]:
# Micro-averaged F1 (as a percentage) and confusion matrix for ResNet50.
resnet50_f1 = f1_score(lables, resnet50_predictions, average='micro')
print(f'{resnet50_f1 * 100:.2f}%')
resnet50_conf_mat = confusion_matrix(y_true=lables, y_pred=resnet50_predictions)
resnet50_conf_disp = ConfusionMatrixDisplay(confusion_matrix=resnet50_conf_mat)
resnet50_conf_disp.plot()

In [None]:
# Micro-averaged F1 (as a percentage) and confusion matrix for DenseNet.
densenet_f1 = f1_score(lables, densenet_predictions, average='micro')
print(f'{densenet_f1 * 100:.2f}%')
densenet_conf_mat = confusion_matrix(y_true=lables, y_pred=densenet_predictions)
densenet_conf_disp = ConfusionMatrixDisplay(confusion_matrix=densenet_conf_mat)
densenet_conf_disp.plot()

In [None]:
# Micro-averaged F1 (as a percentage) and confusion matrix for NiN.
NiN_f1 = f1_score(lables, NiN_predictions, average='micro')
print(f'{NiN_f1 * 100:.2f}%')
NiN_conf_mat = confusion_matrix(y_true=lables, y_pred=NiN_predictions)
NiN_conf_disp = ConfusionMatrixDisplay(confusion_matrix=NiN_conf_mat)
NiN_conf_disp.plot()