### To DO:
- Hyperparameters for optimizers
- Possibly test all parameters at the same time
- Apply best to CIFAR

In [12]:
#! pip install keras
#! pip install tensorflow
#! pip install torch
! pip install tensorflow[and-cuda]

Defaulting to user installation because normal site-packages is not writeable
Collecting tensorflow[and-cuda]
  Using cached tensorflow-2.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (489.8 MB)
Installing collected packages: tensorflow
[0mSuccessfully installed tensorflow-2.14.0


In [13]:
import keras as keras
from __future__ import print_function
from keras.datasets import mnist
from keras.models import Sequential
from keras.optimizers import RMSprop
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
import tensorflow as tf
import numpy as np
import pandas as pd


# initializers
from keras.initializers import Zeros  
from keras.initializers import RandomNormal, RandomUniform  
from keras.initializers import glorot_normal, glorot_uniform 
from keras.initializers import he_normal, he_uniform  
from keras.initializers import lecun_normal, lecun_uniform  


# Task 1.1 

In [14]:
# mnist_mlp.py
batch_size = 128
num_classes = 10
epochs = 20

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape(10000, 784)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

model = Sequential()
model.add(Dense(512, activation='relu', input_shape=(784,)))
model.add(Dropout(0.2))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(num_classes, activation='softmax'))

model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer=RMSprop(),
              metrics=['accuracy'])

history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

60000 train samples
10000 test samples
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 512)               401920    
                                                                 
 dropout_2 (Dropout)         (None, 512)               0         
                                                                 
 dense_4 (Dense)             (None, 512)               262656    
                                                                 
 dropout_3 (Dropout)         (None, 512)               0         
                                                                 
 dense_5 (Dense)             (None, 10)                5130      
                                                                 
Total params: 669706 (2.55 MB)
Trainable params: 669706 (2.55 MB)
Non-trainable params: 0 (0.00 Byte)
_____________________________________________

In [15]:
# mnist_cnn.py
batch_size = 128
num_classes = 10
epochs = 12

# input image dimensions
img_rows, img_cols = 28, 28

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
Test loss: 0.8063518404960632
Test accuracy: 0.8277999758720398


# Task  1.2

In [16]:
# mnist_mlp.py + different initilization methods
batch_size = 128
num_classes = 10
epochs = 20

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape(10000, 784)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
seed_value = 42
results = pd.DataFrame(columns=['Initialization Method', 'Test Loss', 'Test Accuracy'])
# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

initilization_methods = [Zeros(), 
                       RandomNormal(seed=seed_value), 
                       RandomUniform(seed=seed_value), 
                       glorot_uniform(seed=seed_value), 
                       glorot_normal(seed=seed_value), 
                       he_normal(seed=seed_value), 
                       he_uniform(seed=seed_value), 
                       lecun_normal(seed=seed_value), 
                       lecun_uniform(seed=seed_value)]

for method in initilization_methods:
    model = Sequential()
    model.add(Dense(512, activation='relu', input_shape=(784,), kernel_initializer=method))
    model.add(Dropout(0.2))
    model.add(Dense(512, activation='relu', kernel_initializer=method))
    model.add(Dropout(0.2))
    model.add(Dense(num_classes, activation='softmax', kernel_initializer=method))

    # Extract name of method
    method_name = method.__class__.__name__
    
    model.summary()

    
    model.compile(loss='categorical_crossentropy',
                  optimizer=RMSprop(),
                  metrics=['accuracy'])

    
    history = model.fit(x_train, y_train,
                        batch_size=batch_size,
                        epochs=epochs,
                        verbose=1,
                        validation_data=(x_test, y_test))

    
    score = model.evaluate(x_test, y_test, verbose=0)
    results = results.append({'Initialization Method': method_name, 'Test Loss': score[0], 'Test Accuracy': score[1]}, ignore_index=True)
    
    print(results)

60000 train samples
10000 test samples
Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_8 (Dense)             (None, 512)               401920    
                                                                 
 dropout_6 (Dropout)         (None, 512)               0         
                                                                 
 dense_9 (Dense)             (None, 512)               262656    
                                                                 
 dropout_7 (Dropout)         (None, 512)               0         
                                                                 
 dense_10 (Dense)            (None, 10)                5130      
                                                                 
Total params: 669706 (2.55 MB)
Trainable params: 669706 (2.55 MB)
Non-trainable params: 0 (0.00 Byte)
_____________________________________________

In [17]:
# mnist_mlp.py + different activation functions
batch_size = 128
num_classes = 10
epochs = 20

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape(10000, 784)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
seed_value = 42
results = pd.DataFrame(columns=['Activation Method', 'Test Loss', 'Test Accuracy'])

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

activation_methods = ['relu', 'sigmoid', 'tanh', 'linear', 'softmax']

for method in activation_methods:
    model = Sequential()
    model.add(Dense(512, activation=method, input_shape=(784,)))
    model.add(Dropout(0.2))
    model.add(Dense(512, activation=method))
    model.add(Dropout(0.2))
    model.add(Dense(num_classes, activation='softmax'))

    
    
    model.summary()

    
    model.compile(loss='categorical_crossentropy',
                  optimizer=RMSprop(),
                  metrics=['accuracy'])

    
    history = model.fit(x_train, y_train,
                        batch_size=batch_size,
                        epochs=epochs,
                        verbose=1,
                        validation_data=(x_test, y_test))

    
    score = model.evaluate(x_test, y_test, verbose=0)
    results = results.append({'Activation Method': method, 'Test Loss': score[0], 'Test Accuracy': score[1]}, ignore_index=True)
    
    print(results)

60000 train samples
10000 test samples
Model: "sequential_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_35 (Dense)            (None, 512)               401920    
                                                                 
 dropout_24 (Dropout)        (None, 512)               0         
                                                                 
 dense_36 (Dense)            (None, 512)               262656    
                                                                 
 dropout_25 (Dropout)        (None, 512)               0         
                                                                 
 dense_37 (Dense)            (None, 10)                5130      
                                                                 
Total params: 669706 (2.55 MB)
Trainable params: 669706 (2.55 MB)
Non-trainable params: 0 (0.00 Byte)
____________________________________________

In [18]:
# mnist_mlp.py + different optimizers
batch_size = 128
num_classes = 10
epochs = 20

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape(10000, 784)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
seed_value = 42
results = pd.DataFrame(columns=['Optimizer', 'Test Loss', 'Test Accuracy'])

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

optimizers = [keras.optimizers.Adam, keras.optimizers.SGD, keras.optimizers.RMSprop, keras.optimizers.AdamW]

for optimizer in optimizers:
    model = Sequential()
    model.add(Dense(512, activation='relu', input_shape=(784,)))
    model.add(Dropout(0.2))
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(num_classes, activation='softmax'))

    
    
    model.summary()

    
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer(),
                  metrics=['accuracy'])

    
    history = model.fit(x_train, y_train,
                        batch_size=batch_size,
                        epochs=epochs,
                        verbose=1,
                        validation_data=(x_test, y_test))

    
    score = model.evaluate(x_test, y_test, verbose=0)
    results = results.append({'Optimizer': optimizer.__name__, 'Test Loss': score[0], 'Test Accuracy': score[1]}, ignore_index=True)
    
    print(results)

60000 train samples
10000 test samples
Model: "sequential_17"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_50 (Dense)            (None, 512)               401920    
                                                                 
 dropout_34 (Dropout)        (None, 512)               0         
                                                                 
 dense_51 (Dense)            (None, 512)               262656    
                                                                 
 dropout_35 (Dropout)        (None, 512)               0         
                                                                 
 dense_52 (Dense)            (None, 10)                5130      
                                                                 
Total params: 669706 (2.55 MB)
Trainable params: 669706 (2.55 MB)
Non-trainable params: 0 (0.00 Byte)
____________________________________________

In [20]:
# mnist_mlp.py + different regularization techniques
batch_size = 128
num_classes = 10
epochs = 20

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape (10000, 784)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
seed_value = 42
results = pd.DataFrame(columns=['Regularization Method', 'Test Loss', 'Test Accuracy'])

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

regularization_methods = ['None', 'L1', 'L2', 'Dropout']

for method in regularization_methods:
    model = Sequential()
    model.add(Dense(512, activation='relu', input_shape=(784,)))

    if method == 'L1':
        model.add(Dense(512, activation='relu', kernel_regularizer=keras.regularizers.l1(0.01)))
    elif method == 'L2':
        model.add(Dense(512, activation='relu', kernel_regularizer=keras.regularizers.l2(0.01)))
    elif method == 'Dropout':
        model.add(Dropout(0.2))
        model.add(Dense(512, activation='relu'))
        model.add(Dropout(0.2))

    model.add(Dense(num_classes, activation='softmax'))

    model.summary()

    model.compile(loss='categorical_crossentropy',
                  optimizer=keras.optimizers.RMSprop(),
                  metrics=['accuracy'])

    history = model.fit(x_train, y_train,
                        batch_size=batch_size,
                        epochs=epochs,
                        verbose=1,
                        validation_data=(x_test, y_test))

    score = model.evaluate(x_test, y_test, verbose=0)
    results = results.append({'Regularization Method': method, 'Test Loss': score[0], 'Test Accuracy': score[1]}, ignore_index=True)

    print(results)


60000 train samples
10000 test samples
Model: "sequential_26"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_74 (Dense)            (None, 512)               401920    
                                                                 
 dense_75 (Dense)            (None, 10)                5130      
                                                                 
Total params: 407050 (1.55 MB)
Trainable params: 407050 (1.55 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
  Regularization Method  Test Loss  Test Accuracy
0                  None   0.065881         0.9841
Model: "sequential_27"
____________________

In [28]:
# mnist_mlp.py + different regularization techniques
num_classes = 10
epochs = 20

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape(10000, 784)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

# Define hyperparameter values to test
learning_rates = [0.00001, 0.1]
momentum_values = [0, 0.9]
epsilon_values = [1e-8, 1e-4]
nesterov_values = [False, True]
batch_sizes = [64, 256]
optimizers = [keras.optimizers.Adam, keras.optimizers.SGD, keras.optimizers.RMSprop]

# Create a DataFrame to store results
results = pd.DataFrame(columns=['Optimizer', 'Learning Rate', 'Momentum', 'Epsilon', 'Nesterov', 'Batch Size', 'Test Loss', 'Test Accuracy'])

# Convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)


# Test different hyperparameters
for optimizer in optimizers:
    for lr in learning_rates:
        for momentum in momentum_values:
            for epsilon in epsilon_values:
                for nesterov in nesterov_values:
                    for batch_size in batch_sizes:
                        model = Sequential()
                        model.add(Dense(512, activation='relu', input_shape=(784,)))
                        model.add(Dropout(0.2))
                        model.add(Dense(512, activation='relu'))
                        model.add(Dropout(0.2))
                        model.add(Dense(10, activation='softmax'))

                        optimizer_name = optimizer.__name__
                        optimizer_instance = optimizer(learning_rate=lr)

                        if optimizer_name == 'SGD':
                            optimizer_instance.momentum = momentum
                            optimizer_instance.nesterov = nesterov
                        elif optimizer_name == 'RMSprop':
                            optimizer_instance.epsilon = epsilon

                        model.compile(loss='categorical_crossentropy',
                                      optimizer=optimizer_instance,
                                      metrics=['accuracy'])

                        history = model.fit(x_train, y_train, batch_size=batch_size, epochs=5, verbose=0, validation_data=(x_test, y_test))
                        score = model.evaluate(x_test, y_test, verbose=0)

                        results = results.append({'Optimizer': optimizer_name, 'Learning Rate': lr, 'Momentum': momentum, 'Epsilon': epsilon, 'Nesterov': nesterov, 'Batch Size': batch_size, 'Test Loss': score[0], 'Test Accuracy': score[1]}, ignore_index=True)


# Display the results
pd.set_option('display.max_rows', None)  # To display all rows
pd.set_option('display.max_columns', None)  # To display all columns
print(results)


   Optimizer  Learning Rate Momentum       Epsilon Nesterov Batch Size  \
0       Adam        0.00001        0  1.000000e-08    False         64   
1       Adam        0.00001        0  1.000000e-08    False        256   
2       Adam        0.00001        0  1.000000e-08     True         64   
3       Adam        0.00001        0  1.000000e-08     True        256   
4       Adam        0.00001        0  1.000000e-04    False         64   
5       Adam        0.00001        0  1.000000e-04    False        256   
6       Adam        0.00001        0  1.000000e-04     True         64   
7       Adam        0.00001        0  1.000000e-04     True        256   
8       Adam        0.00001      0.9  1.000000e-08    False         64   
9       Adam        0.00001      0.9  1.000000e-08    False        256   
10      Adam        0.00001      0.9  1.000000e-08     True         64   
11      Adam        0.00001      0.9  1.000000e-08     True        256   
12      Adam        0.00001      0.9  

# Parameters results:
- Initialization method: GlorotUniform 
- Activation function: Relu
- Optimizer: RMSdrop
- Regulazation technique: Dropout
- Optimizer + hyperparameters (Learning rate, momentum, epsilon, nesterov, batch size): SGD, 0.1, 0.9, 1.000000e-04, True, 256