### To DO:
- Hyperparameters for optimizers
- Possibly test all parameters at the same time
- Apply best to CIFAR

In [45]:
#! pip install keras
#! pip install tensorflow
#! pip install torch
! pip install tensorflow[and-cuda]



In [46]:
import keras as keras
from __future__ import print_function
from keras.datasets import mnist
from keras.models import Sequential
from keras.optimizers import RMSprop
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
import tensorflow as tf
import numpy as np
import pandas as pd


# initializers
from keras.initializers import Zeros  
from keras.initializers import RandomNormal, RandomUniform  
from keras.initializers import glorot_normal, glorot_uniform 
from keras.initializers import he_normal, he_uniform  
from keras.initializers import lecun_normal, lecun_uniform  
from tensorflow.keras import regularizers


# Task 1.1 

In [47]:
# mnist_mlp.py
batch_size = 128
num_classes = 10
epochs = 20

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape(10000, 784)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

model = Sequential()
model.add(Dense(512, activation='relu', input_shape=(784,)))
model.add(Dropout(0.2))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(num_classes, activation='softmax'))

model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer=RMSprop(),
              metrics=['accuracy'])

history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

60000 train samples
10000 test samples
Model: "sequential_468"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_1268 (Dense)          (None, 512)               401920    
                                                                 
 dropout_816 (Dropout)       (None, 512)               0         
                                                                 
 dense_1269 (Dense)          (None, 512)               262656    
                                                                 
 dropout_817 (Dropout)       (None, 512)               0         
                                                                 
 dense_1270 (Dense)          (None, 10)                5130      
                                                                 
Total params: 669,706
Trainable params: 669,706
Non-trainable params: 0
_________________________________________________________________
Epoch 1

In [48]:
# mnist_cnn.py
batch_size = 128
num_classes = 10
epochs = 12

# input image dimensions
img_rows, img_cols = 28, 28

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
Test loss: 0.6998873353004456
Test accuracy: 0.8464000225067139


# Task  1.2 MLP Tuning

In [49]:
# mnist_mlp.py + different initilization methods
batch_size = 128
num_classes = 10
epochs = 20

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape(10000, 784)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
seed_value = 42
results = pd.DataFrame(columns=['Initialization Method', 'Test Loss', 'Test Accuracy'])
# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

initilization_methods = [Zeros(), 
                       RandomNormal(seed=seed_value), 
                       RandomUniform(seed=seed_value), 
                       glorot_uniform(seed=seed_value), 
                       glorot_normal(seed=seed_value), 
                       he_normal(seed=seed_value), 
                       he_uniform(seed=seed_value), 
                       lecun_normal(seed=seed_value), 
                       lecun_uniform(seed=seed_value)]

for method in initilization_methods:
    model = Sequential()
    model.add(Dense(512, activation='relu', input_shape=(784,), kernel_initializer=method))
    model.add(Dropout(0.2))
    model.add(Dense(512, activation='relu', kernel_initializer=method))
    model.add(Dropout(0.2))
    model.add(Dense(num_classes, activation='softmax', kernel_initializer=method))

    # Extract name of method
    method_name = method.__class__.__name__
    
    model.summary()

    
    model.compile(loss='categorical_crossentropy',
                  optimizer=RMSprop(),
                  metrics=['accuracy'])

    
    history = model.fit(x_train, y_train,
                        batch_size=batch_size,
                        epochs=epochs,
                        verbose=1,
                        validation_data=(x_test, y_test))

    
    score = model.evaluate(x_test, y_test, verbose=0)
    
    new_result = pd.DataFrame({'Initialization Method': [method_name], 'Test Loss': [score[0]], 'Test Accuracy': [score[1]]})
    results = pd.concat([results, new_result], ignore_index=True)
    
    print(results)


60000 train samples
10000 test samples
Model: "sequential_470"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_1273 (Dense)          (None, 512)               401920    
                                                                 
 dropout_820 (Dropout)       (None, 512)               0         
                                                                 
 dense_1274 (Dense)          (None, 512)               262656    
                                                                 
 dropout_821 (Dropout)       (None, 512)               0         
                                                                 
 dense_1275 (Dense)          (None, 10)                5130      
                                                                 
Total params: 669,706
Trainable params: 669,706
Non-trainable params: 0
_________________________________________________________________
Epoch 1

  results = pd.concat([results, new_result], ignore_index=True)


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
  Initialization Method  Test Loss  Test Accuracy
0                 Zeros   2.301041         0.1135
1          RandomNormal   0.114204         0.9831
Model: "sequential_472"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_1279 (Dense)          (None, 512)               401920    
                                                                 
 dropout_824 (Dropout)       (None, 512)               0         
                                                                 
 dense_1280 (Dense)          (None, 512)               262656    
                                                                 
 dropout_825 (Dropout)       (None, 512)               0         
            

In [50]:
# mnist_mlp.py + different activation functions
batch_size = 128
num_classes = 10
epochs = 20

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape(10000, 784)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
seed_value = 42
results = pd.DataFrame(columns=['Activation Method', 'Test Loss', 'Test Accuracy'])

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

activation_methods = ['relu', 'sigmoid', 'tanh', 'linear', 'softmax']



for method in activation_methods:
    model = Sequential()
    model.add(Dense(512, activation=method, input_shape=(784,)))
    model.add(Dropout(0.2))
    model.add(Dense(512, activation=method))
    model.add(Dropout(0.2))
    model.add(Dense(num_classes, activation='softmax'))

    
    method_name = method
    
    model.summary()
    
    model.compile(loss='categorical_crossentropy',
                  optimizer=RMSprop(),
                  metrics=['accuracy'])
    
    history = model.fit(x_train, y_train,
                        batch_size=batch_size,
                        epochs=epochs,
                        verbose=1,
                        validation_data=(x_test, y_test))
    
    score = model.evaluate(x_test, y_test, verbose=0)
    new_result = pd.DataFrame({'Activation Method': [method_name], 'Test Loss': [score[0]], 'Test Accuracy': [score[1]]})
    results = pd.concat([results, new_result], ignore_index=True)
    
    print(results)


60000 train samples
10000 test samples
Model: "sequential_479"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_1300 (Dense)          (None, 512)               401920    
                                                                 
 dropout_838 (Dropout)       (None, 512)               0         
                                                                 
 dense_1301 (Dense)          (None, 512)               262656    
                                                                 
 dropout_839 (Dropout)       (None, 512)               0         
                                                                 
 dense_1302 (Dense)          (None, 10)                5130      
                                                                 
Total params: 669,706
Trainable params: 669,706
Non-trainable params: 0
_________________________________________________________________
Epoch 1

  results = pd.concat([results, new_result], ignore_index=True)


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
  Activation Method  Test Loss  Test Accuracy
0              relu   0.137991         0.9828
1           sigmoid   0.065342         0.9829
Model: "sequential_481"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_1306 (Dense)          (None, 512)               401920    
                                                                 
 dropout_842 (Dropout)       (None, 512)               0         
                                                                 
 dense_1307 (Dense)          (None, 512)               262656    
                                                                 
 dropout_843 (Dropout)       (None, 512)               0         
                        

In [51]:
# mnist_mlp.py + different optimizers
batch_size = 128
num_classes = 10
epochs = 20

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape(10000, 784)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
seed_value = 42
results = pd.DataFrame(columns=['Optimizer', 'Test Loss', 'Test Accuracy'])

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

optimizers = [keras.optimizers.Adam, keras.optimizers.SGD, keras.optimizers.RMSprop]


for optimizer in optimizers:
    model = Sequential()
    model.add(Dense(512, activation='relu', input_shape=(784,)))
    model.add(Dropout(0.2))
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(num_classes, activation='softmax'))

    # Use the optimizer's name as method_name
    method_name = optimizer.__name__
    
    model.summary()
    
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer(),
                  metrics=['accuracy'])
    
    history = model.fit(x_train, y_train,
                        batch_size=batch_size,
                        epochs=epochs,
                        verbose=1,
                        validation_data=(x_test, y_test))
    
    score = model.evaluate(x_test, y_test, verbose=0)
    new_result = pd.DataFrame({'Optimizer': [method_name], 'Test Loss': [score[0]], 'Test Accuracy': [score[1]]})
    results = pd.concat([results, new_result], ignore_index=True)
    
    print(results)


60000 train samples
10000 test samples
Model: "sequential_484"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_1315 (Dense)          (None, 512)               401920    
                                                                 
 dropout_848 (Dropout)       (None, 512)               0         
                                                                 
 dense_1316 (Dense)          (None, 512)               262656    
                                                                 
 dropout_849 (Dropout)       (None, 512)               0         
                                                                 
 dense_1317 (Dense)          (None, 10)                5130      
                                                                 
Total params: 669,706
Trainable params: 669,706
Non-trainable params: 0
_________________________________________________________________
Epoch 1

  results = pd.concat([results, new_result], ignore_index=True)


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
  Optimizer  Test Loss  Test Accuracy
0      Adam   0.076241         0.9843
1       SGD   0.145389         0.9550
Model: "sequential_486"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_1321 (Dense)          (None, 512)               401920    
                                                                 
 dropout_852 (Dropout)       (None, 512)               0         
                                                                 
 dense_1322 (Dense)          (None, 512)               262656    
                                                                 
 dropout_853 (Dropout)       (None, 512)               0         
                                                

In [52]:
# mnist_mlp.py + different regularization techniques
batch_size = 128
num_classes = 10
epochs = 20

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape (10000, 784)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
seed_value = 42
results = pd.DataFrame(columns=['Regularization Method', 'Test Loss', 'Test Accuracy'])

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

regularization_methods = ['None', 'L1', 'L2', 'Dropout']

for method in regularization_methods:
    model = Sequential()
    model.add(Dense(512, activation='relu', input_shape=(784,)))

    if method == 'L1':
        model.add(Dense(512, activation='relu', kernel_regularizer=keras.regularizers.l1(0.01)))
    elif method == 'L2':
        model.add(Dense(512, activation='relu', kernel_regularizer=keras.regularizers.l2(0.01)))
    elif method == 'Dropout':
        model.add(Dropout(0.2))
        model.add(Dense(512, activation='relu'))
        model.add(Dropout(0.2))

    model.add(Dense(num_classes, activation='softmax'))

    # Use the regularization method as method_name
    method_name = method

    model.summary()

    model.compile(loss='categorical_crossentropy',
                  optimizer=keras.optimizers.RMSprop(),
                  metrics=['accuracy'])

    history = model.fit(x_train, y_train,
                        batch_size=batch_size,
                        epochs=epochs,
                        verbose=1,
                        validation_data=(x_test, y_test))

    score = model.evaluate(x_test, y_test, verbose=0)
    
    new_result = pd.DataFrame({'Regularization Method': [method_name], 'Test Loss': [score[0]], 'Test Accuracy': [score[1]]})
    results = pd.concat([results, new_result], ignore_index=True)

    print(results)


60000 train samples
10000 test samples
Model: "sequential_487"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_1324 (Dense)          (None, 512)               401920    
                                                                 
 dense_1325 (Dense)          (None, 10)                5130      
                                                                 
Total params: 407,050
Trainable params: 407,050
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
  Regularization Method  Test Loss  Test Accuracy
0                  None   0.096896         0.9828
Model: "sequential_488"
________________________________________________

  results = pd.concat([results, new_result], ignore_index=True)


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
  Regularization Method  Test Loss  Test Accuracy
0                  None   0.096896         0.9828
1                    L1   1.466660         0.9718
Model: "sequential_489"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_1329 (Dense)          (None, 512)               401920    
                                                                 
 dense_1330 (Dense)          (None, 512)               262656    
                                                                 
 dense_1331 (Dense)          (None, 10)                5130      
                                                                 
Total params: 669,706
Trainable params: 669,706
Non-trainable params: 0
______

In [53]:
# mnist_mlp.py + different regularization techniques
num_classes = 10
epochs = 20

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape(10000, 784)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

# Define hyperparameter values to test
learning_rates = [0.00001, 0.1]
momentum_values = [0, 0.9]
epsilon_values = [1e-8, 1e-4]
nesterov_values = [False, True]
batch_sizes = [64, 256]
optimizers = [keras.optimizers.Adam, keras.optimizers.SGD, keras.optimizers.RMSprop]

# Create a DataFrame to store results
results = pd.DataFrame(columns=['Optimizer', 'Learning Rate', 'Momentum', 'Epsilon', 'Nesterov', 'Batch Size', 'Test Loss', 'Test Accuracy'])

# Convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)


# Test different hyperparameters
for optimizer in optimizers:
    for lr in learning_rates:
        for momentum in momentum_values:
            for epsilon in epsilon_values:
                for nesterov in nesterov_values:
                    for batch_size in batch_sizes:
                        model = Sequential()
                        model.add(Dense(512, activation='relu', input_shape=(784,)))
                        model.add(Dropout(0.2))
                        model.add(Dense(512, activation='relu'))
                        model.add(Dropout(0.2))
                        model.add(Dense(10, activation='softmax'))

                        optimizer_name = optimizer.__name__
                        optimizer_instance = optimizer(learning_rate=lr)

                        if optimizer_name == 'SGD':
                            optimizer_instance.momentum = momentum
                            optimizer_instance.nesterov = nesterov
                        elif optimizer_name == 'RMSprop':
                            optimizer_instance.epsilon = epsilon

                        model.compile(loss='categorical_crossentropy',
                                      optimizer=optimizer_instance,
                                      metrics=['accuracy'])

                        history = model.fit(x_train, y_train, batch_size=batch_size, epochs=5, verbose=0, validation_data=(x_test, y_test))
                        score = model.evaluate(x_test, y_test, verbose=0)

                        new_result = pd.DataFrame({'Optimizer': [optimizer_name], 'Learning Rate': [lr], 'Momentum': [momentum], 'Epsilon': [epsilon], 'Nesterov': [nesterov], 'Batch Size': [batch_size], 'Test Loss': [score[0]], 'Test Accuracy': [score[1]]})
                        results = pd.concat([results, new_result], ignore_index=True)



# Display the results
pd.set_option('display.max_rows', None)  # To display all rows
pd.set_option('display.max_columns', None)  # To display all columns
best_results = results.sort_values(by='Test Accuracy', ascending=False).head(5)
print("Top 5 Best Performing Configurations:")
print(best_results)
print(results)


  results = pd.concat([results, new_result], ignore_index=True)


Top 5 Best Performing Configurations:
   Optimizer  Learning Rate Momentum       Epsilon Nesterov Batch Size  \
52       SGD            0.1        0  1.000000e-04    False         64   
60       SGD            0.1      0.9  1.000000e-04    False         64   
48       SGD            0.1        0  1.000000e-08    False         64   
54       SGD            0.1        0  1.000000e-04     True         64   
50       SGD            0.1        0  1.000000e-08     True         64   

    Test Loss  Test Accuracy  
52   0.065562         0.9795  
60   0.068581         0.9783  
48   0.073224         0.9772  
54   0.072099         0.9768  
50   0.072205         0.9768  
   Optimizer  Learning Rate Momentum       Epsilon Nesterov Batch Size  \
0       Adam        0.00001        0  1.000000e-08    False         64   
1       Adam        0.00001        0  1.000000e-08    False        256   
2       Adam        0.00001        0  1.000000e-08     True         64   
3       Adam        0.00001        

# Parameters results:
- Initialization method: GlorotUniform 
- Activation function: Relu
- Optimizer: RMSdrop
- Regulazation technique: Dropout
- -----------------------------------------

# Top 5 Best Performing Configurations:
    Optimizer  Learning Rate Momentum       Epsilon Nesterov Batch Size  \
    62       SGD            0.1      0.9  1.000000e-04     True         64   
    60       SGD            0.1      0.9  1.000000e-04    False         64   
    58       SGD            0.1      0.9  1.000000e-08     True         64   
    50       SGD            0.1        0  1.000000e-08     True         64   
    54       SGD            0.1        0  1.000000e-04     True         64   

    Test       Loss        Test Accuracy  
    62           0.072707         0.9780  
    60           0.073003         0.9778  
    58           0.070178         0.9777  
    50           0.073058         0.9767  
    54           0.075574         0.9765  

# Task 1.2 CNN Tuning

In [54]:
# mnist_cnn.py + initilazation methods
batch_size = 128
num_classes = 10
epochs = 12

# input image dimensions
img_rows, img_cols = 28, 28

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

initilization_methods = [Zeros(), 
                       RandomNormal(seed=seed_value), 
                       RandomUniform(seed=seed_value), 
                       glorot_uniform(seed=seed_value), 
                       glorot_normal(seed=seed_value), 
                       he_normal(seed=seed_value), 
                       he_uniform(seed=seed_value), 
                       lecun_normal(seed=seed_value), 
                       lecun_uniform(seed=seed_value)]

# Create a DataFrame to store results
results = pd.DataFrame(columns=['Initialization Method', 'Test Loss', 'Test Accuracy'])

for method in initilization_methods:
    # Create a new model for each initialization method
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape, kernel_initializer=method))
    model.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer=method))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(128, activation='relu', kernel_initializer=method))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax', kernel_initializer=method))

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adadelta(),
                  metrics=['accuracy'])

    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=0, validation_data=(x_test, y_test))
    score = model.evaluate(x_test, y_test, verbose=0)

    # Append the results to the DataFrame
    new_row = {'Initialization Method': method.__class__.__name__, 'Test Loss': score[0], 'Test Accuracy': score[1]}
    results = pd.concat([results, pd.DataFrame([new_row])], ignore_index=True)

# Display the results
print(results)

x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


  results = pd.concat([results, pd.DataFrame([new_row])], ignore_index=True)


  Initialization Method  Test Loss  Test Accuracy
0                 Zeros   2.302520         0.1135
1          RandomNormal   2.070365         0.5179
2         RandomUniform   2.261849         0.3847
3         GlorotUniform   0.743484         0.8445
4          GlorotNormal   0.711782         0.8507
5              HeNormal   0.411512         0.8987
6             HeUniform   0.425241         0.8933
7           LecunNormal   0.483079         0.8877
8          LecunUniform   0.491412         0.8841


In [55]:
# mnist_cnn.py + different activation functions

batch_size = 128
num_classes = 10
epochs = 12

# input image dimensions
img_rows, img_cols = 28, 28

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

activation_methods = ['relu', 'sigmoid', 'tanh', 'linear', 'softmax']

# Create a DataFrame to store results
results = pd.DataFrame(columns=['Activation Method', 'Test Loss', 'Test Accuracy'])

for activation_method in activation_methods:
    # Create a new model for each activation function
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3), activation=activation_method, input_shape=input_shape))
    model.add(Conv2D(64, (3, 3), activation=activation_method))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(128, activation=activation_method))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adadelta(),
                  metrics=['accuracy'])

    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=0, validation_data=(x_test, y_test))
    score = model.evaluate(x_test, y_test, verbose=0)

    # Append the results to the DataFrame
    new_row = {'Activation Method': activation_method, 'Test Loss': score[0], 'Test Accuracy': score[1]}
    results = pd.concat([results, pd.DataFrame([new_row])], ignore_index=True)

# Display the results
print(results)


x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


  results = pd.concat([results, pd.DataFrame([new_row])], ignore_index=True)


  Activation Method  Test Loss  Test Accuracy
0              relu   0.683455         0.8465
1           sigmoid   2.301056         0.1135
2              tanh   0.563670         0.8680
3            linear   0.477094         0.8779
4           softmax   2.302552         0.1009


In [56]:
# mnist_cnn.py + different optimizers

batch_size = 128
num_classes = 10
epochs = 12

# input image dimensions
img_rows, img_cols = 28, 28

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

optimizers = [keras.optimizers.Adam, keras.optimizers.SGD, keras.optimizers.RMSprop]

# Create a DataFrame to store results
results = pd.DataFrame(columns=['Optimizer', 'Test Loss', 'Test Accuracy'])

for optimizer in optimizers:
    # Create a new model for each optimizer
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=optimizer(),
                  metrics=['accuracy'])

    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=0, validation_data=(x_test, y_test))
    score = model.evaluate(x_test, y_test, verbose=0)

    # Append the results to the DataFrame
    new_row = {'Optimizer': optimizer.__name__, 'Test Loss': score[0], 'Test Accuracy': score[1]}
    results = pd.concat([results, pd.DataFrame([new_row])], ignore_index=True)

# Display the results
print(results)


x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


  results = pd.concat([results, pd.DataFrame([new_row])], ignore_index=True)


  Optimizer  Test Loss  Test Accuracy
0      Adam   0.031210         0.9915
1       SGD   0.104251         0.9665
2   RMSprop   0.031213         0.9903


In [57]:
# mnist_cnn.py + different regularization methods
batch_size = 128
num_classes = 10
epochs = 12

# input image dimensions
img_rows, img_cols = 28, 28

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

x_train = x_train.astype ('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

regularization_methods = ['None', 'L1', 'L2', 'Dropout']

# Create a DataFrame to store results
results = pd.DataFrame(columns=['Regularization Method', 'Test Loss', 'Test Accuracy'])

for reg_method in regularization_methods:
    # Create a new model for each regularization method
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape, kernel_regularizer=None))
    
    if reg_method == 'L1':
        model.add(Conv2D(64, (3, 3), activation='relu', kernel_regularizer=regularizers.l1(0.01)))
    elif reg_method == 'L2':
        model.add(Conv2D(64, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.01)))
    else:
        model.add(Conv2D(64, (3, 3), activation='relu', kernel_regularizer=None))

    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))

    if reg_method == 'Dropout':
        model.add(Dropout(0.5))
    
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adadelta(),
                  metrics=['accuracy'])

    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=0, validation_data=(x_test, y_test))
    score = model.evaluate(x_test, y_test, verbose=0)

    # Append the results to the DataFrame
    new_row = {'Regularization Method': reg_method, 'Test Loss': score[0], 'Test Accuracy': score[1]}
    results = pd.concat([results, pd.DataFrame([new_row])], ignore_index=True)

# Display the results
print(results)


x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


  results = pd.concat([results, pd.DataFrame([new_row])], ignore_index=True)


  Regularization Method  Test Loss  Test Accuracy
0                  None   0.477364         0.8749
1                    L1   5.272748         0.8681
2                    L2   0.836787         0.8767
3               Dropout   0.839515         0.8274


In [60]:
# mnist_cnn.py + different hyperparameters
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)

# Define hyperparameter values to test
learning_rates = [0.00001, 0.1]
momentum_values = [0, 0.9]
epsilon_values = [1e-8, 1e-4]
nesterov_values = [False, True]
batch_sizes = [64, 256]
optimizers = [keras.optimizers.Adam, keras.optimizers.SGD, keras.optimizers.RMSprop]

# Create a DataFrame to store results
results = pd.DataFrame(columns=['Learning Rate', 'Momentum', 'Epsilon', 'Nesterov', 'Batch Size', 'Regularization', 'Test Loss', 'Test Accuracy'])

# Test different hyperparameters
for lr, momentum, epsilon, nesterov, batch_size, reg_method in product(learning_rates, momentum_values, epsilon_values, nesterov_values, batch_sizes, regularization_methods):
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1), kernel_regularizer=None))
    
    if reg_method == 'L1':
        model.add(Conv2D(64, (3, 3), activation='relu', kernel_regularizer=regularizers.l1(0.01)))
    elif reg_method == 'L2':
        model.add(Conv2D(64, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.01)))
    else:
        model.add(Conv2D(64, (3, 3), activation='relu', kernel_regularizer=None))

    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))

    if reg_method == 'Dropout':
        model.add(Dropout(0.5))
    
    model.add(Dense(num_classes, activation='softmax'))

    optimizer = keras.optimizers.Adadelta(learning_rate=lr, rho=momentum, epsilon=epsilon)
    model.compile(loss=keras.losses.categorical_crossentropy, optimizer=optimizer, metrics=['accuracy'])
    
    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=0, validation_data=(x_test, y_test))
    score = model.evaluate(x_test, y_test, verbose=0)

    # Append the results to the DataFrame
    new_row = {'Learning Rate': lr, 'Momentum': momentum, 'Epsilon': epsilon, 'Nesterov': nesterov, 'Batch Size': batch_size, 'Regularization': reg_method, 'Test Loss': score[0], 'Test Accuracy': score[1]}
    results = pd.concat([results, pd.DataFrame([new_row])], ignore_index=True)

# Display the results
pd.set_option('display.max_rows', None)  # To display all rows
pd.set_option('display.max_columns', None)  # To display all columns 
best_results = results.sort_values(by='Test Accuracy', ascending=False).head(5)
print("Top 5 Best Performing Configurations:")
print(best_results)
print(results)


  results = pd.concat([results, pd.DataFrame([new_row])], ignore_index=True)


Top 5 Best Performing Configurations:
     Learning Rate Momentum  Epsilon Nesterov Batch Size Regularization  \
115            0.1      0.9   0.0001    False         64        Dropout   
123            0.1      0.9   0.0001     True         64        Dropout   
88             0.1        0   0.0001     True         64           None   
83             0.1        0   0.0001    False         64        Dropout   
82             0.1        0   0.0001    False         64             L2   

     Test Loss  Test Accuracy  
115   0.028825         0.9912  
123   0.029777         0.9910  
88    0.036239         0.9899  
83    0.031306         0.9898  
82    0.040284         0.9897  
     Learning Rate Momentum       Epsilon Nesterov Batch Size Regularization  \
0          0.00001        0  1.000000e-08    False         64           None   
1          0.00001        0  1.000000e-08    False         64             L1   
2          0.00001        0  1.000000e-08    False         64             L2   

# Parameters results:
- Initialization method: HeNormal
- Activation function: linear
- Optimizer: Adam
- Regulazation technique: L2
- -----------------------------------------
# Top 5 Best Performing Configurations:
        Learning Rate Momentum  Epsilon Nesterov Batch Size Regularization
    115            0.1      0.9   0.0001    False         64        Dropout   
    123            0.1      0.9   0.0001     True         64        Dropout   
    88             0.1        0   0.0001     True         64           None   
    83             0.1        0   0.0001    False         64        Dropout   
    82             0.1        0   0.0001    False         64             L2   


     Test Loss  Test Accuracy  
    115   0.028825         0.9912  
    123   0.029777         0.9910  
    88    0.036239         0.9899  
    83    0.031306         0.9898  
    82    0.040284         0.9897 