**Deep Learning Notebook 2**

Dataset: MNIST handwritten digits, available as part of Keras (`keras.datasets.mnist`)

Objectives:
1. Vanishing & exploding gradients - He initialization
2. Batch normalization
3. Optimizers
4. Regularization: Dropout

**0. First Steps**

In [None]:
# 0.1 Import the required libraries, modules

import tensorflow as tf
from tensorflow import keras

In [None]:
# 0.2 Report the installed TensorFlow and Keras versions

for label, module in (("TF Version: ", tf), ("Keras Version: ", keras)):
    # print() separates its two arguments with a single space
    print(label, module.__version__)

TF Version:  2.3.0
Keras Version:  2.4.0


In [None]:
# 0.3 Load the MNIST dataset bundled with Keras

mnist = keras.datasets.mnist
train_split, test_split = mnist.load_data()

# Each split is an (images, labels) pair
X_train_full, y_train_full = train_split
X_test, y_test = test_split

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [None]:
# 0.4 Check data shape
# Displays the shape tuple of the full training image array
# (the recorded output of this cell is (60000, 28, 28)).

X_train_full.shape

(60000, 28, 28)

In [None]:
# 0.5 Split off a validation set and normalize by dividing by 255

# The first 5000 training samples become the validation set and the rest are
# used for training. Both are derived from the untouched X_train_full /
# y_train_full, so these two lines give the same result on every run.
X_valid, X_train = X_train_full[:5000] / 255.0, X_train_full[5000:] / 255.0
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]

# X_test is rescaled under its own name, so re-running this cell would divide
# it by 255 a second time. Only rescale while it still has an integer dtype
# (the raw images are loaded as integers; the scaled array is floating point).
if X_test.dtype.kind in ("i", "u"):
    X_test = X_test / 255.0

In [None]:
# 0.6 Set class names (the digits 0-9 as strings)

class_names = [str(digit) for digit in range(10)]

End of First Steps

**1. Model with He initialization, batch normalization & Adam optimizer**

In [None]:
# 1.1 Define model

def defmodel_3(n_hidden = 2, n_neurons = 150, learning_rate = 0.015):
    """Build and compile a dense classifier with He initialization and batch norm.

    Parameters
    ----------
    n_hidden : int
        Number of hidden Dense + BatchNormalization pairs.
    n_neurons : int
        Number of units in each hidden Dense layer.
    learning_rate : float
        Learning rate passed to the Adam optimizer.

    Returns
    -------
    keras.models.Sequential
        Compiled model taking 28x28 inputs, ending in a 10-unit softmax layer,
        with sparse categorical cross-entropy loss and an accuracy metric.
    """
    model = keras.models.Sequential()
    model.add(keras.layers.Flatten(input_shape=[28, 28]))
    for _ in range(n_hidden):
        model.add(keras.layers.Dense(n_neurons, activation = "relu", kernel_initializer = "he_normal"))
        model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.Dense(10, activation="softmax"))
    # `learning_rate` is the documented Adam argument; `lr` is only a
    # deprecated alias kept for backward compatibility.
    model.compile(loss = "sparse_categorical_crossentropy",
                  optimizer = keras.optimizers.Adam(learning_rate = learning_rate, beta_1 = 0.9, beta_2 = 0.999),
                  metrics = ["accuracy"])
    return model

# Wrap the model-building function as a scikit-learn style classifier

model_3 = keras.wrappers.scikit_learn.KerasClassifier(build_fn = defmodel_3)

In [None]:
# 1.2 Grid search

# Import the required libraries/modules

from scipy.stats import reciprocal
from sklearn.model_selection import GridSearchCV
import numpy as np

# Hyperparameter grid explored by the search

param_dict = dict(
    n_hidden = [2, 3],
    n_neurons = [100, 150, 200],
    learning_rate = [0.01, 0.015, 0.02],
)

# Exhaustive 3-fold cross-validated search over the grid, using all cores.
# The extra keyword arguments given to fit() are forwarded to model training.
grid_search_cv_model_3 = GridSearchCV(estimator = model_3, param_grid = param_dict, cv = 3, n_jobs = -1)
grid_search_cv_model_3.fit(
    X_train,
    y_train,
    epochs = 30,
    validation_data = (X_valid, y_valid),
    callbacks = [keras.callbacks.EarlyStopping(patience = 10)],
)

# Report the best hyperparameter combination and its cross-validated score

print("Best parameter values: ", grid_search_cv_model_3.best_params_)
print("Best accuracy score: ", grid_search_cv_model_3.best_score_)



Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Best parameter values:  {'learning_rate': 0.01, 'n_hidden': 3, 'n_neurons': 150}
Best accuracy score:  0.9740181366602579


In [None]:
# 1.3 Training a fresh model with the best hyperparameters on the entire train split

# GridSearchCV (refit=True by default) has already trained best_estimator_ on
# X_train, so calling fit() on its underlying model would continue training
# from those weights instead of starting over. Build a new, untrained model
# from the best parameters so that history_3 records the whole training run.
model_3_final = defmodel_3(**grid_search_cv_model_3.best_params_)

history_3 = model_3_final.fit(X_train, y_train, epochs=30, validation_data=(X_valid, y_valid))

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [None]:
# 1.4 Evaluate model on test dataset
# The recorded output of this cell is a two-element list; presumably
# [loss, accuracy] on the test set, given metrics=["accuracy"] at compile time.

model_3_final.evaluate(X_test, y_test)



[0.25465306639671326, 0.9814000129699707]

End of Model 1

**2. Model with Regularization (Dropout)**

In [None]:
# 2.1 Define model

def defmodel_4(n_hidden = 2, n_neurons = 150, learning_rate = 0.015, dropout_rate = 0.05):
    """Build and compile a dense classifier with He init, batch norm and dropout.

    Parameters
    ----------
    n_hidden : int
        Number of hidden Dense + BatchNormalization + Dropout groups.
    n_neurons : int
        Number of units in each hidden Dense layer.
    learning_rate : float
        Learning rate passed to the Adam optimizer.
    dropout_rate : float
        Rate given to the Dropout layer that follows every hidden group.
        Defaults to 0.05.

    Returns
    -------
    keras.models.Sequential
        Compiled model taking 28x28 inputs, ending in a 10-unit softmax layer,
        with sparse categorical cross-entropy loss and an accuracy metric.
    """
    model = keras.models.Sequential()
    model.add(keras.layers.Flatten(input_shape=[28, 28]))
    for _ in range(n_hidden):
        model.add(keras.layers.Dense(n_neurons,
                                     activation = "relu",
                                     kernel_initializer = "he_normal"))
        model.add(keras.layers.BatchNormalization())
        model.add(keras.layers.Dropout(rate = dropout_rate))
    model.add(keras.layers.Dense(10, activation="softmax"))
    # `learning_rate` is the documented Adam argument; `lr` is only a
    # deprecated alias kept for backward compatibility.
    model.compile(loss = "sparse_categorical_crossentropy",
                  optimizer = keras.optimizers.Adam(learning_rate = learning_rate, beta_1 = 0.9, beta_2 = 0.999),
                  metrics = ["accuracy"])
    return model

# Wrap the model-building function as a scikit-learn style classifier

model_4 = keras.wrappers.scikit_learn.KerasClassifier(build_fn = defmodel_4)

In [None]:
# 2.2 Grid search

# Import the required libraries/modules

from scipy.stats import reciprocal
from sklearn.model_selection import GridSearchCV
import numpy as np

# Initialize parameters dictionary

param_dict = {
    "n_hidden": [2, 3],
    "n_neurons": [100, 150, 200],
    "learning_rate": [0.01, 0.015, 0.02]
}

# 3-fold cross-validated search over the grid, using all cores.
# EarlyStopping can only end a run if its patience is smaller than the number
# of epochs: with 10 epochs per fit, a patience of 10 could never trigger, so
# a patience of 3 is used here.
grid_search_cv_model_4 = GridSearchCV(model_4, param_dict, cv = 3, n_jobs = -1)
grid_search_cv_model_4.fit(X_train, y_train, epochs = 10,
                           validation_data=(X_valid, y_valid),
                           callbacks=[keras.callbacks.EarlyStopping(patience=3)])

# Fetch best parameter values

print("Best parameter values: ", grid_search_cv_model_4.best_params_)

# Fetch best score

print("Best accuracy score: ", grid_search_cv_model_4.best_score_)



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Best parameter values:  {'learning_rate': 0.01, 'n_hidden': 3, 'n_neurons': 200}
Best accuracy score:  0.9709999759991964


In [None]:
# 2.3 Training a fresh model with the best hyperparameters on the entire train split

# GridSearchCV (refit=True by default) has already trained best_estimator_ on
# X_train, so calling fit() on its underlying model would continue training
# from those weights instead of starting over. Build a new, untrained model
# from the best parameters so that history_4 records the whole training run.
model_4_final = defmodel_4(**grid_search_cv_model_4.best_params_)

# Pass the validation split, as in section 1.3, so that history_4 also
# contains validation metrics.
history_4 = model_4_final.fit(X_train, y_train, epochs=30, validation_data=(X_valid, y_valid))

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [None]:
# 2.4 Evaluate model on test dataset
# The recorded output of this cell is a two-element list; presumably
# [loss, accuracy] on the test set, given metrics=["accuracy"] at compile time.

model_4_final.evaluate(X_test, y_test)



[0.09918012470006943, 0.9782999753952026]