# Use a simple neural network to classify hand-written digits.

The MNIST dataset consists of images of hand-written digits (0-9) as well as their corresponding labels. We will use a simple shallow neural network implemented in Keras to identify the hand-written digits.
Again, let's first import all the libraries needed for this algorithm.

In [None]:
import numpy as np
import h5py
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.python.framework import ops

from keras.models import Sequential
from keras.layers import Dense, Dropout,BatchNormalization,Activation
from keras.optimizers import Adam
from keras.initializers import glorot_uniform
from keras.callbacks import LearningRateScheduler

import keras.backend as K

Next, let's define some functions to help read in the dataset and convert labels to one-hot vectors.

In [None]:
def load_dataset():
    """Read the train and test sets from their HDF5 files.

    Reads "train_set_x"/"train_set_y" from 'train_signs.h5' and
    "test_set_x"/"test_set_y" from 'test_signs.h5' in the current working
    directory. Both files are closed before the function returns.

    Returns:
        A 4-tuple ``(train_x, train_y, test_x, test_y)`` of NumPy arrays.
        The feature arrays are returned as stored in the files; the label
        arrays are reshaped to a single row of shape ``(1, m)``, where ``m``
        is the length of the stored label array's first axis.

    Raises:
        OSError: if either file cannot be opened (raised by h5py).
        KeyError: if an expected dataset name is missing from a file.
    """
    # NOTE(review): the file names mention "signs" while the surrounding text
    # describes MNIST digits -- confirm these files hold the intended data.

    # The slices are copied into in-memory arrays inside the `with` blocks,
    # so the data stays valid after the file handles are closed.
    with h5py.File('train_signs.h5', "r") as train_dataset:
        train_set_x_orig = np.array(train_dataset["train_set_x"][:])  # train set features
        train_set_y_orig = np.array(train_dataset["train_set_y"][:])  # train set labels

    with h5py.File('test_signs.h5', "r") as test_dataset:
        test_set_x_orig = np.array(test_dataset["test_set_x"][:])  # test set features
        test_set_y_orig = np.array(test_dataset["test_set_y"][:])  # test set labels

    # Turn the label arrays into row vectors of shape (1, m).
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig

def convert_to_one_hot(Y, C):
    """Encode integer class labels as one-hot column vectors.

    Args:
        Y: NumPy array of integer labels; it is flattened before encoding.
        C: Number of classes, i.e. the length of each one-hot vector.

    Returns:
        Array of shape ``(C, Y.size)`` whose column ``j`` is all zeros except
        for a 1.0 in the row given by the ``j``-th label.
    """
    flat_labels = Y.reshape(-1)
    identity = np.eye(C)
    # Row k of the identity matrix is the one-hot vector for class k.
    one_hot_rows = identity[flat_labels]
    return one_hot_rows.T

We will now load the MNIST dataset.

In [None]:
# Read the raw feature and label arrays from disk
X_train_orig, Y_train_orig, X_test_orig, Y_test_orig = load_dataset()

# Transpose the feature arrays (swap their two axes)
X_train, X_test = X_train_orig.T, X_test_orig.T

# One-hot encode the labels for 10 classes, then transpose the result
Y_train, Y_test = (
    convert_to_one_hot(labels, 10).T
    for labels in (Y_train_orig, Y_test_orig)
)

Let's explore what the dataset looks like. The raw X_train_orig and X_test_orig arrays are of shapes [784, 20000] and [784, 10000] respectively. The first dimension (784) is a flattened 28x28 hand-written digit image, and the second dimension is the number of images in the dataset (20000 images in the training set, 10000 images in the test set). Y_train_orig and Y_test_orig hold the labels, with shapes [1, 20000] and [1, 10000]. (X_train and X_test are the transposes of the raw arrays, so they hold one image per row.)

In [None]:
# Show the shape of each raw array, one per line
print(
    X_train_orig.shape,
    Y_train_orig.shape,
    X_test_orig.shape,
    Y_test_orig.shape,
    sep='\n',
)

We can display an image in the training set as well as its corresponding label.

In [None]:
# Show one training image together with its label
idx = 0                                   # which training example to display

# Row `idx` of X_train is reshaped into a 28x28 pixel grid
demoimage = X_train[idx, :].reshape(28, 28)

plt.imshow(demoimage, cmap='gray')        # render in grayscale
plt.show()

# Label belonging to the displayed image
print(Y_train_orig[0, idx])

Define a Keras model to train on the dataset. It consists of 4 hidden Dense layers, each followed by BatchNorm, a ReLU activation and Dropout; the output layer is a Dense layer with a softmax activation function.

In [None]:
model = Sequential()

# Each hidden block is Dense -> BatchNorm -> ReLU -> Dropout.
# The pairs below give (number of units, dropout rate) for each block.
hidden_blocks = [(256, 0.5), (128, 0.4), (64, 0.3), (32, 0.2)]

for block_index, (n_units, drop_rate) in enumerate(hidden_blocks):
    # Only the very first layer declares the input size (784 features).
    first_layer_args = {'input_dim': 784} if block_index == 0 else {}
    model.add(Dense(n_units, kernel_initializer=glorot_uniform(), **first_layer_args))
    model.add(BatchNormalization(axis=-1))
    model.add(Activation('relu'))
    model.add(Dropout(drop_rate))

# Output layer: 10 units with softmax activation
model.add(Dense(10, activation='softmax'))

We'll define a learning rate scheduler that lowers the learning rate at later epochs, as well as a custom Adam optimizer for the model.

In [None]:
def scheduler(epoch):
    """Return the learning rate to use for the given epoch.

    The rate is a step function of the epoch index:

    * epochs 0-9:   0.001
    * epochs 10-19: 0.0003
    * epoch 20 on:  0.0001

    The value depends only on ``epoch``. The function does not read or
    modify the optimizer, so it gives the right rate even when training
    does not pass through epochs 10 and 20 exactly (for example when it
    starts at a later epoch), and a fresh training run starts again at
    0.001.

    Args:
        epoch: Zero-based epoch index.

    Returns:
        The learning rate for that epoch, as a Python float.
    """
    if epoch >= 20:
        return 0.0001
    if epoch >= 10:
        return 0.0003
    # Same value the Adam optimizer in this notebook is created with.
    return 0.001
# Adam optimizer with every hyper-parameter spelled out explicitly
adam = Adam(
    lr=0.001,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=None,
    decay=0.0,
    amsgrad=False,
)

# Callback that takes the learning rate from scheduler(); verbose=1 is passed through
change_lr = LearningRateScheduler(scheduler, verbose=1)

Compile the model and start training. We'll plot the change in loss and accuracy during the training process.

In [None]:
model.compile(loss='categorical_crossentropy',
              optimizer=adam,
              metrics=['accuracy'])

# NOTE(review): with epochs=3 the learning-rate changes that scheduler()
# applies at epochs 10 and 20 are never reached -- raise `epochs` if the
# schedule is meant to take effect.
history = model.fit(X_train, Y_train,
          epochs=3,
          batch_size=64, 
          callbacks=[change_lr])

# List all data stored in history
print(history.history.keys())

# Depending on the Keras version, training accuracy is recorded under
# 'acc' or 'accuracy'; use whichever key is present so the plot does not
# fail with a KeyError.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'

# Plot the change in accuracy
plt.plot(history.history[acc_key])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train'], loc='upper left')
plt.show()

# Plot the change in loss
plt.plot(history.history['loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train'], loc='upper left')
plt.show()

Next, let's evaluate how well the system performs on the test dataset.

In [None]:
# Score the trained model on the test data.
# The first two entries of the result are printed as loss and accuracy.
preds = model.evaluate(X_test, Y_test)
print("Loss = {}".format(preds[0]))
print("Test Accuracy = {}".format(preds[1]))