# Import Statements

In [1]:
import numpy as np
import pandas as pd
import pickle
import gzip
from matplotlib import pyplot as plt
from tqdm import tqdm

# Read Files

In [2]:
def images_file_read(file_name):
    """Read a gzip-compressed IDX3 (MNIST-style) image file into a NumPy array.

    The file starts with four big-endian 32-bit integers: a magic number
    (2051 for unsigned-byte, 3-dimensional data), the image count, the row
    count and the column count. The remaining bytes are the pixels, one
    unsigned byte (0 to 255) each.

    Args:
        file_name: Path to the ``.gz`` file.

    Returns:
        A read-only ``uint8`` array of shape ``(image_count, row_count, column_count)``.

    Raises:
        ValueError: If the header is incomplete, the magic number is not 2051,
            or the pixel payload does not match the sizes declared in the header.
    """
    with gzip.open(file_name, 'rb') as f:
        header = f.read(16)
        if len(header) != 16:
            raise ValueError(
                '{}: incomplete IDX3 header ({} of 16 bytes)'.format(file_name, len(header))
            )
        # Header layout: magic number, image count, row count, column count.
        magic_number, image_count, row_count, column_count = (
            int.from_bytes(header[offset:offset + 4], 'big') for offset in range(0, 16, 4)
        )
        # Reject label files (magic 2049) or other IDX payloads instead of
        # silently interpreting their bytes as pixels.
        if magic_number != 2051:
            raise ValueError(
                '{}: not an IDX3 unsigned-byte image file (magic number {}, expected 2051)'.format(
                    file_name, magic_number
                )
            )
        # rest is the image pixel data, each pixel is stored as an unsigned byte
        image_data = f.read()

    expected_size = image_count * row_count * column_count
    if len(image_data) != expected_size:
        raise ValueError(
            '{}: expected {} pixel bytes ({}x{}x{}), found {}'.format(
                file_name, expected_size, image_count, row_count, column_count, len(image_data)
            )
        )
    images = np.frombuffer(image_data, dtype=np.uint8).reshape((image_count, row_count, column_count))
    return images

In [3]:
def labels_file_read(file_name):
    """Read a gzip-compressed IDX1 (MNIST-style) label file into a NumPy array.

    The file starts with two big-endian 32-bit integers: a magic number
    (2049 for unsigned-byte, 1-dimensional data) and the label count. The
    remaining bytes are the labels, one unsigned byte each.

    Args:
        file_name: Path to the ``.gz`` file.

    Returns:
        A read-only 1-D ``uint8`` array holding ``label_count`` labels.

    Raises:
        ValueError: If the header is incomplete, the magic number is not 2049,
            or the number of label bytes differs from the declared label count.
    """
    with gzip.open(file_name, 'rb') as f:
        header = f.read(8)
        if len(header) != 8:
            raise ValueError(
                '{}: incomplete IDX1 header ({} of 8 bytes)'.format(file_name, len(header))
            )
        # Header layout: magic number, then number of labels.
        magic_number = int.from_bytes(header[:4], 'big')
        label_count = int.from_bytes(header[4:], 'big')
        # Reject image files (magic 2051) or other IDX payloads instead of
        # returning their raw bytes as if they were labels.
        if magic_number != 2049:
            raise ValueError(
                '{}: not an IDX1 unsigned-byte label file (magic number {}, expected 2049)'.format(
                    file_name, magic_number
                )
            )
        # rest is the label data, each label is stored as unsigned byte
        label_data = f.read()

    # A truncated or padded file would otherwise silently yield the wrong number of labels.
    if len(label_data) != label_count:
        raise ValueError(
            '{}: header declares {} labels, found {} bytes'.format(
                file_name, label_count, len(label_data)
            )
        )
    labels = np.frombuffer(label_data, dtype=np.uint8)
    return labels

In [4]:
# Load the training images as a (count, rows, columns) uint8 array and report its shape.
train_x = images_file_read('Dataset/mnist/train-images-idx3-ubyte.gz')
print(train_x.shape)

(60000, 28, 28)


In [5]:
# train_x = np.reshape(train_x, (60000,784))
# print(train_x.shape)

In [6]:
# Load the training labels as a 1-D uint8 array (one label per image) and report its shape.
train_y = labels_file_read('Dataset/mnist/train-labels-idx1-ubyte.gz')
print(train_y.shape)

(60000,)


In [7]:
# Load the held-out test images as a (count, rows, columns) uint8 array and report its shape.
test_x = images_file_read('Dataset/mnist/t10k-images-idx3-ubyte.gz')
print(test_x.shape)

(10000, 28, 28)


In [8]:
# test_x = np.reshape(test_x, (10000,784))
# print(test_x.shape)

In [9]:
# Load the held-out test labels as a 1-D uint8 array (one label per image) and report its shape.
test_y = labels_file_read('Dataset/mnist/t10k-labels-idx1-ubyte.gz')
print(test_y.shape)

(10000,)


# Preprocessing

In [10]:
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, Activation, Input, Flatten, ReLU, Reshape, Softmax
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.initializers import RandomNormal
from keras.models import load_model

In [11]:
# Preprocessing
# Scale pixel intensities from the raw 0-255 byte range to floats in [0, 1].
# The dtype guard makes the cell safe to re-run: data that has already been
# converted to float32 is not divided by 255 a second time.
if train_x.dtype == np.uint8:
    train_x = (train_x.astype('float32')/255)
if test_x.dtype == np.uint8:
    test_x = (test_x.astype('float32')/255)

# One-hot encode the integer labels. num_classes is fixed at 10 so the encoded
# width does not depend on which digits happen to be present in the data.
# The ndim guard keeps a re-run from one-hot encoding the one-hot matrix again.
if train_y.ndim == 1:
    train_y = to_categorical(train_y, num_classes=10)
if test_y.ndim == 1:
    # Keep the original integer test labels before they are replaced.
    numerical_test_y = test_y
    test_y = to_categorical(test_y, num_classes=10)

print(train_x.shape, train_y.shape)
print(test_x.shape, test_y.shape)

(60000, 28, 28) (60000, 10)
(10000, 28, 28) (10000, 10)


# Autoencoder

In [12]:
# encoder = Sequential([
#     Flatten(input_shape = (28, 28)),
#     Dense(512),
#     ReLU(),
#     Dense(128),
#     ReLU(),
#     Dense(64),
#     ReLU(),
# ])

# decoder = Sequential([
#     Dense(128, input_shape=(64,)),
#     ReLU(),
#     Dense(512),
#     ReLU(),
#     Dense(784),
#     ReLU(),
#     Reshape((28, 28))
# ])

In [13]:
# inp = Input(shape=(28,28))
# latent_vector = encoder(inp)
# output = decoder(latent_vector)

In [14]:
# model = Model(inputs = inp, outputs = output)
# model.compile(
#     optimizer=Adam(learning_rate=0.01),
#     loss='mean_squared_error'
# )

In [15]:
# EPOCHS = 10
# history = model.fit(train_x, train_x, batch_size=100, epochs=EPOCHS)

In [16]:
# plt.plot([i for i in range(1,EPOCHS+1)], history.history['loss'])
# plt.title('Autoencoder Training Loss Graph')
# plt.ylabel('loss')
# plt.xlabel('epoch')
# plt.legend(['train'], loc='upper right')
# plt.show()

# Autoencoder Training Loss Graph

In [17]:
# Displayed in report

# Classifier

In [18]:
# #Freezing the layers
# for layer in model.layers:
#     layer.trainable = False

In [19]:
# classifier = Sequential([
#     encoder,
#     Dense(32),
#     ReLU(), 
#     Dense(10),
#     Softmax(), 
# ])

In [20]:
# classifier.compile(
#     optimizer=Adam(learning_rate=0.01),
#     loss='categorical_crossentropy',
#     metrics=['accuracy'],
# )

In [21]:
# history_2 = classifier.fit(
#     train_x,
#     train_y,
#     epochs=20,
#     batch_size=32
# )

In [22]:
# plt.plot([i for i in range(1,21)], history_2.history['loss'])
# plt.title('Classifier Training Loss Graph')
# plt.ylabel('loss')
# plt.xlabel('epoch')
# plt.legend(['train'], loc='upper right')
# plt.show()

# Classifier Training Loss Graph

In [23]:
# Displayed in report

# Saving Classifier

In [24]:
# classifier.save('Q3_classifier.h5')

# Loading Classifier

In [25]:
# Load the previously trained classifier from disk. The cells that build, train and
# save it are commented out above, so 'Q3_classifier.h5' must already exist.
# NOTE(review): load_model is imported from `keras` while every other Keras import
# uses `tensorflow.keras` — confirm both resolve to the same Keras installation.
classifier = load_model('Q3_classifier.h5')

# Testing

In [26]:
# Run the classifier on the test images; each output row holds one score per class.
predictions_raw = classifier.predict(test_x)

# Collapse one-hot targets and per-class scores to integer class ids
# by taking the position of the largest entry in every row.
true_labels = test_y.argmax(axis=1)
predictions = predictions_raw.argmax(axis=1)

In [27]:
# 10x10 zero-initialised integer count matrix, filled in by the next cell.
my_confusion_matrix = np.zeros((10,10), dtype=np.int64)

In [28]:
# Add 1 at [true class, predicted class] for every test sample.
# np.add.at is unbuffered, so repeated (true, predicted) pairs each contribute
# their own increment — the result equals the per-sample Python loop it replaces,
# without iterating over the samples in Python.
np.add.at(my_confusion_matrix, (true_labels, predictions), 1)

In [29]:
# Rows are indexed by true class, columns by predicted class.
print(my_confusion_matrix)

[[ 961    0    2    0    2    2    9    1    2    1]
 [   0 1122    3    1    1    1    1    1    3    2]
 [  11    1  985    5    2    2    8    8    8    2]
 [   5   12   32  919    0   18    2    7   12    3]
 [   1    4    0    0  936    0   13    3    1   24]
 [  27    2    5   21    9  786    8    2   24    8]
 [   9    5    4    0    8   10  916    2    4    0]
 [   3    4   12    2    5    0    0  980    1   21]
 [   6    2   11   25    7   14    4    7  885   13]
 [   7    3    1    5   24    4    1   23    8  933]]


In [30]:
# Per-class accuracy: the diagonal entry (samples of the class that were predicted
# correctly) divided by the row sum (all samples whose true label is that class).
class_wise_accuracies = np.diag(my_confusion_matrix) / np.sum(my_confusion_matrix, axis=1)
for i in range(10):
    print('Accuracy of class {}: {}%'.format(i, class_wise_accuracies[i]*100))

# Overall accuracy: all correct predictions (the trace) over all test samples.
correct_count = np.trace(my_confusion_matrix)
print('\nOverall accuracy: {}%'.format(100*correct_count/np.sum(my_confusion_matrix, axis=None)))

Accuracy of class 0: 98.06122448979592%
Accuracy of class 1: 98.85462555066078%
Accuracy of class 2: 95.44573643410853%
Accuracy of class 3: 90.99009900990099%
Accuracy of class 4: 95.31568228105907%
Accuracy of class 5: 88.11659192825113%
Accuracy of class 6: 95.61586638830897%
Accuracy of class 7: 95.3307392996109%
Accuracy of class 8: 90.86242299794661%
Accuracy of class 9: 92.46778989098117%

Overall accuracy: 94.23%
