# Original classifier (Copy from slide 882)

In [1]:
import numpy as np, sys
np.random.seed(1)

from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

images, labels = (x_train[0:1000].reshape(1000, 28*28)/255, y_train[0:1000])

one_hot_labels = np.zeros((len(labels), 10))

for i, l in enumerate(labels):
  one_hot_labels[i][l] = 1

labels = one_hot_labels

test_images = x_test.reshape(len(x_test), 28*28) / 255
test_labels = np.zeros((len(y_test), 10))

for i, l in enumerate(y_test):
  test_labels[i][l] = 1

def tanh(x):
  return np.tanh(x)

def tanh2deriv(output):
  return 1 - (output ** 2)

def softmax(x):
  temp = np.exp(x)
  return temp / np.sum(temp, axis=1, keepdims=True)

alpha, iterations, hidden_size = (2, 300, 100)
pixels_per_image, num_labels = (784, 10)
batch_size = 100

weights_0_1 = 0.02 * np.random.random((pixels_per_image, hidden_size)) - 0.01
weights_1_2 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1

relu = lambda x:(x >= 0) * x
relu2deriv = lambda x: (x >= 0)

for j in range(iterations):
  correct_cnt = 0
  for i in range(int(len(images) / batch_size)):
    batch_start, batch_end = ((i * batch_size), ((i + 1) * batch_size))
    layer_0 = images[batch_start:batch_end]
    layer_1 = tanh(np.dot(layer_0, weights_0_1))
    dropout_mask = np.random.randint(2, size=layer_1.shape)
    layer_1 *= dropout_mask * 2
    layer_2 = softmax(np.dot(layer_1, weights_1_2))

    for k in range(batch_size):
      correct_cnt += int(np.argmax(layer_2[k:k+1]) == np.argmax(labels[batch_start + k: batch_start + k + 1]))
    layer_2_delta = (labels[batch_start:batch_end] - layer_2) /(batch_size * layer_2.shape[0])
    layer_1_delta = layer_2_delta.dot(weights_1_2.T) * tanh2deriv(layer_1)
    layer_1_delta *= dropout_mask

    weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
    weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)
  test_correct_cnt = 0

  for i in range(len(test_images)):
    layer_0 = test_images[i: i+ 1]
    layer_1 = tanh(np.dot(layer_0, weights_0_1))
    layer_2 = np.dot(layer_1, weights_1_2)
    test_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))
  
  if (j % 10 == 0):
    sys.stdout.write("\n" + "I:" + str(j) + " Test-Acc:" + str(test_correct_cnt/float(len(test_images))) + " Train-Acc:" + str(correct_cnt/float(len(images))))


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz

I:0 Test-Acc:0.394 Train-Acc:0.156
I:10 Test-Acc:0.6867 Train-Acc:0.723
I:20 Test-Acc:0.7025 Train-Acc:0.732
I:30 Test-Acc:0.734 Train-Acc:0.763
I:40 Test-Acc:0.7663 Train-Acc:0.794
I:50 Test-Acc:0.7913 Train-Acc:0.819
I:60 Test-Acc:0.8102 Train-Acc:0.849
I:70 Test-Acc:0.8228 Train-Acc:0.864
I:80 Test-Acc:0.831 Train-Acc:0.867
I:90 Test-Acc:0.8364 Train-Acc:0.885
I:100 Test-Acc:0.8407 Train-Acc:0.883
I:110 Test-Acc:0.845 Train-Acc:0.891
I:120 Test-Acc:0.8481 Train-Acc:0.901
I:130 Test-Acc:0.8505 Train-Acc:0.901
I:140 Test-Acc:0.8526 Train-Acc:0.905
I:150 Test-Acc:0.8555 Train-Acc:0.914
I:160 Test-Acc:0.8577 Train-Acc:0.925
I:170 Test-Acc:0.8596 Train-Acc:0.918
I:180 Test-Acc:0.8619 Train-Acc:0.933
I:190 Test-Acc:0.863 Train-Acc:0.933
I:200 Test-Acc:0.8642 Train-Acc:0.926
I:210 Test-Acc:0.8653 Train-Acc:0.931
I:220 Test-Acc:0.8668 Train-Acc:0.93
I:230 Test-Acc:0.8672 Train-Acc:0.937
I:240 Test-A

# Modified Classifier (Part 1)

In [2]:
import numpy as np, sys
np.random.seed(1)

from keras.datasets import mnist
from keras.preprocessing.image import ImageDataGenerator
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score
# With data augmentation to prevent overfitting (accuracy 0.99286)

# Part C
def augment_images() :
  gen = ImageDataGenerator(
    rotation_range=10,  
    width_shift_range=0.1,  
    height_shift_range=0.1,
  )  

  (X_train, y_train), (X_test, y_test) = mnist.load_data()

  # reshape to be [samples][width][height][channels]
  X_train = X_train.reshape((X_train.shape[0], 28, 28, 1))

  gen = gen.flow(X_train, y_train)

  # configure batch size and retrieve one batch of images
  data_list_x = []
  data_list_y = []
  batch_index = 0

  while batch_index <= gen.batch_index:
      data_x, data_y = gen.next()
      data_list_x.append(data_x[0])
      data_list_y.append(data_y[0])
      batch_index = batch_index + 1

  # now, data_array is the numeric data of whole images
  x_aug_train = np.asarray(data_list_x)
  X_train = np.append(X_train, x_aug_train, axis=0)
  X_train = X_train.reshape((X_train.shape[0], 28, 28))

  y_aug_train = np.asarray(data_list_y)

  y_train = np.append(y_train, y_aug_train, axis=0)

  # sys.exit()
  return (X_train, y_train), (X_test, y_test) 

# (x_train, y_train), (x_test, y_test) = mnist.load_data()

(x_train, y_train), (x_test, y_test) = augment_images()

# Part F
scaler = StandardScaler()
# fit and transform in one step
x_train = x_train.reshape((x_train.shape[0], 28 * 28))
x_train = scaler.fit_transform(x_train)
# inverse transform
x_train = scaler.inverse_transform(x_train)
x_train = x_train.reshape((x_train.shape[0], 28, 28))

images, labels = (x_train[0:1000].reshape(1000, 28*28)/255, y_train[0:1000])

one_hot_labels = np.zeros((len(labels), 10))

for i, l in enumerate(labels):
  one_hot_labels[i][l] = 1

labels = one_hot_labels

test_images = x_test.reshape(len(x_test), 28*28) / 255
test_labels = np.zeros((len(y_test), 10))

for i, l in enumerate(y_test):
  test_labels[i][l] = 1

def tanh(x):
  return np.tanh(x)

def tanh2deriv(output):
  return 1 - (output ** 2)

def softmax(x):
  temp = np.exp(x)
  return temp / np.sum(temp, axis=1, keepdims=True)


# Part G
alpha, iterations, hidden_size = (2, 300, 100)
# alpha, iterations, hidden_size = (2, 500, 200)
pixels_per_image, num_labels = (784, 10)
batch_size = 100
# batch_size = 200

weights_0_1 = 0.02 * np.random.random((pixels_per_image, hidden_size)) - 0.01
weights_1_2 = 0.2 * np.random.random((hidden_size, hidden_size)) - 0.1
weights_2_3 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1

relu = lambda x:(x >= 0) * x
relu2deriv = lambda x: (x >= 0)

for j in range(iterations):
  correct_cnt = 0
  # Part A
  for i in range(int(len(images) / batch_size)):
    batch_start, batch_end = ((i * batch_size), ((i + 1) * batch_size))
    layer_0 = images[batch_start:batch_end]
    
    # Part E
    layer_1 = tanh(np.dot(layer_0, weights_0_1))
    # Part B
    dropout_mask_1 = np.random.randint(2, size=layer_1.shape)
    layer_1 *= dropout_mask_1 * 2
    
    # Part E
    layer_2 = relu(np.dot(layer_1, weights_1_2))
    # Part B
    dropout_mask_2 = np.random.randint(2, size=layer_2.shape)
    layer_2 *= dropout_mask_2 * 2

    # Part D
    layer_3 = softmax(np.dot(layer_2, weights_2_3))

    for k in range(batch_size):
      correct_cnt += int(np.argmax(layer_3[k:k+1]) == np.argmax(labels[batch_start + k: batch_start + k + 1]))
    
    layer_3_delta = (labels[batch_start:batch_end] - layer_3) /(batch_size * layer_3.shape[0])
    
    layer_2_delta = layer_3_delta.dot(weights_2_3.T) * relu2deriv(layer_2)
    layer_2_delta *= dropout_mask_2

    layer_1_delta = layer_2_delta.dot(weights_1_2.T) * tanh2deriv(layer_1)
    layer_1_delta *= dropout_mask_1

    weights_2_3 += alpha * layer_2.T.dot(layer_3_delta)
    
    weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
    
    weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)
  test_correct_cnt = 0
  total_test_correct_cnt = []
  total_actual_correct_cnt = []
  for i in range(len(test_images)):
    layer_0 = test_images[i: i+ 1]
    layer_1 = relu(np.dot(layer_0, weights_0_1))
    layer_2 = relu(np.dot(layer_1, weights_1_2))
    layer_3 = np.dot(layer_2, weights_2_3)
    test_correct_cnt += int(np.argmax(layer_3) == np.argmax(test_labels[i:i+1]))
    if int(np.argmax(layer_3) == np.argmax(test_labels[i:i+1])) == 1:
      total_test_correct_cnt.append(1)
      total_actual_correct_cnt.append(1)
    else:
      total_test_correct_cnt.append(0)
      total_actual_correct_cnt.append(1)
  if (j % 10 == 0):
    # Part H
    sys.stdout.write("\n" + "I:" + str(j) + " Test-Acc:" + str(test_correct_cnt/float(len(test_images))) + " Train-Acc:" + str(correct_cnt/float(len(images))))

    total_test_correct_cnt = np.asarray(total_test_correct_cnt)
    
    print("\n",f1_score(total_actual_correct_cnt, total_test_correct_cnt, average=None), "\n")
# Part I not sure what top common errors would be here when the code is checking it.


I:0 Test-Acc:0.0825 Train-Acc:0.09
 [0.         0.15242494] 


I:10 Test-Acc:0.4592 Train-Acc:0.376
 [0.         0.62938596] 


I:20 Test-Acc:0.5354 Train-Acc:0.454
 [0.         0.69740784] 


I:30 Test-Acc:0.5918 Train-Acc:0.509
 [0.         0.74356075] 


I:40 Test-Acc:0.5925 Train-Acc:0.557
 [0.         0.74411303] 


I:50 Test-Acc:0.5839 Train-Acc:0.573
 [0.         0.73729402] 


I:60 Test-Acc:0.5821 Train-Acc:0.571
 [0.        0.7358574] 


I:70 Test-Acc:0.6016 Train-Acc:0.588
 [0.         0.75124875] 


I:80 Test-Acc:0.6344 Train-Acc:0.632
 [0.         0.77630935] 


I:90 Test-Acc:0.6748 Train-Acc:0.665
 [0.         0.80582756] 


I:100 Test-Acc:0.7186 Train-Acc:0.706
 [0.         0.83626207] 


I:110 Test-Acc:0.7465 Train-Acc:0.714
 [0.         0.85485256] 


I:120 Test-Acc:0.7661 Train-Acc:0.751
 [0.         0.86756129] 


I:130 Test-Acc:0.7825 Train-Acc:0.76
 [0.         0.87798036] 


I:140 Test-Acc:0.7948 Train-Acc:0.778
 [0.         0.88566971] 


I:150 Test-Acc:0.8031 Tr

# Keras implementation (Part 2)

In [3]:
# We haven't learned much on this so I had to look up this guide: https://keras.io/examples/vision/mnist_convnet/

####Setup####

import numpy as np
from tensorflow import keras
from tensorflow.keras import layers

####Prepare the data####

# Model / data parameters
num_class = 10 # There are 10 classes, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0
input_shape = (28, 28, 1) # Changing to 28 by 28, similar to what we did above

# loading mnist data
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Scaling the training and testing data appropriately from 0 to 1
x_train = x_train.astype("float32") / 255
x_test = x_test.astype("float32") / 255

# Making shape of images 28 by 28
x_train = np.expand_dims(x_train, -1)
x_test = np.expand_dims(x_test, -1)

# 
print("x_train shape:", x_train.shape)
print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")


# convert class vectors to binary class matrices. Needed for keras model
y_train = keras.utils.to_categorical(y_train, num_class)
y_test = keras.utils.to_categorical(y_test, num_class)

####Build the model#### 
# This is the model they created to get 99% accuracy. Crazy. Mine is similar to an extent. However if I copy this same process, my results are much lower.
# possibliy there is an issue with my code :(
mnist_model = keras.Sequential(
    [
        keras.Input(shape=input_shape), # input layer
        layers.Conv2D(32, kernel_size=(3, 3), activation="relu"), # relu layer
        layers.MaxPooling2D(pool_size=(2, 2)), # not sure why we need this. possibly why my code doesn't have as good accuracy 
        layers.Conv2D(64, kernel_size=(3, 3), activation="relu"), # another relu layer
        layers.MaxPooling2D(pool_size=(2, 2)), # 
        layers.Flatten(),
        layers.Dropout(0.5), # dropout rate
        layers.Dense(num_class, activation="softmax"), # softmax for result
    ]
)

mnist_model.summary()

####Train the mnist_model####
batch_size = 128
epochs = 15

mnist_model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

mnist_model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.1)

####Evaluate the trained mnist_model####
score = mnist_model.evaluate(x_test, y_test, verbose=0)
print("Test loss:", score[0])
print("Test accuracy:", score[1])

x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 13, 13, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 11, 11, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 5, 5, 64)          0         
_________________________________________________________________
flatten (Flatten)            (None, 1600)              0         
_________________________________________________________________
dropout (Dropout)            (None, 1600)              0         
_________________________________________________