In [16]:
from tensorflow.keras.datasets import mnist
import numpy as np 
import sys

In [17]:
def tanh(x):
    return np.tanh(x)

def tanh2deriv(output):
    return 1 - (output ** 2)

def softmax(x):
    temp = np.exp(x)
    return x / np.sum(temp)

In [18]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [19]:
images, labels = (x_train[0:1000].reshape(1000,28*28)/ 255, y_train[0:1000])
one_hot_labels = np.zeros((len(labels),10))
for i,l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels
    
test_images = x_test.reshape(len(x_test),28*28) / 255
test_labels = np.zeros((len(y_test),10))
for i,l in enumerate(y_test):
    test_labels[i][l] = 1

In [None]:
np.random.seed(1)

lr = 0.005
iterations = 200
hidden_size = 40
pixels_per_image = 784
num_labels = 10
batch_size = 100

weights_0_1 = 0.02 * np.random.random((pixels_per_image, hidden_size)) - 0.01
weights_1_2 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1

for epoch in range(iterations):
    correct_cnt=0
    for i in range(int(len(images) / batch_size)):
        batch_start, batch_end = (i*batch_size, (i+1) * batch_size)
        layer_0 = images[batch_start:batch_end]
        layer_1 = tanh(np.dot(layer_0, weights_0_1))
        dropout = np.random.randint(2, size=layer_1.shape)
        layer_1 *= dropout*2
        layer_2 = softmax(np.dot(layer_1, weights_1_2))
        
        for k in range(batch_size):
            correct_cnt += int(np.argmax(layer_2[k:k+1]) == np.argmax(labels[batch_start+k:batch_start+k+1]))
            layer_2_delta = (labels[batch_start:batch_end]-layer_2) / (batch_size * layer_2.shape[0])
            layer_1_delta = layer_2_delta.dot(weights_1_2.T) * tanh2deriv(layer_1)
            layer_1_delta *= dropout
            weights_1_2 += lr * layer_1.T.dot(layer_2_delta)
            weights_0_1 += lr * layer_0.T.dot(layer_1_delta)
            
            
        test_correct_cnt = 0
        for i in range(len(test_images)):
            layer_0 = test_images[i:i+1]
            layer_1 = tanh(np.dot(layer_0,weights_0_1))
            layer_2 = np.dot(layer_1,weights_1_2)
            test_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))
        if(epoch % 10 == 0):
            sys.stdout.write("\n"+ "I:" + str(epoch) + \
            " Test-Acc:"+str(test_correct_cnt/float(len(test_images)))+\
            " Train-Acc:" + str(correct_cnt/float(len(images))))



I:0 Test-Acc:0.0581 Train-Acc:0.009
I:0 Test-Acc:0.0601 Train-Acc:0.025
I:0 Test-Acc:0.0638 Train-Acc:0.036
I:0 Test-Acc:0.0688 Train-Acc:0.045
I:0 Test-Acc:0.0729 Train-Acc:0.052
I:0 Test-Acc:0.0784 Train-Acc:0.065
I:0 Test-Acc:0.0826 Train-Acc:0.071
I:0 Test-Acc:0.0872 Train-Acc:0.079
I:0 Test-Acc:0.0908 Train-Acc:0.09
I:0 Test-Acc:0.0949 Train-Acc:0.106
I:10 Test-Acc:0.2436 Train-Acc:0.022
I:10 Test-Acc:0.2425 Train-Acc:0.042
I:10 Test-Acc:0.2464 Train-Acc:0.061
I:10 Test-Acc:0.2427 Train-Acc:0.078
I:10 Test-Acc:0.243 Train-Acc:0.101
I:10 Test-Acc:0.2406 Train-Acc:0.119
I:10 Test-Acc:0.2443 Train-Acc:0.139
I:10 Test-Acc:0.2359 Train-Acc:0.168
I:10 Test-Acc:0.2372 Train-Acc:0.185
I:10 Test-Acc:0.2405 Train-Acc:0.207
I:20 Test-Acc:0.2912 Train-Acc:0.025
I:20 Test-Acc:0.2922 Train-Acc:0.041
I:20 Test-Acc:0.2942 Train-Acc:0.065
I:20 Test-Acc:0.2929 Train-Acc:0.082
I:20 Test-Acc:0.2938 Train-Acc:0.105
I:20 Test-Acc:0.2918 Train-Acc:0.129
I:20 Test-Acc:0.2938 Train-Acc:0.153
I:20 Test-Ac