In [1]:
# Derivatives

In [2]:
import numpy as np
from keras.datasets import mnist

Using TensorFlow backend.


In [3]:
np.random.seed(1)

In [4]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [5]:
images = x_train[0:1000].reshape(1000, 28*28) / 255
labels = y_train[0:1000]
one_hot_labels = np.zeros((len(labels), 10))
for i, l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels

In [6]:
test_images = x_test.reshape(len(x_test), 28*28) / 255
test_labels = np.zeros((len(y_test), 10))
for i, l in enumerate(y_test):
    test_labels[i][l] = 1

In [7]:
def tanh(x):
    return np.tanh(x)
def tanh2deriv(output):
    return 1 - (output ** 2)

In [8]:
def softmax(x):
    temp = np.exp(x)
    return temp / np.sum(temp, axis=1, keepdims=True)

In [9]:
alpha = 2
iterations = 300
hidden_size = 100
pixels_per_image = 784
num_labels = 10
batch_size = 100

In [10]:
weights_0_1 = 0.02 * np.random.random((pixels_per_image, hidden_size)) - 0.01
weights_1_2 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1

In [11]:
for j in range(iterations):
    correct_count = 0
    for i in range(len(images) // batch_size):
        batch_start = batch_size * i
        batch_end = batch_size * (i+1)
        layer_0 = images[batch_start:batch_end]
        layer_1 = tanh(np.dot(layer_0, weights_0_1))
        dropout_mask = np.random.randint(2, size=layer_1.shape)
        layer_1 *= dropout_mask * 2
        layer_2 = softmax(np.dot(layer_1, weights_1_2))
        
        for k in range(batch_size):
            correct_count += np.argmax(layer_2[k:k+1]) == np.argmax(labels[batch_start+k:batch_start+k+1])
        
        layer_2_delta = (layer_2 - labels[batch_start:batch_end]) / (batch_size * layer_2.shape[0])
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * tanh2deriv(layer_1)
        layer_1_delta *= dropout_mask
        
        weights_1_2 -= alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 -= alpha * layer_0.T.dot(layer_1_delta)
    
    test_correct_count = 0
    for i in range(len(test_images)):
        layer_0 = test_images[i:i+1]
        layer_1 = tanh(np.dot(layer_0, weights_0_1))
        layer_2 = np.dot(layer_1, weights_1_2)
        test_correct_count += np.argmax(layer_2) == np.argmax(test_labels[i:i+1])
    
    if j % 10 == 9:
        print("I:" + str(j) + " Test acc:" + str(test_correct_count/len(test_images)) + " Train acc:" + str(correct_count / len(images)))

I:9 Test acc:0.6882 Train acc:0.725
I:19 Test acc:0.7003 Train acc:0.725
I:29 Test acc:0.7289 Train acc:0.766
I:39 Test acc:0.7634 Train acc:0.809
I:49 Test acc:0.7899 Train acc:0.819
I:59 Test acc:0.8078 Train acc:0.849
I:69 Test acc:0.8221 Train acc:0.856
I:79 Test acc:0.8303 Train acc:0.874
I:89 Test acc:0.836 Train acc:0.887
I:99 Test acc:0.8403 Train acc:0.891
I:109 Test acc:0.8447 Train acc:0.889
I:119 Test acc:0.8483 Train acc:0.896
I:129 Test acc:0.8494 Train acc:0.902
I:139 Test acc:0.8521 Train acc:0.909
I:149 Test acc:0.8549 Train acc:0.915
I:159 Test acc:0.8573 Train acc:0.92
I:169 Test acc:0.859 Train acc:0.923
I:179 Test acc:0.8627 Train acc:0.932
I:189 Test acc:0.8624 Train acc:0.929
I:199 Test acc:0.8634 Train acc:0.923
I:209 Test acc:0.8648 Train acc:0.934
I:219 Test acc:0.8663 Train acc:0.94
I:229 Test acc:0.8666 Train acc:0.935
I:239 Test acc:0.8677 Train acc:0.945
I:249 Test acc:0.8685 Train acc:0.94
I:259 Test acc:0.8691 Train acc:0.933
I:269 Test acc:0.8704 Train 