In [20]:
# Overfitting MNIST

In [21]:
import numpy as np
from keras.datasets import mnist
np.random.seed(1)

In [22]:
relu = lambda x: (x > 0) * x
relu2deriv = lambda x: x > 0

In [23]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [24]:
images, labels = (x_train[0:1000].reshape(1000, 28*28) / 255, y_train[0:1000])

In [25]:
one_hot_labels = np.zeros((len(labels), 10))
for i, l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels

In [26]:
test_images = x_test.reshape(len(x_test), 28*28) / 255

In [27]:
test_labels = np.zeros((len(y_test), 10))
for i, l in enumerate(y_test):
    test_labels[i][l] = 1

In [28]:
alpha = 0.005
iterations = 350
hidden_size = 40
pixels_per_image = 784
num_labels = 10

In [29]:
weights_0_1 = 0.2 * np.random.random((pixels_per_image, hidden_size)) - 0.1
weights_1_2 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1

In [11]:
for j in range(iterations):
    error = 0.0
    correct_count = 0
    
    for i in range(len(images)):
        layer_0 = images[i:i+1]
        layer_1 = relu(np.dot(layer_0, weights_0_1))
        layer_2 = np.dot(layer_1, weights_1_2)
        
        error += np.sum((layer_2 - labels[i:i+1]) **2)
        correct_count += int(np.argmax(layer_2) == np.argmax(labels[i:i+1]))
        
        layer_2_delta = layer_2 - labels[i:i+1]
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)
        
        weights_1_2 -= alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 -= alpha * layer_0.T.dot(layer_1_delta)

    print("\nI:" + str(j) + " Error:" + str(error/float(len(images)))[0:5] + " Correct:" + str(correct_count/float(len(images))))
    
    if not j % 10 or j == iterations - 1:
        error = 0.0
        correct_count = 0
        for i in range(len(test_images)):
            layer_0 = test_images[i:i+1]
            layer_1 = relu(np.dot(layer_0, weights_0_1))
            layer_2 = np.dot(layer_1, weights_1_2)
            
            error += np.sum((layer_2 - test_labels[i:i+1]) ** 2)
            correct_count += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))
        
        print("\nTest error:" + str(error/float(len(test_images)))[0:5] + " Test accuracy:" + str(correct_count/float(len(test_images))))


I:0 Error:0.003 Correct:0.999
Test error:0.355 Test accuracy:0.8291

I:1 Error:0.003 Correct:0.999

I:2 Error:0.003 Correct:0.999

I:3 Error:0.003 Correct:0.999

I:4 Error:0.003 Correct:0.999

I:5 Error:0.003 Correct:0.999

I:6 Error:0.003 Correct:0.999

I:7 Error:0.003 Correct:0.999

I:8 Error:0.003 Correct:0.999

I:9 Error:0.003 Correct:0.999

I:10 Error:0.003 Correct:0.999
Test error:0.356 Test accuracy:0.8288

I:11 Error:0.003 Correct:0.999

I:12 Error:0.003 Correct:0.999

I:13 Error:0.003 Correct:0.999

I:14 Error:0.003 Correct:0.999

I:15 Error:0.003 Correct:0.999

I:16 Error:0.003 Correct:0.999

I:17 Error:0.003 Correct:0.999

I:18 Error:0.003 Correct:0.999

I:19 Error:0.003 Correct:0.999

I:20 Error:0.003 Correct:0.999
Test error:0.357 Test accuracy:0.8287

I:21 Error:0.003 Correct:0.999

I:22 Error:0.003 Correct:0.999

I:23 Error:0.003 Correct:0.999

I:24 Error:0.003 Correct:0.999

I:25 Error:0.003 Correct:0.999

I:26 Error:0.003 Correct:0.999

I:27 Error:0.003 Correct:0.999



I:232 Error:0.001 Correct:1.0

I:233 Error:0.001 Correct:1.0

I:234 Error:0.001 Correct:1.0

I:235 Error:0.001 Correct:1.0

I:236 Error:0.001 Correct:1.0

I:237 Error:0.001 Correct:1.0

I:238 Error:0.001 Correct:1.0

I:239 Error:0.001 Correct:1.0

I:240 Error:0.001 Correct:1.0
Test error:0.365 Test accuracy:0.8251

I:241 Error:0.001 Correct:1.0

I:242 Error:0.001 Correct:1.0

I:243 Error:0.001 Correct:1.0

I:244 Error:0.001 Correct:1.0

I:245 Error:0.001 Correct:1.0

I:246 Error:0.001 Correct:1.0

I:247 Error:0.001 Correct:1.0

I:248 Error:0.001 Correct:1.0

I:249 Error:0.001 Correct:1.0

I:250 Error:0.001 Correct:1.0
Test error:0.365 Test accuracy:0.8247

I:251 Error:0.001 Correct:1.0

I:252 Error:0.001 Correct:1.0

I:253 Error:0.001 Correct:1.0

I:254 Error:0.001 Correct:1.0

I:255 Error:0.001 Correct:1.0

I:256 Error:0.001 Correct:1.0

I:257 Error:0.001 Correct:1.0

I:258 Error:0.001 Correct:1.0

I:259 Error:0.001 Correct:1.0

I:260 Error:0.001 Correct:1.0
Test error:0.365 Test acc

In [13]:
# Dropout

In [30]:
alpha = 0.005
iterations = 350
hidden_size = 40
pixels_per_image = 784
num_labels = 10

In [31]:
weights_0_1 = 0.2 * np.random.random((pixels_per_image, hidden_size)) - 0.1
weights_1_2 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1

In [None]:
for j in range(iterations):
    error = 0.0
    correct_count = 0
    
    for i in range(len(images)):
        dropout_mask = np.random.randint(2, size=layer_1.shape)
        layer_0 = images[i:i+1]
        layer_1 = relu(np.dot(layer_0, weights_0_1))
        layer_1 *= dropout_mask
        layer_2 = np.dot(layer_1, weights_1_2)
        
        error += np.sum((layer_2 - labels[i:i+1]) **2)
        correct_count += int(np.argmax(layer_2) == np.argmax(labels[i:i+1]))
        
        layer_2_delta = layer_2 - labels[i:i+1]
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)
        layer_1_delta *= dropout_mask
        
        weights_1_2 -= alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 -= alpha * layer_0.T.dot(layer_1_delta)
    
    if not j % 10 or j == iterations - 1:
        error = 0.0
        correct_count = 0
        for i in range(len(test_images)):
            layer_0 = test_images[i:i+1]
            layer_1 = relu(np.dot(layer_0, weights_0_1))
            layer_2 = np.dot(layer_1, weights_1_2)
            
            error += np.sum((layer_2 - test_labels[i:i+1]) ** 2)
            correct_count += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))
        
        print("\nI:" + str(j) + " Error:" + str(error/float(len(images)))[0:5] + " Correct:" + str(correct_count/float(len(images))))
        print("\nTest error:" + str(error/float(len(test_images)))[0:5] + " Test accuracy:" + str(correct_count/float(len(test_images))))
i = 0
layer_0 = images[i:i+1]


I:0 Error:0.904 Correct:0.233

Test error:0.772 Test accuracy:0.4891

I:1 Error:0.771 Correct:0.441

I:2 Error:0.704 Correct:0.51

I:3 Error:0.658 Correct:0.582

I:4 Error:0.616 Correct:0.608

I:5 Error:0.592 Correct:0.629

I:6 Error:0.556 Correct:0.661

I:7 Error:0.534 Correct:0.67

I:8 Error:0.517 Correct:0.7

I:9 Error:0.488 Correct:0.723

I:10 Error:0.468 Correct:0.738

Test error:0.501 Test accuracy:0.8042

I:11 Error:0.475 Correct:0.747

I:12 Error:0.448 Correct:0.767

I:13 Error:0.428 Correct:0.783

I:14 Error:0.423 Correct:0.783

I:15 Error:0.421 Correct:0.792

I:16 Error:0.415 Correct:0.786

I:17 Error:0.417 Correct:0.789

I:18 Error:0.417 Correct:0.792

I:19 Error:0.397 Correct:0.784

I:20 Error:0.386 Correct:0.814

Test error:0.507 Test accuracy:0.8443

I:21 Error:0.373 Correct:0.826

I:22 Error:0.386 Correct:0.817

I:23 Error:0.392 Correct:0.801

I:24 Error:0.392 Correct:0.799

I:25 Error:0.372 Correct:0.823

I:26 Error:0.366 Correct:0.828

I:27 Error:0.373 Correct:0.822




I:226 Error:0.267 Correct:0.89

I:227 Error:0.263 Correct:0.905

I:228 Error:0.255 Correct:0.907

I:229 Error:0.267 Correct:0.886

I:230 Error:0.249 Correct:0.907

Test error:0.600 Test accuracy:0.8409

I:231 Error:0.244 Correct:0.91

I:232 Error:0.258 Correct:0.905

I:233 Error:0.258 Correct:0.911

I:234 Error:0.257 Correct:0.899

I:235 Error:0.260 Correct:0.902

I:236 Error:0.252 Correct:0.911

I:237 Error:0.252 Correct:0.912

I:238 Error:0.239 Correct:0.922

I:239 Error:0.262 Correct:0.912

I:240 Error:0.258 Correct:0.905

Test error:0.683 Test accuracy:0.837

I:241 Error:0.256 Correct:0.907

I:242 Error:0.251 Correct:0.909

I:243 Error:0.244 Correct:0.918

I:244 Error:0.242 Correct:0.921

I:245 Error:0.264 Correct:0.892

I:246 Error:0.271 Correct:0.897

I:247 Error:0.259 Correct:0.907

I:248 Error:0.245 Correct:0.911

I:249 Error:0.233 Correct:0.929

I:250 Error:0.248 Correct:0.912

Test error:0.691 Test accuracy:0.8429

I:251 Error:0.257 Correct:0.907

I:252 Error:0.237 Correct:0