In [1]:
# Overfitting MNIST

In [2]:
import numpy as np
from keras.datasets import mnist
np.random.seed(1)

Using TensorFlow backend.


In [3]:
relu = lambda x: (x > 0) * x
relu2deriv = lambda x: x > 0

In [4]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [5]:
images, labels = (x_train[0:1000].reshape(1000, 28*28) / 255, y_train[0:1000])

In [6]:
one_hot_labels = np.zeros((len(labels), 10))
for i, l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels

In [7]:
test_images = x_test.reshape(len(x_test), 28*28) / 255

In [8]:
test_labels = np.zeros((len(y_test), 10))
for i, l in enumerate(y_test):
    test_labels[i][l] = 1

In [9]:
alpha = 0.005
iterations = 300
hidden_size = 100
pixels_per_image = 784
num_labels = 10

In [10]:
weights_0_1 = 0.2 * np.random.random((pixels_per_image, hidden_size)) - 0.1
weights_1_2 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1

In [11]:
for j in range(iterations):
    error = 0.0
    correct_count = 0
    
    for i in range(len(images)):
        layer_0 = images[i:i+1]
        layer_1 = relu(np.dot(layer_0, weights_0_1))
        layer_2 = np.dot(layer_1, weights_1_2)
        
        error += np.sum((layer_2 - labels[i:i+1]) **2)
        correct_count += int(np.argmax(layer_2) == np.argmax(labels[i:i+1]))
        
        layer_2_delta = layer_2 - labels[i:i+1]
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)
        
        weights_1_2 -= alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 -= alpha * layer_0.T.dot(layer_1_delta)
    
    if not j % 10 or j == iterations - 1:
        test_error = 0.0
        test_correct_count = 0
        for i in range(len(test_images)):
            layer_0 = test_images[i:i+1]
            layer_1 = relu(np.dot(layer_0, weights_0_1))
            layer_2 = np.dot(layer_1, weights_1_2)
            
            test_error += np.sum((layer_2 - test_labels[i:i+1]) ** 2)
            test_correct_count += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))
        

        print("\nI:" + str(j) + " Train error:" + str(error/float(len(images)))[0:5] + " Train accuracy:" + str(correct_count/float(len(images))))
        print("Test error:" + str(test_error/float(len(test_images)))[0:5] + " Test accuracy:" + str(test_correct_count/float(len(test_images))))


I:0 Train error:0.664 Train accuracy:0.634
Test error:0.569 Test accuracy:0.6984

I:10 Train error:0.161 Train accuracy:0.971
Test error:0.319 Test accuracy:0.8765

I:20 Train error:0.096 Train accuracy:0.997
Test error:0.297 Test accuracy:0.8858

I:30 Train error:0.065 Train accuracy:1.0
Test error:0.293 Test accuracy:0.8876

I:40 Train error:0.047 Train accuracy:1.0
Test error:0.294 Test accuracy:0.8888

I:50 Train error:0.036 Train accuracy:1.0
Test error:0.296 Test accuracy:0.8877

I:60 Train error:0.028 Train accuracy:1.0
Test error:0.297 Test accuracy:0.8868

I:70 Train error:0.023 Train accuracy:1.0
Test error:0.299 Test accuracy:0.8869

I:80 Train error:0.019 Train accuracy:1.0
Test error:0.300 Test accuracy:0.8863

I:90 Train error:0.016 Train accuracy:1.0
Test error:0.302 Test accuracy:0.8847

I:100 Train error:0.013 Train accuracy:1.0
Test error:0.303 Test accuracy:0.8846

I:110 Train error:0.011 Train accuracy:1.0
Test error:0.305 Test accuracy:0.8838

I:120 Train error:0.

In [12]:
# Dropout

In [13]:
alpha = 0.005
iterations = 300
hidden_size = 100
pixels_per_image = 784
num_labels = 10

In [14]:
weights_0_1 = 0.2 * np.random.random((pixels_per_image, hidden_size)) - 0.1
weights_1_2 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1

In [15]:
for j in range(iterations):
    error = 0.0
    correct_count = 0
    
    for i in range(len(images)):
        dropout_mask = np.random.randint(2, size=layer_1.shape)
        layer_0 = images[i:i+1]
        layer_1 = relu(np.dot(layer_0, weights_0_1))
        layer_1 *= dropout_mask * 2
        layer_2 = np.dot(layer_1, weights_1_2)
        
        error += np.sum((layer_2 - labels[i:i+1]) ** 2)
        correct_count += int(np.argmax(layer_2) == np.argmax(labels[i:i+1]))
        
        layer_2_delta = layer_2 - labels[i:i+1]
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)
        layer_1_delta *= dropout_mask
        
        weights_1_2 -= alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 -= alpha * layer_0.T.dot(layer_1_delta)
    
    if not j % 10 or j == iterations - 1:
        test_error = 0.0
        test_correct_count = 0
        for i in range(len(test_images)):
            layer_0 = test_images[i:i+1]
            layer_1 = relu(np.dot(layer_0, weights_0_1))
            layer_2 = np.dot(layer_1, weights_1_2)
            
            test_error += np.sum((layer_2 - test_labels[i:i+1]) ** 2)
            test_correct_count += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))
        
        print("\nI:" + str(j) + " Train error:" + str(error/float(len(images)))[0:5] + " Train accuracy:" + str(correct_count/float(len(images))))
        print("Test error:" + str(test_error/float(len(test_images)))[0:5] + " Test accuracy:" + str(test_correct_count/float(len(test_images))))
i = 0
layer_0 = images[i:i+1]


I:0 Train error:0.899 Train accuracy:0.381
Test error:0.634 Test accuracy:0.6519

I:10 Train error:0.396 Train accuracy:0.822
Test error:0.360 Test accuracy:0.8334

I:20 Train error:0.319 Train accuracy:0.881
Test error:0.318 Test accuracy:0.8515

I:30 Train error:0.278 Train accuracy:0.909
Test error:0.302 Test accuracy:0.8649

I:40 Train error:0.257 Train accuracy:0.914
Test error:0.291 Test accuracy:0.8702

I:50 Train error:0.246 Train accuracy:0.945
Test error:0.284 Test accuracy:0.871

I:60 Train error:0.231 Train accuracy:0.953
Test error:0.289 Test accuracy:0.8736

I:70 Train error:0.222 Train accuracy:0.962
Test error:0.281 Test accuracy:0.8762

I:80 Train error:0.203 Train accuracy:0.967
Test error:0.277 Test accuracy:0.8791

I:90 Train error:0.201 Train accuracy:0.961
Test error:0.284 Test accuracy:0.8737

I:100 Train error:0.204 Train accuracy:0.969
Test error:0.281 Test accuracy:0.8771

I:110 Train error:0.186 Train accuracy:0.964
Test error:0.283 Test accuracy:0.8706

I:1

In [16]:
# Mini-batched gradient descent

In [17]:
alpha = 0.1
iterations = 300
hidden_size = 100
pixels_per_image = 784
num_labels = 10
batch_size = 100

In [18]:
weights_0_1 = 0.2 * np.random.random((pixels_per_image, hidden_size)) - 0.1
weights_1_2 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1

In [19]:
for j in range(iterations):
    error = 0.0
    correct_count = 0
    
    for i in range(int(len(images) / batch_size)):
        dropout_mask = np.random.randint(2, size=layer_1.shape)
        
        batch_start, batch_end = (i * batch_size, (i+1) * batch_size)
        layer_0 = images[batch_start:batch_end]
        layer_1 = relu(np.dot(layer_0, weights_0_1))
        layer_1 *= dropout_mask * 2
        layer_2 = np.dot(layer_1, weights_1_2)
        
        error += np.sum((layer_2 - labels[batch_start:batch_end]) ** 2)
        for k in range(batch_size):
            correct_count += int(np.argmax(layer_2[k:k+1]) == np.argmax(labels[batch_start+k:batch_start+k+1]))
        
        layer_2_delta = (layer_2 - labels[batch_start:batch_end]) / batch_size
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)
        layer_1_delta *= dropout_mask
        
        weights_1_2 -= alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 -= alpha * layer_0.T.dot(layer_1_delta)
    
    if not j % 10 or j == iterations - 1:
        test_error = 0.0
        test_correct_count = 0
        for i in range(len(test_images)):
            layer_0 = test_images[i:i+1]
            layer_1 = relu(np.dot(layer_0, weights_0_1))
            layer_2 = np.dot(layer_1, weights_1_2)
            
            test_error += np.sum((layer_2 - test_labels[i:i+1]) ** 2)
            test_correct_count += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))
        
        print("\nI:" + str(j) + " Train error:" + str(error/float(len(images)))[0:5] + " Train accuracy:" + str(correct_count/float(len(images))))
        print("Test error:" + str(test_error/float(len(test_images)))[0:5] + " Test accuracy:" + str(test_correct_count/float(len(test_images))))
i = 0
layer_0 = images[i:i+1]


I:0 Train error:1.344 Train accuracy:0.161
Test error:0.829 Test accuracy:0.3751

I:10 Train error:0.613 Train accuracy:0.656
Test error:0.558 Test accuracy:0.733

I:20 Train error:0.513 Train accuracy:0.74
Test error:0.471 Test accuracy:0.7828

I:30 Train error:0.449 Train accuracy:0.779
Test error:0.419 Test accuracy:0.8048

I:40 Train error:0.406 Train accuracy:0.817
Test error:0.383 Test accuracy:0.8194

I:50 Train error:0.373 Train accuracy:0.844
Test error:0.362 Test accuracy:0.8314

I:60 Train error:0.362 Train accuracy:0.857
Test error:0.346 Test accuracy:0.8379

I:70 Train error:0.346 Train accuracy:0.857
Test error:0.336 Test accuracy:0.8451

I:80 Train error:0.325 Train accuracy:0.866
Test error:0.323 Test accuracy:0.8527

I:90 Train error:0.314 Train accuracy:0.886
Test error:0.317 Test accuracy:0.8569

I:100 Train error:0.304 Train accuracy:0.896
Test error:0.313 Test accuracy:0.8583

I:110 Train error:0.299 Train accuracy:0.897
Test error:0.307 Test accuracy:0.861

I:120