<a href="https://colab.research.google.com/github/woodRock/grokking-deep-learning/blob/main/chapter_9_modeling_probabilites_and_nonlinearities.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Chapter 9 | Modelling probabilites and nonlinearities

In [4]:
import numpy as np

# Freeze the seed for reproducability.
np.random.seed(1)

# Load the dataset
from keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

images, labels = (x_train[0:1000].reshape(1000,28*28) / 255, y_train[0:1000])

one_hot_labels = np.zeros((len(labels),10))
for i,l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels

test_images = x_test.reshape(len(x_test),28*28) / 255
test_labels = np.zeros((len(y_test),10))
for i,l in enumerate(y_test):
    test_labels[i][l] = 1

# Activation functions
def tanh(x):
    return np.tanh(x)

def tanh2deriv(output):
    return 1 - (output ** 2)

def softmax(x):
    temp = np.exp(x)
    return temp / np.sum(temp, axis=1, keepdims=True)

# Hyperparameters
alpha = 2
iterations = 300
hidden_size = 100
input_dim = 784
output_dim = 10
batch_size = 100

# Initialize the neural network.
weights_0_1 = 0.02 * np.random.random((input_dim, hidden_size)) - 0.01
weights_1_2 = 0.2 * np.random.random((hidden_size, output_dim)) - 0.1

# Training loop
for j in range(iterations):
    correct_cnt = 0
    for i in range(int(len(images) / batch_size)):
        batch_start, batch_end = ((i * batch_size), ((i+1)*batch_size))
        input, target = images[batch_start:batch_end], labels[batch_start:batch_end]

        # Foward pass
        layer_0 = input
        layer_1 = tanh(np.dot(layer_0, weights_0_1))
        dropout_mask = np.random.randint(2, size=layer_1.shape)
        layer_1 *= dropout_mask * 2
        layer_2 = softmax(np.dot(layer_1, weights_1_2))
        prediction = layer_2

        for k in range(batch_size):
            correct_cnt += int(np.argmax(prediction[k:k+1]) == np.argmax(labels[batch_start+k:batch_start+k+1]))

        # Back propagation
        layer_2_delta = (target - prediction) / (batch_size * prediction.shape[0])
        layer_1_delta = layer_2_delta.dot(weights_1_2.T)* tanh2deriv(layer_1)
        layer_1_delta *= dropout_mask

        # Update the weights
        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)

    # Evaluate on the test set.
    test_correct_cnt = 0

    for i in range(len(test_images)):
        input, target = test_images[i:i+1], test_labels[i:i+1]
        layer_0 = input
        layer_1 = tanh(np.dot(layer_0, weights_0_1))
        layer_2 = softmax(np.dot(layer_1, weights_1_2))
        prediction = layer_2
        test_correct_cnt += int(np.argmax(prediction) == np.argmax(target))

    if (j % 10 == 0):
        print(f"I: {j}\tTraining accuracy: {correct_cnt/float(len(images))} Test accuracy: {test_correct_cnt / float(len(test_images))}")

I: 0	Training accuracy: 0.156 Test accuracy: 0.394
I: 10	Training accuracy: 0.723 Test accuracy: 0.6867
I: 20	Training accuracy: 0.732 Test accuracy: 0.7025
I: 30	Training accuracy: 0.763 Test accuracy: 0.734
I: 40	Training accuracy: 0.794 Test accuracy: 0.7663
I: 50	Training accuracy: 0.819 Test accuracy: 0.7913
I: 60	Training accuracy: 0.849 Test accuracy: 0.8102
I: 70	Training accuracy: 0.864 Test accuracy: 0.8228
I: 80	Training accuracy: 0.867 Test accuracy: 0.831
I: 90	Training accuracy: 0.885 Test accuracy: 0.8364
I: 100	Training accuracy: 0.883 Test accuracy: 0.8407
I: 110	Training accuracy: 0.891 Test accuracy: 0.845
I: 120	Training accuracy: 0.901 Test accuracy: 0.8481
I: 130	Training accuracy: 0.901 Test accuracy: 0.8505
I: 140	Training accuracy: 0.905 Test accuracy: 0.8526
I: 150	Training accuracy: 0.914 Test accuracy: 0.8555
I: 160	Training accuracy: 0.925 Test accuracy: 0.8577
I: 170	Training accuracy: 0.918 Test accuracy: 0.8596
I: 180	Training accuracy: 0.933 Test accura

# Multiple hidden layers

In [73]:
import numpy as np

# Freeze the seed for reproducability.
np.random.seed(1)

# Load the dataset
from keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

images, labels = (x_train[0:1000].reshape(1000,28*28) / 255, y_train[0:1000])

one_hot_labels = np.zeros((len(labels),10))
for i,l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels

test_images = x_test.reshape(len(x_test),28*28) / 255
test_labels = np.zeros((len(y_test),10))
for i,l in enumerate(y_test):
    test_labels[i][l] = 1

# Activation functions
def tanh(x):
    return np.tanh(x)

def tanh2deriv(output):
    return 1 - (output ** 2)

def softmax(x):
    temp = np.exp(x)
    return temp / np.sum(temp, axis=1, keepdims=True)

# Hyperparameters
alpha = 2
iterations = 300
hidden_size = 100
num_hidden = 0
input_dim = 784
output_dim = 10
batch_size = 100

# Initialize the neural network.
layers = []
input_layer = 0.02 * np.random.random((input_dim, hidden_size)) - 0.01
output_layer = 0.2 * np.random.random((hidden_size, output_dim)) - 0.1
hidden_dim = [np.random.random((hidden_size, hidden_size)) for _ in range(num_hidden)]
layers.append(input_layer)
layers.extend(hidden_dim)
layers.append(output_layer)

# Training loop
for j in range(iterations):
    correct_cnt = 0
    for i in range(int(len(images) / batch_size)):
        batch_start, batch_end = ((i * batch_size), ((i+1)*batch_size))
        input, target = images[batch_start:batch_end], labels[batch_start:batch_end]

        # Foward pass
        layer = [0] * (len(layers) + 1)
        for i, _ in enumerate(range(len(layer))):
            if i == 0:
                layer[i] = input
            elif i == len(layer) - 1:
                layer[i] = softmax(np.dot(layer[i-1], layers[i-1]))
            else:
                layer[i] = tanh(np.dot(layer[i-1], layers[i-1]))
                dropout_mask = np.random.randint(2, size=layer[i].shape)
                layer[i] *= dropout_mask * 2

        prediction = layer[-1]

        for k in range(batch_size):
            correct_cnt += int(np.argmax(prediction[k:k+1]) == np.argmax(labels[batch_start+k:batch_start+k+1]))

        # Back propagation
        deltas = [0] * (len(layers) + 1)
        for i in reversed(range(len(deltas))):
            if i == len(layer) - 1:
                deltas[i] = (target - prediction) / (batch_size * prediction.shape[0])
            else:
                deltas[i] = deltas[i+1].dot(layers[i].T) * tanh2deriv(layer[i])
                dropout_mask = np.random.randint(2, size=layer[i].shape)
                deltas[i] *= dropout_mask

        # Update the weights.
        for i in reversed(range(len(deltas) - 1)):
            layers[i] += alpha * layer[i].T.dot(deltas[i+1])

    if (j % 10 == 0):
        # Evaluate on the test set.
        test_correct_cnt = 0

        for i in range(len(test_images)):
            input, target = test_images[i:i+1], test_labels[i:i+1]

            # Foward pass
            layer = [0] * (len(layers) + 1)
            for i, _ in enumerate(range(len(layer))):
                if i == 0:
                    layer[i] = input
                elif i == len(layer) - 1:
                    layer[i] = softmax(np.dot(layer[i-1], layers[i-1]))
                else:
                    layer[i] = tanh(np.dot(layer[i-1], layers[i-1]))

            prediction = layer[-1]
            test_correct_cnt += int(np.argmax(prediction) == np.argmax(target))

        print(f"I: {j}\tTraining accuracy: {correct_cnt/float(len(images))} Test accuracy: {test_correct_cnt / float(len(test_images))}")

I: 0	Training accuracy: 0.158 Test accuracy: 0.3942
I: 10	Training accuracy: 0.712 Test accuracy: 0.6857
I: 20	Training accuracy: 0.731 Test accuracy: 0.7027
I: 30	Training accuracy: 0.762 Test accuracy: 0.7366
I: 40	Training accuracy: 0.796 Test accuracy: 0.7752
I: 50	Training accuracy: 0.825 Test accuracy: 0.7987
I: 60	Training accuracy: 0.849 Test accuracy: 0.8139
I: 70	Training accuracy: 0.844 Test accuracy: 0.823
I: 80	Training accuracy: 0.864 Test accuracy: 0.8287
I: 90	Training accuracy: 0.876 Test accuracy: 0.8369
I: 100	Training accuracy: 0.89 Test accuracy: 0.8402
I: 110	Training accuracy: 0.887 Test accuracy: 0.8464
I: 120	Training accuracy: 0.892 Test accuracy: 0.8483
I: 130	Training accuracy: 0.899 Test accuracy: 0.8495
I: 140	Training accuracy: 0.912 Test accuracy: 0.8518
I: 150	Training accuracy: 0.903 Test accuracy: 0.855
I: 160	Training accuracy: 0.914 Test accuracy: 0.856
I: 170	Training accuracy: 0.917 Test accuracy: 0.8588
I: 180	Training accuracy: 0.919 Test accura