<a href="https://colab.research.google.com/github/woodRock/grokking-deep-learning/blob/main/chapter_10_neural_learning_about_edges_and_corners.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Chapter 10 | Neural learning about edges and corners

In [29]:
import numpy as np

# Freeze the random seed for reproducability.
np.random.seed(1)

# Load the dataset.
from keras.datasets import mnist

(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Take the first 1000 images, and normalize the features between 0 and 1.
images, labels = (X_train[0:1000].reshape(1000,28*28) / 255, y_train[0:1000])

# Convert to one hot encoding
one_hot_labels = np.zeros((len(labels),10))
for i,l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels

test_images = X_test.reshape(len(X_test),28*28) / 255
test_labels = np.zeros((len(y_test),10))
for i,l in enumerate(y_test):
    test_labels[i][l] = 1

# Activation functions
def tanh(x):
    return np.tanh(x)

def tanh2deriv(output):
    return 1 - (output ** 2)

def softmax(x):
    temp = np.exp(x)
    return temp / np.sum(temp, axis=1, keepdims=True)

# Hyperparameters
alpha = 2
iterations = 300
input_dim = 784
output_dim = 10
batch_size = 128

input_rows = 28
input_cols = 28

kernel_rows = 3
kernel_cols = 3
num_kernels = 16
hidden_size = ((input_rows - kernel_rows) * (input_cols - kernel_cols)) * num_kernels

# Initialize the network.
kernels = 0.02 * np.random.random((kernel_rows*kernel_cols, num_kernels)) - 0.01
weights_1_2 = 0.2 * np.random.random((hidden_size, output_dim)) - 0.1

def get_image_section(layer, row_from, row_to, col_from, col_to):
    """ Select a subregion in a batch of images. """
    section = layer[:, row_from:row_to, col_from:col_to]
    return section.reshape(-1,1, row_to-row_from, col_to-col_from)

# Training loop
for j in range(iterations):
    correct_cnt = 0
    for i in range(int(len(images) / batch_size)):
        batch_start, batch_end = ((i * batch_size), ((i+1) * batch_size))
        input, target = images[batch_start:batch_end], labels[batch_start:batch_end]

        # Forward pass
        layer_0 = input
        layer_0 = layer_0.reshape(layer_0.shape[0], 28, 28)

        sections = list()
        for row_start in range(layer_0.shape[1] - kernel_rows):
            for col_start in range(layer_0.shape[2] - kernel_cols):
                section = get_image_section(layer_0,
                                            row_start,
                                            row_start + kernel_rows,
                                            col_start,
                                            col_start + kernel_rows)
                sections.append(section)

        expanded_input = np.concatenate(sections, axis=1)
        es = expanded_input.shape
        flattened_input = expanded_input.reshape(es[0]*es[1],-1)

        kernel_output = flattened_input.dot(kernels)
        layer_1 = tanh(kernel_output.reshape(es[0],-1))
        # Dropout
        dropout_mask = np.random.randint(2, size=layer_1.shape)
        layer_1 *= dropout_mask * 2
        layer_2 = softmax(np.dot(layer_1, weights_1_2))
        prediction = layer_2

        for k in range(batch_size):
            true_label = labels[batch_start+k:batch_start+k+1]
            pred_label = prediction[k:k+1]
            correct_cnt += int(np.argmax(pred_label) == np.argmax(true_label))

        # Back progation
        layer_2_delta = (target - prediction) / (batch_size * layer_2.shape[0])
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * tanh2deriv(layer_1)
        layer_1_delta *= dropout_mask

        # Update the weights
        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        lld_reshape = layer_1_delta.reshape(kernel_output.shape)
        k_update = flattened_input.T.dot(lld_reshape)
        kernels -= alpha * k_update

    # Every 50 iterations and the final iteration once finished training.
    if (j % 50 == 0 or j == iterations - 1):

        # Evaluate on the test set.
        test_correct_cnt = 0
        for i in range(int(len(test_images) / batch_size)):
            batch_start, batch_end = ((i * batch_size), ((i+1) * batch_size))
            input, target = test_images[batch_start:batch_end], test_labels[batch_start:batch_end]

            # Forward pass
            layer_0 = input
            layer_0 = layer_0.reshape(layer_0.shape[0],28,28)

            sections = list()

            for row_start in range(layer_0.shape[1] - kernel_rows):
                for col_start in range(layer_0.shape[2] - kernel_cols):
                    section = get_image_section(layer_0,
                                                row_start,
                                                row_start + kernel_rows,
                                                col_start,
                                                col_start + kernel_rows)
                    sections.append(section)

            expanded_input = np.concatenate(sections, axis=1)
            es = expanded_input.shape
            flattened_input = expanded_input.reshape(es[0]*es[1],-1)

            kernel_output = flattened_input.dot(kernels)
            layer_1 = tanh(kernel_output.reshape(es[0],-1))
            # Dropout
            dropout_mask = np.random.randint(2, size=layer_1.shape)
            layer_1 *= dropout_mask * 2
            layer_2 = softmax(np.dot(layer_1, weights_1_2))
            prediction = layer_2

            for k in range(batch_size):
                true_label = test_labels[batch_start+k:batch_start+k+1]
                pred_label = prediction[k:k+1]
                test_correct_cnt += int(np.argmax(pred_label) == np.argmax(true_label))

        print(f"I: {j} \t Training correct: {correct_cnt/float(len(y_train)):.4f} \t Test correct: {test_correct_cnt/float(len(y_test)):.4f}")



I: 0 	 Training correct: 0.0009 	 Test correct: 0.0444
I: 50 	 Training correct: 0.0070 	 Test correct: 0.4642
I: 100 	 Training correct: 0.0122 	 Test correct: 0.7449
I: 150 	 Training correct: 0.0128 	 Test correct: 0.7932
I: 200 	 Training correct: 0.0133 	 Test correct: 0.8115
I: 250 	 Training correct: 0.0132 	 Test correct: 0.8242
I: 299 	 Training correct: 0.0139 	 Test correct: 0.8250
