<a href="https://colab.research.google.com/github/woodRock/grokking-deep-learning/blob/main/chapter_6_building_your_first_deep_neural_network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Chapter 6 - Building your first deep neural network

In [None]:
import numpy as np

np.random.seed(1)

def sigmoid(x):
    """
    The sigmoid activation function.

    Args:
        x (tensor): the input.

    Returns:
        (tensor): the sigmoid of the input.

    References:
        1. Cybenko, G. (1989).
           Approximation by superpositions of a sigmoidal function.
           Mathematics of control, signals and systems, 2(4), 303-314.
    """
    return 1 / (1 + np.exp(-x))

def sigmoid2deriv(x):
    """
    The first derivative of the sigmoid function wrt x

    Args:
        x (tensor): the input.

    Returns:
        (tensor): the derivite of sigmoid wrt x.
    """
    return x * (1 - x)

def relu(x):
    """ ReLU activation function.

    Args:
      x (tensor): input

    Returns:
      tensor: ReLU activation function.

    References:
      1. Nair, V., & Hinton, G. E. (2010).
         Rectified linear units improve restricted boltzmann machines.
         In Proceedings of the 27th international conference on
         machine learning (ICML-10) (pp. 807-814).
    """
    return (x > 0) * x

def relu2deriv(output):
    """ Derivative of ReLU activation function.

    Args:
      output (tensor): input

    Returns:
      tensor: derivative of the ReLU activation function.
    """
    return output > 0

dataset = "XOR"

# Create the dataset
X, y = None, None
if dataset == "XOR":
    X = np.array([[0,0],
              [0,1],
              [1,0],
              [1,1]])

    y = np.array([[0,1,1,0]]).T
elif dataset == "traffic":
    X = np.array([[1,0,1],
                [0,1,1],
                [0,0,1],
                [1,1,1]])

    y = np.array([[1,1,0,0]]).T

# Hyperparameters
alpha = 0.2
input_dim = X.shape[1]
hidden_size = 4
num_hidden = 0
output_dim = y.shape[1]
epochs = None

activation, activation2deriv = None, None
if dataset == "XOR":
    activation = sigmoid
    activation2deriv = sigmoid2deriv
    epochs = 1_000
else:
    activation = relu
    activation2deriv = relu2deriv
    epochs = 60

# Intialize the neural network.
layers = []
input_layer = 2 * np.random.random((input_dim, hidden_size)) - 1
output_layer = 2 * np.random.random((hidden_size, output_dim)) - 1
hidden = [(2 * np.random.random((hidden_size, hidden_size)) - 1) for _ in range(num_hidden)]
layers.append(input_layer)
layers.extend(hidden)
layers.append(output_layer)

predictions = None

# Training loop
for iteration in range(epochs):
    loss = 0
    predictions = []
    for i in range(len(X)):
        layer = [0] * (len(layers) + 1)
        input, target = X[i:i+1], y[i:i+1]

        # Get the predictions.
        for i, _ in enumerate(range(len(layer))):
            if i == 0:
                layer[i] = input
            elif i == len(layer) - 1:
                layer[i] = np.dot(layer[i-1], layers[i-1])
            else:
                layer[i] = activation(np.dot(layer[i-1], layers[i-1]))

        prediction = layer[-1]
        predictions.append(prediction)

        loss += np.sum((prediction - target) ** 2)

        # Calculate the deltas
        deltas = [0] * (len(layers) + 1)
        for i in reversed(range(len(deltas))):
            if i == len(layer) - 1:
                deltas[i] = (prediction - target)
            else:
                deltas[i] = deltas[i+1].dot(layers[i].T) * activation2deriv(layer[i])

        # Update the weights.
        for i in reversed(range(len(deltas) - 1)):
            layers[i] -= alpha * layer[i].T.dot(deltas[i+1])

    # Telemetry data from training.
    if (iteration % 10 == 9):
        print(f"Error: {loss / len(X)}")

print(f"Final prediction: {predictions}")

if dataset == "traffic":
    # For the traffic light problem with 60 epochs of training.
    assert loss == 1.5055622665134859e-05

Error: 0.2947385072776239
Error: 0.29330455333141053
Error: 0.2917345572239876
Error: 0.2900972952944217
Error: 0.2883489700271515
Error: 0.28644081285187584
Error: 0.2843127714463367
Error: 0.2818881944330225
Error: 0.27906964880477764
Error: 0.27573604235999793
Error: 0.27174185096284664
Error: 0.2669202053092612
Error: 0.2610925801332521
Error: 0.25408807424776814
Error: 0.24577354330833157
Error: 0.23609115642571044
Error: 0.22509317091769848
Error: 0.21295899006991653
Error: 0.19998249785113123
Error: 0.1865295919308148
Error: 0.1729799380865851
Error: 0.1596731867279794
Error: 0.1468746410336888
Error: 0.13476420217254448
Error: 0.12344334054602568
Error: 0.11295152436170765
Error: 0.10328478623559728
Error: 0.09441207461406893
Error: 0.08628769509231815
Error: 0.07885979749794902
Error: 0.0720756106835031
Error: 0.06588430978410661
Error: 0.060238314662885706
Error: 0.055093642143192144
Error: 0.05040975450581798
Error: 0.04614919500773203
Error: 0.04227718501395385
Error: 0.038

# MNIST Binary Classification between 6 and 9

In [38]:
import numpy as np
from tqdm import tqdm

np.random.seed(1)

def sigmoid(x):
    """
    The sigmoid activation function.

    Args:
        x (tensor): the input.

    Returns:
        (tensor): the sigmoid of the input.

    References:
        1. Cybenko, G. (1989).
           Approximation by superpositions of a sigmoidal function.
           Mathematics of control, signals and systems, 2(4), 303-314.
    """
    return 1 / (1 + np.exp(-x))

def sigmoid2deriv(x):
    """
    The first derivative of the sigmoid function wrt x

    Args:
        x (tensor): the input.

    Returns:
        (tensor): the derivite of sigmoid wrt x.
    """
    return x * (1 - x)

def relu(x):
    """ ReLU activation function.

    Args:
      x (tensor): input

    Returns:
      tensor: ReLU activation function.

    References:
      1. Nair, V., & Hinton, G. E. (2010).
         Rectified linear units improve restricted boltzmann machines.
         In Proceedings of the 27th international conference on
         machine learning (ICML-10) (pp. 807-814).
    """
    return (x > 0) * x

def relu2deriv(output):
    """ Derivative of ReLU activation function.

    Args:
      output (tensor): input

    Returns:
      tensor: derivative of the ReLU activation function.
    """
    return output > 0

import tensorflow as tf
import numpy as np

# Download MNIST dataset
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()


# Create the dataset

def create_binary_dataset(x, y, class_1, class_2):
    """ Function to create binary classification dataset.

    Args:
      x (tensor): the features.
      y (tensor): the class labels.
      class_1 (int): the first class.
      class_2 (int): the second class.

    Returns:
      x_binary (tensor): the binary features.
      y_binary (tensor): the binary class labels.
    """
    # Get indices of the two classes
    idx = np.where((y == class_1) | (y == class_2))
    x_binary = x[idx]
    y_binary = y[idx]

    # Convert labels to binary (0 and 1)
    y_binary = np.where(y_binary == class_1, 0, 1)

    return x_binary, y_binary

# Create binary datasets for 6 and 9
x_train_binary, y_train_binary = create_binary_dataset(x_train, y_train, 6, 9)
x_test_binary, y_test_binary = create_binary_dataset(x_test, y_test, 6, 9)

# Normalize pixel values
x_train_binary = x_train_binary.astype('float32') / 255
x_test_binary = x_test_binary.astype('float32') / 255

X, y = x_train_binary, y_train_binary
y = np.array([y]).T
X = X.flatten().reshape(len(X), 28*28)

# Hyperparameters
alpha = 0.2
input_dim = X.shape[1]
hidden_size = 4
num_hidden = 0
output_dim = y.shape[1]
activation = sigmoid
activation2deriv = sigmoid2deriv
epochs = 100

# Intialize the neural network.
layers = []
input_layer = 2 * np.random.random((input_dim, hidden_size)) - 1
output_layer = 2 * np.random.random((hidden_size, output_dim)) - 1
hidden = [(2 * np.random.random((hidden_size, hidden_size)) - 1) for _ in range(num_hidden)]
layers.append(input_layer)
layers.extend(hidden)
layers.append(output_layer)

predictions = None

# Training loop
for iteration in (pbar := tqdm(range(epochs), desc="Training MLP")):
    loss = 0
    predictions = []
    for i in range(len(X)):
        layer = [0] * (len(layers) + 1)
        input, target = X[i:i+1], y[i:i+1]

        # Forward pass
        for i, _ in enumerate(range(len(layer))):
            if i == 0:
                layer[i] = input
            elif i == len(layer) - 1:
                layer[i] = np.dot(layer[i-1], layers[i-1])
            else:
                layer[i] = activation(np.dot(layer[i-1], layers[i-1]))

        prediction = layer[-1]
        predictions.append(prediction)

        loss += np.sum((prediction - target) ** 2)

        # Back propagation
        deltas = [0] * (len(layers) + 1)
        for i in reversed(range(len(deltas))):
            if i == len(layer) - 1:
                deltas[i] = (prediction - target)
            else:
                deltas[i] = deltas[i+1].dot(layers[i].T) * activation2deriv(layer[i])

        # Update the weights.
        for i in reversed(range(len(deltas) - 1)):
            layers[i] -= alpha * layer[i].T.dot(deltas[i+1])

    # Telemetry data from training.
    if (iteration % 10 == 9):
        pbar.set_description(f"Error: {loss / len(X)}")

print(f"Final prediction: {predictions}")

def balanced_accuracy(y_true, y_pred):
    from sklearn.metrics import balanced_accuracy_score
    return balanced_accuracy_score(y_true, y_pred)

pred = [1 if x > 0.5 else 0 for x in predictions]
balanced_accuracy(pred, y)

Error: 0.00011943341677543656: 100%|██████████| 100/100 [01:26<00:00,  1.16it/s]


Final prediction: [array([[1.00008327]]), array([[6.84823237e-07]]), array([[4.95657673e-05]]), array([[1.00007398]]), array([[1.00016431]]), array([[1.47277291e-05]]), array([[1.00001205]]), array([[4.45433719e-08]]), array([[2.25393679e-05]]), array([[0.99972188]]), array([[1.00005133]]), array([[0.99914852]]), array([[1.00017342]]), array([[1.00023351]]), array([[6.57509778e-07]]), array([[2.00805187e-06]]), array([[4.17582588e-07]]), array([[0.84540462]]), array([[6.42182467e-06]]), array([[1.02626935]]), array([[1.24791494e-07]]), array([[0.00011726]]), array([[-2.96509572e-06]]), array([[1.02103351]]), array([[1.01682924]]), array([[1.10294698e-06]]), array([[1.81267709e-05]]), array([[1.0134515]]), array([[1.2827868e-08]]), array([[5.56458575e-08]]), array([[1.01015902]]), array([[1.008377]]), array([[3.07783672e-05]]), array([[1.00701868]]), array([[1.50387098e-08]]), array([[1.00563642]]), array([[1.00453504]]), array([[1.00340429]]), array([[1.00284974]]), array([[1.00223877]

0.9999155262713296