In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

In [2]:
# Load the MNIST training split; the second (held-out) tuple returned by
# load_data() is discarded here.
(x_train, y_train), _ = mnist.load_data()

# Flatten every image into a 784-wide row and cast to float32 *before*
# dividing, so the division itself is carried out in float32.
# Dividing by 255.0 presumably maps 8-bit pixel intensities into [0, 1].
x_train = x_train.reshape((-1, 784)).astype(np.float32)
x_train = x_train / 255.0

# Turn the integer class labels into 10-column categorical (one-hot) rows.
y_train = to_categorical(y_train, num_classes=10)

In [3]:
# Parameters

# Layer widths: 784 inputs (the row width produced when the images are
# flattened), 128 hidden units, 10 output classes.
input_size, hidden_size, output_size = 784, 128, 10

# Optimisation settings: mini-batch length, number of passes over the
# training set, and the SGD step size.
batch_size, epochs = 64, 7
learning_rate = 0.01

In [4]:
# Initialize weights (same sizes as in predictive coding)
# Fix the global TensorFlow seed so the random draws below are repeatable.
tf.random.set_seed(42)

# Layer 1 (input -> hidden). W1 is laid out as (input, hidden) because the
# forward pass computes `x @ W1`, i.e. the input is on the left.
W1 = tf.Variable(
    tf.random.normal(shape=(input_size, hidden_size), mean=0.0, stddev=0.01)
)
b1 = tf.Variable(tf.zeros(shape=(hidden_size,)))

# Layer 2 (hidden -> output): same scheme, small Gaussian weights, zero bias.
W2 = tf.Variable(
    tf.random.normal(shape=(hidden_size, output_size), mean=0.0, stddev=0.01)
)
b2 = tf.Variable(tf.zeros(shape=(output_size,)))

In [5]:
# Activation functions
def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and `x`.

    Args:
        x: Tensor (or tensor-convertible value) of pre-activations.

    Returns:
        A tensor where every negative entry of `x` is replaced by 0.
    """
    # Keep 0 as the first operand, exactly as the original call was written.
    rectified = tf.math.maximum(0, x)
    return rectified

def softmax(x):
    """Softmax along axis 1, shifted by the per-row maximum.

    Args:
        x: Tensor of scores; the reduction runs over axis 1, so `x` needs
            at least two dimensions.

    Returns:
        A tensor shaped like `x` whose entries along axis 1 are the
        exponentiated scores divided by their sum (plus 1e-9).
    """
    # Subtract the largest score in each row before exponentiating so the
    # largest exponent is exp(0).
    row_max = tf.reduce_max(x, axis=1, keepdims=True)
    exps = tf.exp(x - row_max)

    # A small constant is added to the denominator before dividing.
    normalizer = tf.reduce_sum(exps, axis=1, keepdims=True) + 1e-9
    return exps / normalizer

In [6]:
# Training loop: plain mini-batch SGD with manual parameter updates.
# Batches are taken in the stored order of x_train every epoch (no shuffling).
num_samples = x_train.shape[0]

# Single source of truth for the trainable variables: the same list is used
# to request gradients and to apply them, so the two cannot get out of step.
trainable_vars = [W1, b1, W2, b2]

for epoch in range(epochs):
    # Integer tallies for the accuracy reported at the end of this epoch.
    total_correct = 0
    total_samples = 0

    for i in range(0, num_samples, batch_size):
        # The final slice may be shorter than batch_size.
        x_batch = x_train[i:i+batch_size]
        y_batch = y_train[i:i+batch_size]

        with tf.GradientTape() as tape:
            # Forward pass
            z1 = tf.matmul(x_batch, W1) + b1        # shape: [batch, hidden]
            a1 = relu(z1)                           # apply ReLU
            z2 = tf.matmul(a1, W2) + b2             # shape: [batch, output]
            y_pred = softmax(z2)                    # output probabilities

            # Loss (categorical cross-entropy), averaged over the batch
            loss = tf.reduce_mean(
                tf.keras.losses.categorical_crossentropy(y_batch, y_pred)
            )

        # Backward pass (automatic differentiation); gradients come back in
        # the same order as trainable_vars.
        grads = tape.gradient(loss, trainable_vars)

        # SGD update: var <- var - learning_rate * grad
        for var, grad in zip(trainable_vars, grads):
            var.assign_sub(learning_rate * grad)

        # Accuracy bookkeeping. y_pred was computed before the update above,
        # so the reported figure is a running training accuracy measured
        # with the pre-update weights of each batch.
        preds = tf.argmax(y_pred, axis=1).numpy()
        labels = tf.argmax(y_batch, axis=1).numpy()
        # Count hits directly as an integer instead of reconstructing the
        # count from a floating-point mean.
        total_correct += int(np.count_nonzero(preds == labels))
        total_samples += x_batch.shape[0]

    print(f"Epoch {epoch+1}: Accuracy = {(total_correct / total_samples) * 100:.2f}%")

Epoch 1: Accuracy = 57.33%
Epoch 2: Accuracy = 83.17%
Epoch 3: Accuracy = 87.67%
Epoch 4: Accuracy = 89.14%
Epoch 5: Accuracy = 89.99%
Epoch 6: Accuracy = 90.59%
Epoch 7: Accuracy = 91.04%
