In [1]:
import tensorflow as tf
import numpy as np

# Load MNIST dataset
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Preprocess dataset
x_train, x_test = x_train.reshape(-1, 28*28).astype(np.float32) / 255.0, x_test.reshape(-1, 28*28).astype(np.float32) / 255.0
y_train, y_test = tf.one_hot(y_train, depth=10), tf.one_hot(y_test, depth=10)

# Define network parameters
input_size = 784  # 28x28 pixels
hidden_size1 = 128
hidden_size2 = 64
output_size = 10  # Digits 0-9
learning_rate = 0.01
epochs = 20
batch_size = 128

# Initialize weights and biases
weights = {
    "W1": tf.Variable(tf.random.normal([input_size, hidden_size1], stddev=0.1)),
    "W2": tf.Variable(tf.random.normal([hidden_size1, hidden_size2], stddev=0.1)),
    "W3": tf.Variable(tf.random.normal([hidden_size2, output_size], stddev=0.1)),
}

biases = {
    "b1": tf.Variable(tf.zeros([hidden_size1])),
    "b2": tf.Variable(tf.zeros([hidden_size2])),
    "b3": tf.Variable(tf.zeros([output_size])),
}

# Define feed-forward function
def forward_propagation(x):
    z1 = tf.matmul(x, weights["W1"]) + biases["b1"]
    a1 = tf.nn.relu(z1)
    
    z2 = tf.matmul(a1, weights["W2"]) + biases["b2"]
    a2 = tf.nn.relu(z2)
    
    z3 = tf.matmul(a2, weights["W3"]) + biases["b3"]
    output = tf.nn.softmax(z3)  # Softmax for classification
    return output

# Loss function (Cross-entropy)
def compute_loss(y_pred, y_true):
    return tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y_pred, labels=y_true))

# Training function with backpropagation
def train_step(x_batch, y_batch):
    with tf.GradientTape() as tape:
        y_pred = forward_propagation(x_batch)
        loss = compute_loss(y_pred, y_batch)
    
    # Compute gradients
    gradients = tape.gradient(loss, list(weights.values()) + list(biases.values()))
    
    # Update weights and biases using Gradient Descent
    for i, key in enumerate(weights.keys()):
        weights[key].assign_sub(learning_rate * gradients[i])
    for i, key in enumerate(biases.keys()):
        biases[key].assign_sub(learning_rate * gradients[len(weights) + i])
    
    return loss

# Training loop
num_batches = x_train.shape[0] // batch_size
for epoch in range(epochs):
    avg_loss = 0
    for i in range(num_batches):
        batch_x = x_train[i * batch_size:(i + 1) * batch_size]
        batch_y = y_train[i * batch_size:(i + 1) * batch_size]
        loss = train_step(batch_x, batch_y)
        avg_loss += loss / num_batches

    print(f"Epoch {epoch+1}, Loss: {avg_loss.numpy():.4f}")

# Evaluation
y_pred_test = forward_propagation(x_test)
correct_predictions = tf.equal(tf.argmax(y_pred_test, axis=1), tf.argmax(y_test, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32))

print(f"Test Accuracy: {accuracy.numpy() * 100:.2f}%")


Epoch 1, Loss: 2.2877
Epoch 2, Loss: 2.2343
Epoch 3, Loss: 2.1234
Epoch 4, Loss: 1.9871
Epoch 5, Loss: 1.8813
Epoch 6, Loss: 1.8269
Epoch 7, Loss: 1.7605
Epoch 8, Loss: 1.6965
Epoch 9, Loss: 1.6628
Epoch 10, Loss: 1.6416
Epoch 11, Loss: 1.6268
Epoch 12, Loss: 1.6157
Epoch 13, Loss: 1.6071
Epoch 14, Loss: 1.6002
Epoch 15, Loss: 1.5945
Epoch 16, Loss: 1.5896
Epoch 17, Loss: 1.5855
Epoch 18, Loss: 1.5819
Epoch 19, Loss: 1.5788
Epoch 20, Loss: 1.5760
Test Accuracy: 90.98%
