In [1]:
import numpy as np
import tensorflow as tf

# Question 2: Single Layer Perceptron



In [2]:
# Fetch the MNIST digit data through the Keras dataset helper
mnist_splits = tf.keras.datasets.mnist.load_data()
(train_images, train_labels), (test_images, test_labels) = mnist_splits

# Report the array dimensions of the training split
print("Original training images shape:", train_images.shape)
print("Original training labels shape:", train_labels.shape)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
Original training images shape: (60000, 28, 28)
Original training labels shape: (60000,)


In [3]:
# --- Preprocess the Data ---

def _to_unit_float32(images):
    """Return `images` as float32 with pixel values scaled to [0, 1].

    Integer-typed input (assumed to hold raw 0-255 pixel values) is divided
    by 255. Input that is already floating point is taken to have been scaled
    by an earlier run of this cell and is only cast to float32, so re-running
    the cell does not divide by 255 a second time.
    """
    if np.issubdtype(images.dtype, np.integer):
        # Cast to float32 before dividing so the result stays float32
        # (dividing the integer array directly would produce float64).
        return images.astype('float32') / 255.0
    return images.astype('float32', copy=False)


# Normalize the pixel values to be between 0 and 1
train_images = _to_unit_float32(train_images)
test_images = _to_unit_float32(test_images)

# Reshape the images from (28, 28) to a flat vector of 784 elements
train_images_flat = train_images.reshape(train_images.shape[0], 784)
test_images_flat = test_images.reshape(test_images.shape[0], 784)

# Check the new shape to confirm it's correct
print(f"Original training images shape: {train_images.shape}")
print(f"Reshaped training images shape: {train_images_flat.shape}")
print(f"Reshaped test images shape: {test_images_flat.shape}")

Original training images shape: (60000, 28, 28)
Reshaped training images shape: (60000, 784)
Reshaped test images shape: (10000, 784)


In [6]:
# --- One-Hot Encode the Labels ---
from tensorflow.keras.utils import to_categorical

# Turn every digit label into a 10-slot indicator vector (one slot per class),
# applying the same encoding to the training and the test labels.
train_labels_one_hot, test_labels_one_hot = (
    to_categorical(labels, num_classes=10)
    for labels in (train_labels, test_labels)
)

# Compare the label array before and after encoding
print("Original training labels shape:", train_labels.shape)
print("One-hot encoded training labels shape:", train_labels_one_hot.shape)

# Show the first label next to its encoded form
print("\n--- Example ---")
print("Original first label:", train_labels[0])
print("One-hot encoded first label:", train_labels_one_hot[0])

Original training labels shape: (60000,)
One-hot encoded training labels shape: (60000, 10)

--- Example ---
Original first label: 5
One-hot encoded first label: [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]


In [7]:
# --- Initialize SLP Parameters ---

# Define hyperparameters
learning_rate = 0.1

# Fixed seed for the parameter initialisation, so that restarting the kernel
# and re-running the notebook starts from the same weights and biases.
SEED = 42
rng = np.random.default_rng(SEED)

# Initialize the weights and biases with small random numbers drawn
# uniformly from [0, 0.1)
# Weights: A matrix of 784 x 10 (one column of pixel weights per output neuron)
# Biases: A vector of 10 (one per output neuron)
weights = rng.random((784, 10)) * 0.1
biases = rng.random(10) * 0.1

print("Weights matrix shape:", weights.shape)
print("Biases vector shape:", biases.shape)

Weights matrix shape: (784, 10)
Biases vector shape: (10,)


### Training on small subset (6000 samples)

In [8]:
# --- Prepare for Training ---

# Define the sigmoid activation function
def sigmoid(x):
    """Logistic sigmoid 1 / (1 + exp(-x)), evaluated without overflow.

    Args:
        x: A scalar or array-like of real numbers.

    Returns:
        The element-wise sigmoid of `x`, with values in [0, 1].

    The direct formula computes exp(-x), which overflows for large negative
    x. Here exp is only ever applied to non-positive numbers:

        x >= 0:  1      / (1 + exp(-x))
        x <  0:  exp(x) / (1 + exp(x))

    Both branches are mathematically the same function; for x >= 0 the
    arithmetic is exactly that of the direct formula.
    """
    # exp(min(x, 0)) is 1 for x >= 0 and exp(x) for x < 0 (the numerator);
    # exp(-|x|) is exp(-x) for x >= 0 and exp(x) for x < 0 (the denominator term).
    return np.exp(np.minimum(x, 0)) / (1 + np.exp(-np.abs(x)))

# Restrict the initial runs to the first num_train_samples training examples,
# slicing the flattened images and the one-hot labels identically.
num_train_samples = 6000
train_images_small, train_labels_small = (
    full_set[:num_train_samples]
    for full_set in (train_images_flat, train_labels_one_hot)
)

print("Using a smaller training set for initial runs.")
print("Images shape:", train_images_small.shape)
print("Labels shape:", train_labels_small.shape)

Using a smaller training set for initial runs.
Images shape: (6000, 784)
Labels shape: (6000, 10)


In [9]:
# --- SLP Training Loop ---

# Number of passes over the small training set
epochs = 10

for epoch in range(epochs):
    print(f"--- Epoch {epoch + 1}/{epochs} ---")

    # Per-sample learning: the parameters are adjusted after every image (x)
    # and its one-hot label (d) from the small training set.
    for x, d in zip(train_images_small, train_labels_small):

        # 1. FORWARD PASS
        # Raw output v of all 10 neurons, then the sigmoid of v as output y
        raw_output_v = x.dot(weights) + biases
        final_output_y = sigmoid(raw_output_v)

        # 2. BACKWARD PASS (Weight and Bias Update)
        # Delta Rule:
        #   w(t+1) = w(t) + α[d(t)-y(t)]y(t)[1-y(t)]x(t)
        #   b(t+1) = b(t) + α[d(t)-y(t)]y(t)[1-y(t)]
        # delta is the factor [d-y]·y·[1-y] shared by both updates,
        # one entry per output neuron.
        delta = (d - final_output_y) * final_output_y * (1 - final_output_y)

        # Apply the updates in place: the outer product of x and delta has
        # the same shape as the weights matrix.
        weights += learning_rate * np.outer(x, delta)
        biases += learning_rate * delta

    # 3. TRACK PERFORMANCE (at the end of each epoch)

    # Forward pass over the whole small training set in one batch
    predictions = sigmoid(train_images_small.dot(weights) + biases)

    # Predicted class = index of the neuron with the highest output
    predicted_classes = predictions.argmax(axis=1)

    # True class = position of the 1 in each one-hot label
    true_classes = train_labels_small.argmax(axis=1)

    # Fraction of images whose predicted class matches the true class
    accuracy = (predicted_classes == true_classes).mean()
    print(f"Training Accuracy: {accuracy * 100:.2f}%")

--- Epoch 1/10 ---
Training Accuracy: 89.45%
--- Epoch 2/10 ---
Training Accuracy: 90.97%
--- Epoch 3/10 ---
Training Accuracy: 91.70%
--- Epoch 4/10 ---
Training Accuracy: 92.18%
--- Epoch 5/10 ---
Training Accuracy: 92.55%
--- Epoch 6/10 ---
Training Accuracy: 92.73%
--- Epoch 7/10 ---
Training Accuracy: 92.85%
--- Epoch 8/10 ---
Training Accuracy: 93.00%
--- Epoch 9/10 ---
Training Accuracy: 93.27%
--- Epoch 10/10 ---
Training Accuracy: 93.43%


In [10]:
# --- Evaluate the SLP on the Test Set ---

# Forward pass over the full test data with the current weights and biases
test_predictions = sigmoid(test_images_flat.dot(weights) + biases)

# Predicted class per test image: index of the largest output value
test_predicted_classes = test_predictions.argmax(axis=1)

# True class per test image: position of the 1 in the one-hot label
test_true_classes = test_labels_one_hot.argmax(axis=1)

# Accuracy = fraction of test images whose prediction matches the true class
test_accuracy = (test_predicted_classes == test_true_classes).mean()

print(f"Final Test Accuracy: {test_accuracy * 100:.2f}%")

Final Test Accuracy: 89.97%


### Training on full training set (60000 samples)

In [11]:
# --- SLP Training Loop ---

# NOTE(review): `weights` and `biases` are not re-initialized in this cell and
# are updated in place, so if the 6000-sample training cell has already run,
# this loop continues from those parameters rather than starting from scratch.
# Confirm that this is intended.

# Number of passes over the full training set
epochs = 10

for epoch in range(epochs):
    print(f"--- Epoch {epoch + 1}/{epochs} ---")

    # Per-sample learning: the parameters are adjusted after every image (x)
    # and its one-hot label (d) from the full training set.
    for x, d in zip(train_images_flat, train_labels_one_hot):

        # 1. FORWARD PASS
        # Raw output v of all 10 neurons, then the sigmoid of v as output y
        raw_output_v = x.dot(weights) + biases
        final_output_y = sigmoid(raw_output_v)

        # 2. BACKWARD PASS (Weight and Bias Update)
        # Delta Rule:
        #   w(t+1) = w(t) + α[d(t)-y(t)]y(t)[1-y(t)]x(t)
        #   b(t+1) = b(t) + α[d(t)-y(t)]y(t)[1-y(t)]
        # delta is the factor [d-y]·y·[1-y] shared by both updates,
        # one entry per output neuron.
        delta = (d - final_output_y) * final_output_y * (1 - final_output_y)

        # Apply the updates in place: the outer product of x and delta has
        # the same shape as the weights matrix.
        weights += learning_rate * np.outer(x, delta)
        biases += learning_rate * delta

    # 3. TRACK PERFORMANCE (at the end of each epoch)

    # Forward pass over the full training set in one batch
    predictions = sigmoid(train_images_flat.dot(weights) + biases)

    # Predicted class = index of the neuron with the highest output
    predicted_classes = predictions.argmax(axis=1)

    # True class = position of the 1 in each one-hot label
    true_classes = train_labels_one_hot.argmax(axis=1)

    # Fraction of images whose predicted class matches the true class
    accuracy = (predicted_classes == true_classes).mean()
    print(f"Training Accuracy: {accuracy * 100:.2f}%")

--- Epoch 1/10 ---
Training Accuracy: 90.13%
--- Epoch 2/10 ---
Training Accuracy: 90.55%
--- Epoch 3/10 ---
Training Accuracy: 90.87%
--- Epoch 4/10 ---
Training Accuracy: 91.09%
--- Epoch 5/10 ---
Training Accuracy: 91.24%
--- Epoch 6/10 ---
Training Accuracy: 91.32%
--- Epoch 7/10 ---
Training Accuracy: 91.42%
--- Epoch 8/10 ---
Training Accuracy: 91.49%
--- Epoch 9/10 ---
Training Accuracy: 91.53%
--- Epoch 10/10 ---
Training Accuracy: 91.57%
