In [1]:
import numpy as np
import matplotlib.pyplot as plt
from keras.layers import Dense, Flatten
from keras.models import Sequential
from keras.utils import to_categorical
from keras.datasets import mnist

In [2]:
# Load MNIST handwritten digit data
(X_train, y_train), (X_test, y_test) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [15]:

# Load and preprocess data
(train_X, train_y), (test_X, test_y) = mnist.load_data()
train_X = train_X.reshape(train_X.shape[0], -1) / 255.0
test_X = test_X.reshape(test_X.shape[0], -1) / 255.0
train_y_one_hot = np.eye(10)[train_y]
test_y_one_hot = np.eye(10)[test_y]

# Split training data into training and validation sets
split_idx = int(0.8 * train_X.shape[0])
train_X, val_X = train_X[:split_idx], train_X[split_idx:]
train_y_one_hot, val_y_one_hot = train_y_one_hot[:split_idx], train_y_one_hot[split_idx:]

# Activation functions
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return sigmoid(x) * (1 - sigmoid(x))

def softmax(x):
    exps = np.exp(x - np.max(x, axis=0, keepdims=True))
    return exps / np.sum(exps, axis=0, keepdims=True)

# Initialize parameters
input_size = 784  # 28x28 pixels flattened
hidden_size = 128  # Number of hidden neurons
output_size = 10  # 10 classes for digits 0-9
w_i_h = np.random.randn(hidden_size, input_size) * 0.01
w_h_o = np.random.randn(output_size, hidden_size) * 0.01
b_i_h = np.zeros((hidden_size, 1))
b_h_o = np.zeros((output_size, 1))

# Forward and backward propagation for batches
def forward_backward_propagate(x_batch, y_batch, w_i_h, b_i_h, w_h_o, b_h_o):
    # Forward pass
    hidden_input = np.dot(w_i_h, x_batch) + b_i_h
    hidden_output = sigmoid(hidden_input)
    final_input = np.dot(w_h_o, hidden_output) + b_h_o
    final_output = softmax(final_input)

    # Compute error and cost (Cross-entropy loss)
    m = x_batch.shape[1]
    error = final_output - y_batch
    cost = 0.5 * np.sum(error ** 2)

    # # Backward pass
    delta_output = final_output - y_batch
    delta_hidden = np.dot(w_h_o.T, delta_output) * sigmoid_derivative(hidden_input)

    # Gradient for weights and biases
    grad_w_h_o = np.dot(delta_output, hidden_output.T)
    grad_b_h_o = np.sum(delta_output, axis=1, keepdims=True)
    grad_w_i_h = np.dot(delta_hidden, x_batch.T)
    grad_b_i_h = np.sum(delta_hidden, axis=1, keepdims=True)

    # Update weights and biases
    w_h_o -= learn_rate * grad_w_h_o
    b_h_o -= learn_rate * grad_b_h_o
    w_i_h -= learn_rate * grad_w_i_h
    b_i_h -= learn_rate * grad_b_i_h

    return cost

def calculate_accuracy(predictions, labels):
    return np.mean(np.argmax(predictions, axis=0) == np.argmax(labels, axis=0))

def predict(x, w_i_h, b_i_h, w_h_o, b_h_o):
    # Perform a forward pass to get predictions
    hidden_input = np.dot(w_i_h, x) + b_i_h
    hidden_output = sigmoid(hidden_input)
    final_input = np.dot(w_h_o, hidden_output) + b_h_o
    final_output = softmax(final_input)
    return final_output

# Training the model with mini-batches
epochs = 100
learn_rate = 0.01
batch_size = 64
for epoch in range(epochs):
    train_loss, val_loss = 0, 0
    train_acc, val_acc = 0, 0

    # Training phase
    for i in range(0, train_X.shape[0], batch_size):
        x_batch = train_X[i:i + batch_size].T
        y_batch = train_y_one_hot[i:i + batch_size].T
        cost = forward_backward_propagate(x_batch, y_batch, w_i_h, b_i_h, w_h_o, b_h_o)
        train_loss += cost
        train_acc += calculate_accuracy(predict(x_batch, w_i_h, b_i_h, w_h_o, b_h_o), y_batch)

    # Validation phase
    val_predictions = predict(val_X.T, w_i_h, b_i_h, w_h_o, b_h_o)
    val_loss = -np.sum(val_y_one_hot * np.log(val_predictions.T + 1e-8)) / val_X.shape[0]
    val_acc = calculate_accuracy(val_predictions, val_y_one_hot.T)

    # Logging
    print(f"Epoch {epoch+1}/100")
    print(f"3/3 [==============================] - 0s 18ms/step - loss: {train_loss/train_X.shape[0]:.4f} - accuracy: {train_acc/(train_X.shape[0]//batch_size):.4f} - val_loss: {val_loss:.4f} - val_accuracy: {val_acc:.4f}")

# Evaluate the model on test data
test_predictions = predict(test_X.T, w_i_h, b_i_h, w_h_o, b_h_o)
test_accuracy = calculate_accuracy(test_predictions, test_y_one_hot.T)
print(f'Test Accuracy: {test_accuracy * 100:.2f}%')


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [16]:
def predict_single_example(x, w_i_h, b_i_h, w_h_o, b_h_o):
    # Perform a forward pass to get predictions
    hidden_input = np.dot(w_i_h, x) + b_i_h
    hidden_output = sigmoid(hidden_input)
    final_input = np.dot(w_h_o, hidden_output) + b_h_o
    final_output = softmax(final_input)

    # Predict the class (index of the highest probability)
    predicted_class = np.argmax(final_output)

    return predicted_class

# Predict classes for all test examples
test_predicted_classes = []
for i in range(len(test_X)):
    x_test_single = test_X[i].reshape(-1, 1)  # Reshape for a single example
    predicted_class = predict_single_example(x_test_single, w_i_h, b_i_h, w_h_o, b_h_o)
    test_predicted_classes.append(predicted_class)
    print(f"Test example {i+1}: Predicted class: {predicted_class}")

# Evaluate the model on test data
test_accuracy = calculate_accuracy(np.eye(10)[test_predicted_classes].T, test_y_one_hot.T)
print(f'Test Accuracy: {test_accuracy * 100:.2f}%')


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Test example 5002: Predicted class: 9
Test example 5003: Predicted class: 9
Test example 5004: Predicted class: 8
Test example 5005: Predicted class: 4
Test example 5006: Predicted class: 1
Test example 5007: Predicted class: 0
Test example 5008: Predicted class: 6
Test example 5009: Predicted class: 0
Test example 5010: Predicted class: 9
Test example 5011: Predicted class: 6
Test example 5012: Predicted class: 8
Test example 5013: Predicted class: 6
Test example 5014: Predicted class: 1
Test example 5015: Predicted class: 1
Test example 5016: Predicted class: 9
Test example 5017: Predicted class: 8
Test example 5018: Predicted class: 9
Test example 5019: Predicted class: 2
Test example 5020: Predicted class: 3
Test example 5021: Predicted class: 5
Test example 5022: Predicted class: 5
Test example 5023: Predicted class: 9
Test example 5024: Predicted class: 4
Test example 5025: Predicted class: 2
Test example 5026: Pred

In [17]:
from sklearn.metrics import confusion_matrix

# Convert one-hot encoded test predictions to class labels
test_pred_labels = np.argmax(test_predictions, axis=0)

# Convert one-hot encoded true labels to class labels
test_true_labels = np.argmax(test_y_one_hot, axis=1)

# Calculate the confusion matrix
conf_matrix = confusion_matrix(test_true_labels, test_pred_labels)

print("Confusion Matrix:")
print(conf_matrix)


Confusion Matrix:
[[ 969    1    1    2    1    1    3    1    1    0]
 [   0 1126    2    0    0    1    2    1    2    1]
 [   3    0 1012    2    2    0    3    5    5    0]
 [   0    1    4  991    0    2    0    5    3    4]
 [   1    0    1    0  959    0    5    1    1   14]
 [   4    0    0   11    1  861    5    1    5    4]
 [   7    3    0    1    3    5  936    0    3    0]
 [   0    3    6    2    0    1    0 1007    3    6]
 [   3    0    5    5    3    2    1    3  950    2]
 [   2    2    0    5    8    3    0    5    2  982]]


In [18]:
def calculate_confusion_matrix(true_labels, predicted_labels, num_classes=10):

    conf_matrix = np.zeros((num_classes, num_classes), dtype=int)

    # Fill the confusion matrix
    for true_label, predicted_label in zip(true_labels, predicted_labels):
        conf_matrix[true_label, predicted_label] += 1

    return conf_matrix

conf_matrix = calculate_confusion_matrix(test_true_labels, test_pred_labels)
print(conf_matrix)


[[ 969    1    1    2    1    1    3    1    1    0]
 [   0 1126    2    0    0    1    2    1    2    1]
 [   3    0 1012    2    2    0    3    5    5    0]
 [   0    1    4  991    0    2    0    5    3    4]
 [   1    0    1    0  959    0    5    1    1   14]
 [   4    0    0   11    1  861    5    1    5    4]
 [   7    3    0    1    3    5  936    0    3    0]
 [   0    3    6    2    0    1    0 1007    3    6]
 [   3    0    5    5    3    2    1    3  950    2]
 [   2    2    0    5    8    3    0    5    2  982]]
