Option 1

In [None]:
import numpy as np
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

# Load MNIST, flatten every image into a single row vector and divide by 255
# (presumably mapping 0-255 pixel intensities into [0, 1] -- confirm dtype).
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.reshape(len(X_train), -1) / 255.0
X_test = X_test.reshape(len(X_test), -1) / 255.0

# Model dimensions: one input per flattened pixel, one output per class.
output_size = 10
input_size = X_train.shape[1]

# One-hot encode the integer labels so they can be compared with the
# softmax output column by column.
y_train_oh = to_categorical(y_train, num_classes=output_size)
y_test_oh = to_categorical(y_test, num_classes=output_size)

# Seed the global NumPy RNG so the weight initialisation is reproducible,
# then start from small random weights and a zero bias row.
np.random.seed(42)
b = np.zeros((1, output_size))
W = 0.01 * np.random.randn(input_size, output_size)

def softmax(z):
    """Return the row-wise softmax of a 2-D array of scores.

    Each row of ``z`` is normalised independently (along axis 1) so that
    its entries are positive and sum to 1.
    """
    # Subtracting the per-row maximum leaves the result unchanged but keeps
    # np.exp from overflowing on large scores.
    row_max = np.max(z, axis=1, keepdims=True)
    unnormalised = np.exp(z - row_max)
    row_total = np.sum(unnormalised, axis=1, keepdims=True)
    return unnormalised / row_total

def forward(X, W, b):
    """Run the single-layer model on a batch and return class probabilities.

    Computes the affine scores ``X . W + b`` and passes them through
    ``softmax``, which normalises each row.
    """
    logits = np.dot(X, W) + b
    probabilities = softmax(logits)
    return probabilities

def cross_entropy_loss(y_true, y_pred):
    """Return the cross-entropy between ``y_true`` and ``y_pred``, averaged
    over the first axis of ``y_true``.

    ``y_true`` is multiplied element-wise with the log of ``y_pred``; the
    sum of all entries is negated and divided by ``y_true.shape[0]``.
    """
    n_samples = y_true.shape[0]
    # The small constant keeps log() finite when a predicted value is 0.
    log_pred = np.log(y_pred + 1e-9)
    weighted_total = np.sum(y_true * log_pred)
    return -weighted_total / n_samples

# Hyper-parameters for full-batch gradient descent.
learning_rate = 0.1
epochs = 100

# The training-set size does not change between epochs, so read it once.
m = X_train.shape[0]

for epoch in range(epochs):
    # Forward pass over the whole training set; the loss recorded here is
    # measured before this epoch's parameter update.
    y_pred = forward(X_train, W, b)
    loss = cross_entropy_loss(y_train_oh, y_pred)

    # Gradient of the mean cross-entropy with respect to the pre-softmax
    # scores, then back through the affine layer to W and b.
    dz = (y_pred - y_train_oh) / m
    dW = np.dot(X_train.T, dz)
    db = dz.sum(axis=0, keepdims=True)

    # In-place gradient-descent step.
    W -= learning_rate * dW
    b -= learning_rate * db

    # Report progress on every tenth epoch.
    if epoch % 10 == 9:
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss:.4f}")

# Evaluate on the test split: take the highest-probability class for each
# sample and report the fraction that matches the integer labels.
y_test_pred = forward(X_test, W, b)
y_test_labels = y_test_pred.argmax(axis=1)
accuracy = (y_test_labels == y_test).mean()
print(f"Test set accuracy: {accuracy:.4f}")


Epoch 10/100, Loss: 1.6152
Epoch 20/100, Loss: 1.2221
Epoch 30/100, Loss: 1.0147
Epoch 40/100, Loss: 0.8897
Epoch 50/100, Loss: 0.8063
Epoch 60/100, Loss: 0.7464
Epoch 70/100, Loss: 0.7011
Epoch 80/100, Loss: 0.6654
Epoch 90/100, Loss: 0.6365
Epoch 100/100, Loss: 0.6126
Test set accuracy: 0.8706


Test set accuracy: 87.06%

With a more robust network and a fully vectorized implementation, the higher accuracy on the test set demonstrates a clear benefit of combining vectorization with the single-layer network.