In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split

In [None]:

# Load datasets
train_data = pd.read_csv('MNIST-train.csv')

# Shuffle the training data.
# The shuffle is seeded: without a fixed random_state here the row order fed to
# train_test_split changes on every run, so its own random_state=42 would not
# be enough to make the train/validation split reproducible.
train_data = train_data.sample(frac=1, random_state=42).reset_index(drop=True)

# Separate features and labels
# NOTE(review): assumes the label is stored in the LAST column of the CSV --
# confirm against the file header (some MNIST CSV exports put it first).
X_train = train_data.iloc[:, :-1].values  # Features (all columns except the last one)
y_train = train_data.iloc[:, -1].values  # Labels (last column)

# Split the training data into training and validation sets (90% training, 10% validation)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=42)

# Normalize the data
# Presumably raw pixel intensities are in 0..255, so this rescales to 0..1 -- TODO confirm.
X_train = X_train / 255.0
X_val = X_val / 255.0

# Transpose so that each COLUMN is one example: (n_features, n_examples).
# This is the layout the network's W.dot(X) products expect.
X_train = X_train.T
X_val = X_val.T


print("Training set:", X_train.shape, y_train.shape)
print("Validation set:", X_val.shape, y_val.shape)


In [None]:

class SimpleNeuralNetwork:
    """Two-layer fully connected classifier: ReLU hidden layer, softmax output.

    Data is laid out column-wise: inputs are ``(input_size, m)`` arrays holding
    one example per column, and labels are a length-``m`` array of integer
    class ids. Training is full-batch gradient descent.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the parameters, drawn uniformly from [-0.5, 0.5).

        Args:
            input_size: Number of input features per example.
            hidden_size: Number of hidden units.
            output_size: Number of output classes.
        """
        self.W1 = np.random.rand(hidden_size, input_size) - 0.5
        self.b1 = np.random.rand(hidden_size, 1) - 0.5
        self.W2 = np.random.rand(output_size, hidden_size) - 0.5
        self.b2 = np.random.rand(output_size, 1) - 0.5

    def ReLU(self, Z):
        """Element-wise rectified linear unit: ``max(Z, 0)``."""
        return np.maximum(Z, 0)

    def softmax(self, Z):
        """Column-wise softmax of ``Z``; every column of the result sums to 1.

        The per-column maximum is subtracted before exponentiating. This does
        not change the mathematical result, but it keeps ``np.exp`` from
        overflowing to ``inf`` (and the ratio from becoming ``nan``) when the
        logits are large.
        """
        shifted = Z - np.max(Z, axis=0, keepdims=True)
        exp_Z = np.exp(shifted)
        return exp_Z / np.sum(exp_Z, axis=0, keepdims=True)

    def forward_prop(self, X):
        """Run a forward pass.

        Args:
            X: Input array of shape ``(input_size, m)``.

        Returns:
            Tuple ``(Z1, A1, Z2, A2)``: hidden pre-activation, hidden
            activation, output pre-activation and softmax probabilities.
        """
        Z1 = self.W1.dot(X) + self.b1
        A1 = self.ReLU(Z1)
        Z2 = self.W2.dot(A1) + self.b2
        A2 = self.softmax(Z2)
        return Z1, A1, Z2, A2

    def ReLU_deriv(self, Z):
        """Derivative of ReLU as a boolean mask (True where ``Z > 0``)."""
        return Z > 0

    def one_hot(self, Y, num_classes=None):
        """One-hot encode integer labels into a ``(num_classes, Y.size)`` array.

        Args:
            Y: 1-D array of integer class ids.
            num_classes: Number of rows in the encoding. Defaults to
                ``Y.max() + 1``, which is only correct when the highest class
                actually occurs in ``Y``.

        Returns:
            Array with a single 1 per column, in the row given by the label.
        """
        if num_classes is None:
            num_classes = Y.max() + 1
        one_hot_Y = np.zeros((Y.size, num_classes))
        one_hot_Y[np.arange(Y.size), Y] = 1
        return one_hot_Y.T

    def backward_prop(self, Z1, A1, Z2, A2, X, Y):
        """Compute parameter gradients for one batch.

        Args:
            Z1, A1, Z2, A2: Values returned by ``forward_prop(X)``.
            X: Input array of shape ``(input_size, m)``.
            Y: Integer labels of length ``m``.

        Returns:
            Tuple ``(dW1, db1, dW2, db2)`` with the same shapes as the
            corresponding parameters.
        """
        # Size the encoding from the network output rather than from Y.max():
        # a batch that happens to miss the highest class would otherwise give
        # a one-hot matrix whose shape does not match A2.
        one_hot_Y = self.one_hot(Y, num_classes=A2.shape[0])
        m = X.shape[1]
        # Gradient of softmax + cross-entropy with respect to Z2.
        dZ2 = A2 - one_hot_Y
        dW2 = 1 / m * dZ2.dot(A1.T)
        db2 = 1 / m * np.sum(dZ2, axis=1, keepdims=True)
        dZ1 = self.W2.T.dot(dZ2) * self.ReLU_deriv(Z1)
        dW1 = 1 / m * dZ1.dot(X.T)
        db1 = 1 / m * np.sum(dZ1, axis=1, keepdims=True)
        return dW1, db1, dW2, db2

    def update_params(self, dW1, db1, dW2, db2, alpha):
        """Apply one gradient-descent step in place with learning rate ``alpha``."""
        self.W1 -= alpha * dW1
        self.b1 -= alpha * db1
        self.W2 -= alpha * dW2
        self.b2 -= alpha * db2

    def get_predictions(self, A2):
        """Return the index of the most probable class for each column of ``A2``."""
        return np.argmax(A2, 0)

    def get_accuracy(self, predictions, Y):
        """Return the fraction of ``predictions`` that equal ``Y``."""
        return np.sum(predictions == Y) / Y.size

    def fit(self, X, Y, alpha, iterations):
        """Train with full-batch gradient descent.

        Prints the training accuracy every 10 iterations. The reported value
        is computed from the forward pass made *before* that iteration's
        parameter update.

        Args:
            X: Training inputs of shape ``(input_size, m)``.
            Y: Integer labels of length ``m``.
            alpha: Learning rate.
            iterations: Number of gradient-descent steps.
        """
        for i in range(iterations):
            Z1, A1, Z2, A2 = self.forward_prop(X)
            dW1, db1, dW2, db2 = self.backward_prop(Z1, A1, Z2, A2, X, Y)
            self.update_params(dW1, db1, dW2, db2, alpha)
            if i % 10 == 0:
                predictions = self.get_predictions(A2)
                accuracy = self.get_accuracy(predictions, Y)
                print(f"Iteration: {i}, Accuracy: {accuracy}")

    def predict(self, X):
        """Return the predicted class id for each column of ``X``."""
        _, _, _, A2 = self.forward_prop(X)
        predictions = self.get_predictions(A2)
        return predictions


# Train the neural network.
# The weights are drawn from NumPy's global random state via np.random.rand,
# so seed it first; otherwise every Restart & Run All gives a different model.
np.random.seed(42)
# 784 inputs, 16 hidden units, 10 output classes.
nn = SimpleNeuralNetwork(input_size=784, hidden_size=16, output_size=10)
nn.fit(X_train, y_train, alpha=0.1, iterations=500)

# Function to test a prediction
def test_prediction(index, nn, X, Y):
    """Print the model's prediction and the true label for one example, then show it.

    Args:
        index: Column of ``X`` (and position in ``Y``) to inspect.
        nn: Object exposing ``predict``; it is called on a single-column array.
        X: 2-D array with one example per column; the selected column is
            reshaped to 28x28 for display.
        Y: Labels, indexed by ``index``.
    """
    # Indexing with None keeps the example as a column rather than a flat vector.
    sample = X[:, index, None]
    predicted = nn.predict(sample)
    actual = Y[index]
    print("Prediction: ", predicted)
    print("Label: ", actual)

    # Scale back up by 255 and lay the column out as a 28x28 image for plotting.
    pixels = sample.reshape((28, 28)) * 255
    plt.imshow(pixels, cmap='gray', interpolation='nearest')
    plt.show()

# Spot-check the trained network on a few training examples
for sample_index in (20, 0, 5, 12):
    test_prediction(sample_index, nn, X_train, y_train)
