## Name: Ziad Wael Abdl Hamed        ID: 20200211
## Name: Mazen Mohamed Kamal         ID: 20200411


In [1]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

In [2]:
def load_mnist():
    # Load MNIST dataset
    mnist = fetch_openml('mnist_784', version=1, cache=True, as_frame=False,parser='auto')
    X = mnist.data
    y = mnist.target
    return X, y

In [3]:
def standardize_data(X):
    # Standardize the dataset
    x_mean = np.mean(X, axis=0)
    x_std = np.std(X, axis=0)
    x_std[x_std == 0] = 1  # avoid division by zero
    X_standardized = (X - x_mean) / x_std
    return X_standardized

In [4]:
def divide_data(X, y, test_size=0.2):
    # Divide data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)
    return X_train, X_test, y_train, y_test


In [None]:
def softMax(x):
    # Softmax function
    return np.exp(x) / np.sum(np.exp(x))

In [5]:
def one_hot_encoding(y):
    # Perform one-hot encoding
    num_classes = len(np.unique(y))
    num_samples = len(y)
    encoded = np.zeros((num_samples, num_classes))
    for i in range(num_samples):
        encoded[i, int(y[i])] = 1
    return encoded

In [6]:
def initialize_weights(size_of_layers):
    # Initialize weights using Xavier initialization
    num_layers = len(size_of_layers)
    
    weights = []
    for i in range(num_layers - 1):
        weight_matrix = np.random.randn(size_of_layers[i], size_of_layers[i+1])
        weights.append(weight_matrix)
    return weights

In [7]:
def initialize_biases(size_of_layers):
    # Initialize biases with zeros
    num_layers = len(size_of_layers)
    biases = []
    for i in range(num_layers - 1):
        bias_vector = np.random.randn(size_of_layers[i+1])
        biases.append(bias_vector)
    return biases

In [21]:
def sigmoid(x):
    # Sigmoid activation function
    return 1 / (1 + np.exp(-x))

In [9]:
def forward_pass(X, weights, biases):
    # Forward pass through the network
    num_layers = len(weights) + 1
    layer_outputs = [X]
    for i in range(num_layers - 1):
        layer_input = np.dot(layer_outputs[i], weights[i]) + biases[i]
        layer_output = sigmoid(layer_input)
        layer_outputs.append(layer_output)

    return layer_outputs

In [10]:
def calculate_loss(y_true, y_pred):
    # Calculate Mean Squared Error loss
    loss = np.mean((y_true - y_pred) ** 2)
    return loss

In [11]:
def backpropagation(x, y, layer_outputs, weights, biases, learning_rate):
    num_layers = len(weights)
    errors = [layer_outputs[-1] - y]
    for i in range(num_layers-1, -1, -1):
        d_weights = np.dot(layer_outputs[i].T, errors[-1])
        weights[i] -= learning_rate * d_weights
        biases[i] -= learning_rate * np.mean(errors[-1])
        errors.append(np.dot(errors[-1], weights[i].T) * layer_outputs[i] * (1 - layer_outputs[i]))

    return weights, biases

In [12]:
def train_neural_network(X_train, y_train, num_of_layers, size_of_layers, learning_rate, num_epochs):
    weights = initialize_weights(size_of_layers)
    biases = initialize_biases(size_of_layers)

    for epoch in range(num_epochs):
        layer_outputs = forward_pass(X_train, weights, biases)
        weights, biases = backpropagation(X_train, y_train, layer_outputs, weights, biases, learning_rate)

    return weights, biases


In [13]:
def predict(X, weights, biases):
    layer_outputs = forward_pass(X, weights, biases)
    predictions = np.argmax(layer_outputs[-1], axis=1)
    return predictions

In [14]:
def calculate_accuracy(predictions, targets):
    # Calculate accuracy
    correct = np.sum(predictions == targets)
    total = len(predictions)
    accuracy = (correct / total) * 100
    return accuracy

In [15]:
def NN(x, y, num_of_layers, size_of_layers, learning_rate=0.01, num_epochs=100):
    X_standardized = standardize_data(x)
    X_train, X_test, y_train, y_test = divide_data(X_standardized, y)
    y_train_encoded = one_hot_encoding(y_train)
    y_test_encoded = one_hot_encoding(y_test)  
    size_of_layers.insert(0, X_train.shape[1])  # add input layer size to the beginning of the list
    weights, biases = train_neural_network(X_train, y_train_encoded, num_of_layers, size_of_layers, learning_rate, num_epochs)

    train_predictions = predict(X_train, weights, biases)
    test_predictions = predict(X_test, weights, biases)

    train_accuracy = calculate_accuracy(train_predictions, y_train_encoded.argmax(axis=1))
    test_accuracy = calculate_accuracy(test_predictions, y_test_encoded.argmax(axis=1))

    return train_accuracy, test_accuracy

In [16]:
# Load MNIST dataset
X, y = load_mnist()

In [29]:
# Test different architectures
architectures = [
    (2, [20, 10]),
    (3, [5, 10, 10]),
    (3, [20, 10, 5])
]


In [28]:
for i, (num_layers, size_of_layers) in enumerate(architectures):
    print(f"Architecture {i+1}")
    train_accuracy, test_accuracy = NN(X, y, num_layers, size_of_layers)
    print(f"Test Accuracy: {test_accuracy}")

Architecture 1
Test Accuracy: 85.2%
Architecture 2
Test Accuracy: 87.1%
Architecture 3
Test Accuracy: 88.2%
