In [6]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# loading dataset
X_train = pd.read_csv(r"Task_2\x_train.csv").values  # features
y_train = pd.read_csv(r"Task_2\y_train.csv").values  # labels

X_test = pd.read_csv(r"Task_2\x_test.csv").values
y_test = pd.read_csv(r"Task_2\y_test.csv").values

# ReLU activation function
def relu(x):
    return np.maximum(0, x)

# Derivative of ReLU activation function
def relu_derivative(x):
    return (x > 0).astype(float)


# softmax activation function
def softmax(x):
    return np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True)

# Cross-entropy loss function
def cross_entropy_loss(y_true, y_pred):
    m = y_true.shape[0]
    return -np.sum(y_true * np.log(y_pred + 1e-8)) / m

# one hot encoding
def one_hot_encoding(y, num_classes=4):
    one_hot = np.zeros((y.size, num_classes))
    one_hot[np.arange(y.size), y] = 1
    return one_hot

# Function to compute accuracy
def accuracy(y_true, y_pred):
    return np.mean(y_true == y_pred)


# Neural Network class with backpropagation and gradient descent
class NeuralNetwork:
    def __init__(self, input_size, hidden1_size, hidden2_size, output_size, learning_rate=0.01):
        # Initialize weights and biases
        np.random.seed(42)  # For reproducibility
        self.weights_input_hidden1 = np.random.randn(input_size, hidden1_size) * 0.01
        self.bias_hidden1 = np.zeros((1, hidden1_size))

        self.weights_hidden1_hidden2 = np.random.randn(hidden1_size, hidden2_size) * 0.01
        self.bias_hidden2 = np.zeros((1, hidden2_size))

        self.weights_hidden2_output = np.random.randn(hidden2_size, output_size) * 0.01
        self.bias_output = np.zeros((1, output_size))

        self.learning_rate = learning_rate

    def forward(self, X):
        # Input to hidden layer 1
        # print(X.shape)
        z1 = np.dot(X, self.weights_input_hidden1) + self.bias_hidden1
        # print(z1.shape)
        a1 = relu(z1)
        # print(a1.shape)

        # Hidden layer 1 to hidden layer 2
        z2 = np.dot(a1, self.weights_hidden1_hidden2) + self.bias_hidden2
        # print(z2.shape)
        a2 = relu(z2)
        # print(a2.shape)

        # Hidden layer 2 to output
        z3 = np.dot(a2, self.weights_hidden2_output) + self.bias_output
        # print(z3.shape)
        output = softmax(z3)
        # print(output.shape)

        return output, a2, z2, a1, z1

    def backward(self, X, y_true, output, a2, z2, a1, z1):
        m = X.shape[0]  # Number of samples

        # Compute gradient of output layer (Softmax + Cross-entropy loss)
        dz3 = output - y_true
        dw3 = np.dot(a2.T, dz3) / m
        db3 = np.sum(dz3, axis=0, keepdims=True) / m

        # Backpropagate to hidden layer 2
        dz2 = np.dot(dz3, self.weights_hidden2_output.T) * relu_derivative(a2)
        dw2 = np.dot(a1.T, dz2) / m
        db2 = np.sum(dz2, axis=0, keepdims=True) / m

        # Backpropagate to hidden layer 1
        dz1 = np.dot(dz2, self.weights_hidden1_hidden2.T) * relu_derivative(a1)
        dw1 = np.dot(X.T, dz1) / m
        db1 = np.sum(dz1, axis=0, keepdims=True) / m

        # Update weights and biases using gradient descent
        self.weights_hidden2_output -= self.learning_rate * dw3
        print(self.bias_output)
        self.bias_output -= self.learning_rate * db3
        print(self.bias_output)

        self.weights_hidden1_hidden2 -= self.learning_rate * dw2
        self.bias_hidden2 -= self.learning_rate * db2

        self.weights_input_hidden1 -= self.learning_rate * dw1
        self.bias_hidden1 -= self.learning_rate * db1

    # fucntion to train the model and return the loss for train and test 
    def train(self, X_train, y_train, X_test, y_test, epochs=50):
        train_loss = []
        test_loss = []
        train_accuracy = []
        test_accuracy = []

        for i in range(epochs):
            # Forward pass
            y_pred, a2_train, z2_train, a1_train, z1_train = self.forward(X_train)
            # print(y_pred.shape)
            y_test_pred, _, _, _, _ = self.forward(X_test)
            # print("forward pass done")

            # one hot 
            # print(y_train.shape)
            one_hot_y_train = one_hot_encoding(y_train)
            # print(one_hot_y_train.shape)
            one_hot_y_test = one_hot_encoding(y_test)
            # print("one hot done")

            # Compute loss
            loss = cross_entropy_loss(one_hot_y_train, y_pred)
            test_loss.append(cross_entropy_loss(one_hot_y_test, y_test_pred))
            train_loss.append(loss)
            # print("loss computed")

            # Backward pass
            # print("backward pass started")
            # print(one_hot_y_train.shape)
            # print(y_pred.shape)
            self.backward(X_train, one_hot_y_train, y_pred, a2_train, z2_train, a1_train, z1_train)
            # print("backward pass done")

            # Compute accuracy
            y_pred = np.argmax(y_pred, axis=1)
            y_test_pred = np.argmax(y_test_pred, axis=1)
            train_accuracy.append(accuracy(y_train, y_pred))
            test_accuracy.append(accuracy(y_test, y_test_pred))
            # print("accuracy computed")

            if i % 1 == 0:
                # print loss and accuracy for train and test
                # print(train_loss)
                # print(test_loss)
                # print(train_accuracy)
                # print(test_accuracy)
                print(f"Epoch: {i}, Train Loss: {loss}, Test Loss: {test_loss[-1]}, Train Accuracy: {train_accuracy[-1]}, Test Accuracy: {test_accuracy[-1]}")


if __name__ == "__main__":
    input_size = 14
    hidden1_size = 100
    hidden2_size = 40
    output_size = 4
    learning_rate = 1

    nn = NeuralNetwork(input_size, hidden1_size, hidden2_size, output_size, learning_rate)
    nn.train(X_train, y_train, X_test, y_test, epochs=50)

[[0. 0. 0. 0.]]
[[0.75000574 0.74998778 0.7499874  0.75001908]]
Epoch: 0, Train Loss: 5.545177297371861, Test Loss: 5.545177297402191, Train Accuracy: 0.25734852017994864, Test Accuracy: 0.2550251691735208
[[0.75000574 0.74998778 0.7499874  0.75001908]]
[[1.49996208 1.49996136 1.49996376 1.5001128 ]]
Epoch: 1, Train Loss: 5.545177390814024, Test Loss: 5.5451773914504265, Train Accuracy: 0.22167851033010744, Test Accuracy: 0.22434276143067353
[[1.49996208 1.49996136 1.49996376 1.5001128 ]]
[[2.24982796 2.24994708 2.24987332 2.25035164]]
Epoch: 2, Train Loss: 5.545177966874004, Test Loss: 5.545177968954516, Train Accuracy: 0.19971890914050686, Test Accuracy: 0.2034757909300034
[[2.24982796 2.24994708 2.24987332 2.25035164]]
[[2.9995308  2.99988046 2.99968742 3.00090133]]
Epoch: 3, Train Loss: 5.545180742216713, Test Loss: 5.545180744358687, Train Accuracy: 0.19800091561117045, Test Accuracy: 0.20238095238095238
[[2.9995308  2.99988046 2.99968742 3.00090133]]
[[3.74878263 3.7497577  3.749



[[6.02341836 6.00615222 6.01536776 5.95506166]]
[[nan nan nan nan]]
Epoch: 8, Train Loss: nan, Test Loss: nan, Train Accuracy: 0.19800091561117045, Test Accuracy: 0.20238095238095238
[[nan nan nan nan]]
[[nan nan nan nan]]
Epoch: 9, Train Loss: nan, Test Loss: nan, Train Accuracy: 0.19800091561117045, Test Accuracy: 0.20238095238095238
[[nan nan nan nan]]
[[nan nan nan nan]]


KeyboardInterrupt: 