In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [23]:
# ReLU activation function
def relu(x):
    return np.maximum(0, x)

# Derivative of ReLU activation function
def relu_derivative(x):
    return (x > 0).astype(float)


# softmax activation function
def softmax(x):
    return np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True)

# Cross-entropy loss function
def cross_entropy_loss(y_true, new_y_pred):
    m = y_true.shape[0]
    return -np.sum(y_true * np.log(new_y_pred + 1e-8)) / m

# one hot encoding
def one_hot_encoding(y):
    one_hot = np.zeros((y.size, y.max() + 1))
    one_hot[np.arange(y.size), y] = 1
    return one_hot

# Function to compute accuracy
def accuracy(y_true, y_pred):
    return np.mean(y_true == y_pred)

In [43]:
# loading dataset
X_train = pd.read_csv(r"Task_2\x_train.csv").values  # features
y_train = pd.read_csv(r"Task_2\y_train.csv").values  # labels

X_test = pd.read_csv(r"Task_2\x_test.csv").values
y_test = pd.read_csv(r"Task_2\y_test.csv").values


In [59]:
# Neural Network class with backpropagation and gradient descent
class NeuralNetwork:
    def __init__(self, input_size, hidden1_size, hidden2_size, output_size, learning_rate=0.01):
        # Initialize weights and biases
        np.random.seed(42)  # For reproducibility
        self.weights_input_hidden1 = np.random.randn(input_size, hidden1_size) * 0.01
        self.bias_hidden1 = np.zeros((1, hidden1_size))

        self.weights_hidden1_hidden2 = np.random.randn(hidden1_size, hidden2_size) * 0.01
        self.bias_hidden2 = np.zeros((1, hidden2_size))

        self.weights_hidden2_output = np.random.randn(hidden2_size, output_size) * 0.01
        self.bias_output = np.zeros((1, output_size))

        self.learning_rate = learning_rate

    def forward(self, X):
        # Input to hidden layer 1
        # print(X.shape)
        z1 = np.dot(X, self.weights_input_hidden1) + self.bias_hidden1
        # print(z1.shape)
        a1 = relu(z1)
        # print(a1.shape)

        # Hidden layer 1 to hidden layer 2
        z2 = np.dot(a1, self.weights_hidden1_hidden2) + self.bias_hidden2
        # print(z2.shape)
        a2 = relu(z2)
        # print(a2.shape)

        # Hidden layer 2 to output
        z3 = np.dot(a2, self.weights_hidden2_output) + self.bias_output
        # print(z3.shape)
        output = softmax(z3)
        # print(output.shape)

        return output, a2, z2, a1, z1

    def backward(self, X, y_true, output, a2, z2, a1, z1):
        m = X.shape[0]  # Number of samples

        # Compute gradient of output layer (Softmax + Cross-entropy loss)
        dz3 = output - y_true
        dw3 = np.dot(a2.T, dz3) / m
        db3 = np.sum(dz3, axis=0, keepdims=True) / m

        # Backpropagate to hidden layer 2
        dz2 = np.dot(dz3, self.weights_hidden2_output.T) * relu_derivative(z2)
        dw2 = np.dot(a1.T, dz2) / m
        db2 = np.sum(dz2, axis=0, keepdims=True) / m

        # Backpropagate to hidden layer 1
        dz1 = np.dot(dz2, self.weights_hidden1_hidden2.T) * relu_derivative(z1)
        dw1 = np.dot(X.T, dz1) / m
        db1 = np.sum(dz1, axis=0, keepdims=True) / m

        # Update weights and biases using gradient descent
        self.weights_hidden2_output -= self.learning_rate * dw3
        self.bias_output -= self.learning_rate * db3

        self.weights_hidden1_hidden2 -= self.learning_rate * dw2
        self.bias_hidden2 -= self.learning_rate * db2

        self.weights_input_hidden1 -= self.learning_rate * dw1
        self.bias_hidden1 -= self.learning_rate * db1

    # fucntion to train the model and return the loss for train and test 
    def train(self, X_train, y_train, X_test, y_test, epochs=50):
        train_loss = []
        test_loss = []
        train_accuracy = []
        test_accuracy = []

        for i in range(epochs):
            # Forward pass
            y_pred, a2_train, z2_train, a1_train, z1_train = self.forward(X_train)
            # print(y_pred.shape)
            y_test_pred, _, _, _, _ = self.forward(X_test)
            # print("forward pass done")

            # one hot 
            # print(y_train.shape)
            one_hot_y_train = one_hot_encoding(y_train)
            # print(one_hot_y_train.shape)
            one_hot_y_test = one_hot_encoding(y_test)
            # print("one hot done")

            # Compute loss
            loss = cross_entropy_loss(one_hot_y_train, y_pred)
            test_loss.append(cross_entropy_loss(one_hot_y_test, y_test_pred))
            train_loss.append(loss)
            # print("loss computed")

            # Backward pass
            # print("backward pass started")
            # print(one_hot_y_train.shape)
            # print(y_pred.shape)
            self.backward(X_train, one_hot_y_train, y_pred, a2_train, z2_train, a1_train, z1_train)
            # print("backward pass done")

            # Compute accuracy
            y_pred = np.argmax(y_pred, axis=1)
            y_test_pred = np.argmax(y_test_pred, axis=1)
            train_accuracy.append(accuracy(y_train, y_pred))
            test_accuracy.append(accuracy(y_test, y_test_pred))
            # print("accuracy computed")

            if i % 1 == 0:
                # print loss and accuracy for train and test
                print(f"Epoch: {i}, Train Loss: {loss}, Test Loss: {test_loss[-1]}, Train Accuracy: {train_accuracy[-1]}, Test Accuracy: {test_accuracy[-1]}")


        

In [58]:
y_pred = np.array([0.45, 0.65, 0.34, 0.78])
np.argmax(y_pred, axis=0)

3

In [30]:
y_train.shape

(13106, 1)

In [60]:
input_size = 14
hidden1_size = 100
hidden2_size = 40
output_size = 4
learning_rate = 1

nn = NeuralNetwork(input_size, hidden1_size, hidden2_size, output_size, learning_rate)
nn.train(X_train, y_train, X_test, y_test, epochs=50)

Epoch: 0, Train Loss: 5.545177297371861, Test Loss: 5.545177297402191, Train Accuracy: 0.25734852017994864, Test Accuracy: 0.2550251691735208
Epoch: 1, Train Loss: 5.545177390814024, Test Loss: 5.5451773914504265, Train Accuracy: 0.22167851033010744, Test Accuracy: 0.22434276143067353
Epoch: 2, Train Loss: 5.545177966874004, Test Loss: 5.545177968954516, Train Accuracy: 0.19971890914050686, Test Accuracy: 0.2034757909300034
Epoch: 3, Train Loss: 5.545180742216713, Test Loss: 5.545180744358687, Train Accuracy: 0.19800091561117045, Test Accuracy: 0.20238095238095238
Epoch: 4, Train Loss: 5.545196919841026, Test Loss: 5.545196921414362, Train Accuracy: 0.19800091561117045, Test Accuracy: 0.20238095238095238
Epoch: 5, Train Loss: 5.545287058801378, Test Loss: 5.545287043881181, Train Accuracy: 0.19800091561117045, Test Accuracy: 0.20238095238095238
Epoch: 6, Train Loss: 5.545384111757237, Test Loss: 5.545383991340962, Train Accuracy: 0.19800091561117045, Test Accuracy: 0.20238095238095238


  if sys.path[0] == '':
  if sys.path[0] == '':


Epoch: 8, Train Loss: nan, Test Loss: nan, Train Accuracy: 0.19800091561117045, Test Accuracy: 0.20238095238095238
Epoch: 9, Train Loss: nan, Test Loss: nan, Train Accuracy: 0.19800091561117045, Test Accuracy: 0.20238095238095238
Epoch: 10, Train Loss: nan, Test Loss: nan, Train Accuracy: 0.19800091561117045, Test Accuracy: 0.20238095238095238
Epoch: 11, Train Loss: nan, Test Loss: nan, Train Accuracy: 0.19800091561117045, Test Accuracy: 0.20238095238095238
Epoch: 12, Train Loss: nan, Test Loss: nan, Train Accuracy: 0.19800091561117045, Test Accuracy: 0.20238095238095238
Epoch: 13, Train Loss: nan, Test Loss: nan, Train Accuracy: 0.19800091561117045, Test Accuracy: 0.20238095238095238
Epoch: 14, Train Loss: nan, Test Loss: nan, Train Accuracy: 0.19800091561117045, Test Accuracy: 0.20238095238095238
Epoch: 15, Train Loss: nan, Test Loss: nan, Train Accuracy: 0.19800091561117045, Test Accuracy: 0.20238095238095238
Epoch: 16, Train Loss: nan, Test Loss: nan, Train Accuracy: 0.198000915611

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# ReLU activation function
def relu(x):
    return np.maximum(0, x)

# Derivative of ReLU activation function
def relu_derivative(x):
    return (x > 0).astype(float)

# Softmax function with numerical stability
def softmax(x):
    shift_x = x - np.max(x, axis=1, keepdims=True)  # Subtract max for stability
    exp_x = np.exp(shift_x)
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)

# Cross-entropy loss function with stability (prevents log(0))
def cross_entropy_loss(y_true, y_pred):
    n_samples = y_true.shape[0]
    y_pred = np.clip(y_pred, 1e-10, 1.0)  # Clip values to avoid log(0)
    correct_confidences = y_pred[range(n_samples), y_true]
    loss = -np.sum(np.log(correct_confidences)) / n_samples
    return loss

# Function to compute accuracy
def accuracy(y_true, y_pred):
    return np.mean(y_true == y_pred)

# Backpropagation and weight update with gradient clipping
def backprop(X, y, W1, b1, W2, b2, W3, b3, learning_rate, clip_value=5.0):
    # Forward propagation
    z1 = np.dot(X, W1) + b1
    a1 = relu(z1)  # ReLU activation

    z2 = np.dot(a1, W2) + b2
    a2 = relu(z2)  # ReLU activation

    z3 = np.dot(a2, W3) + b3
    output = softmax(z3)  # Softmax activation

    # Calculate loss
    loss = cross_entropy_loss(y, output)

    # Backward propagation
    m = y.shape[0]
    output[range(m), y] -= 1
    output /= m

    dW3 = np.dot(a2.T, output)
    db3 = np.sum(output, axis=0, keepdims=True)

    da2 = np.dot(output, W3.T)
    dz2 = da2 * relu_derivative(z2)  # ReLU derivative

    dW2 = np.dot(a1.T, dz2)
    db2 = np.sum(dz2, axis=0, keepdims=True)

    da1 = np.dot(dz2, W2.T)
    dz1 = da1 * relu_derivative(z1)  # ReLU derivative

    dW1 = np.dot(X.T, dz1)
    db1 = np.sum(dz1, axis=0, keepdims=True)

    # Gradient clipping to avoid exploding gradients
    dW1 = np.clip(dW1, -clip_value, clip_value)
    dW2 = np.clip(dW2, -clip_value, clip_value)
    dW3 = np.clip(dW3, -clip_value, clip_value)
    db1 = np.clip(db1, -clip_value, clip_value)
    db2 = np.clip(db2, -clip_value, clip_value)
    db3 = np.clip(db3, -clip_value, clip_value)

    # Update weights and biases
    W1 -= learning_rate * dW1
    b1 -= learning_rate * db1
    W2 -= learning_rate * dW2
    b2 -= learning_rate * db2
    W3 -= learning_rate * dW3
    b3 -= learning_rate * db3

    return W1, b1, W2, b2, W3, b3, loss

# Train the neural network
def train_neural_network(X_train, y_train, X_test, y_test, learning_rate, epochs):
    input_size = X_train.shape[1]
    hidden_size1 = 100
    hidden_size2 = 40
    output_size = 4  # Number of classes

    # Initialize weights and biases with small random values
    np.random.seed(42)
    W1 = np.random.randn(input_size, hidden_size1) * 0.01
    b1 = np.zeros((1, hidden_size1))
    W2 = np.random.randn(hidden_size1, hidden_size2) * 0.01
    b2 = np.zeros((1, hidden_size2))
    W3 = np.random.randn(hidden_size2, output_size) * 0.01
    b3 = np.zeros((1, output_size))

    train_costs = []
    test_costs = []
    train_accs = []
    test_accs = []

    for epoch in range(epochs):
        # Backpropagation and weight updates
        W1, b1, W2, b2, W3, b3, train_loss = backprop(X_train, y_train, W1, b1, W2, b2, W3, b3, learning_rate)

        # Forward pass for training accuracy
        z1 = np.dot(X_train, W1) + b1
        a1 = relu(z1)  # ReLU activation
        z2 = np.dot(a1, W2) + b2
        a2 = relu(z2)  # ReLU activation
        z3 = np.dot(a2, W3) + b3
        train_output = softmax(z3)
        y_train_pred = np.argmax(train_output, axis=1)
        train_acc = accuracy(y_train, y_train_pred)

        # Forward pass for test accuracy
        z1_test = np.dot(X_test, W1) + b1
        a1_test = relu(z1_test)  # ReLU activation
        z2_test = np.dot(a1_test, W2) + b2
        a2_test = relu(z2_test)  # ReLU activation
        z3_test = np.dot(a2_test, W3) + b3
        test_output = softmax(z3_test)
        y_test_pred = np.argmax(test_output, axis=1)
        test_acc = accuracy(y_test, y_test_pred)

        # Calculate test loss
        test_loss = cross_entropy_loss(y_test, test_output)

        # Store loss and accuracy values
        train_costs.append(train_loss)
        test_costs.append(test_loss)
        train_accs.append(train_acc)
        test_accs.append(test_acc)

        # Print progress every 10 epochs
        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch + 1} | Train Loss: {train_loss:.4f} | Test Loss: {test_loss:.4f} | Train Acc: {train_acc:.4f} | Test Acc: {test_acc:.4f}")

    return train_costs, test_costs, train_accs, test_accs

# Plot training results
def plot_metrics(train_costs, test_costs, train_accs, test_accs, learning_rate):
    epochs = len(train_costs)
    plt.figure(figsize=(14, 5))

    plt.subplot(1, 2, 1)
    plt.plot(range(epochs), train_costs, label='Train Loss')
    plt.plot(range(epochs), test_costs, label='Test Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title(f'Loss vs Epochs (Learning Rate = {learning_rate})')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(range(epochs), train_accs, label='Train Accuracy')
    plt.plot(range(epochs), test_accs, label='Test Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.title(f'Accuracy vs Epochs (Learning Rate = {learning_rate})')
    plt.legend()

    plt.tight_layout()
    plt.show()



X_train = pd.read_csv(r"Task_2\x_train.csv").values  # features
y_train = pd.read_csv(r"Task_2\y_train.csv").values  # labels

X_test = pd.read_csv(r"Task_2\x_test.csv").values
y_test = pd.read_csv(r"Task_2\y_test.csv").values


learning_rates = [1, 0.1, 0.001]
epochs = 50

for lr in learning_rates:
    print(f"\nTraining with learning rate: {lr}")
    train_costs, test_costs, train_accs, test_accs = train_neural_network(X_train, y_train, X_test, y_test, lr, epochs)
    plot_metrics(train_costs, test_costs, train_accs, test_accs, lr)
