In [2]:
import numpy as np
import pandas as pd

In [3]:
# Define ReLU activation function and its derivative
def relu(x):
    """Rectified linear unit, applied element-wise.

    Every entry of ``x`` below zero is replaced by 0; all other entries are
    passed through unchanged.
    """
    lower_bound = 0
    rectified = np.maximum(lower_bound, x)
    return rectified

def relu_derivative(x):
    """Gradient mask of ReLU: 1 where ``x`` is strictly positive, 0 elsewhere.

    Note that the value at exactly 0 is taken to be 0.
    """
    is_active = x > 0
    return np.where(is_active, 1, 0)

# Define Softmax function
def softmax(z):
    """Softmax along axis 1 (one probability distribution per row).

    Each row's maximum is subtracted before exponentiating so that large
    scores cannot overflow; the shift cancels out in the normalisation.
    """
    row_max = np.max(z, axis=1, keepdims=True)
    unnormalized = np.exp(z - row_max)
    row_totals = np.sum(unnormalized, axis=1, keepdims=True)
    return unnormalized / row_totals

# Cross-entropy loss function
def cross_entropy_loss(y_true, y_pred):
    """Cross-entropy between target rows and predicted probabilities.

    The summed loss is divided by ``y_true.shape[0]`` (the number of rows).
    A small constant (1e-8) is added inside the logarithm so that a predicted
    probability of exactly 0 does not produce ``-inf``.
    """
    num_rows = y_true.shape[0]
    log_probs = np.log(y_pred + 1e-8)
    summed = np.sum(y_true * log_probs)
    return -summed / num_rows

# One-hot encoding for labels
def one_hot_encode(y, num_classes):
    """Convert integer class labels into a one-hot matrix.

    Parameters
    ----------
    y : array-like of int
        Class labels. Lists and arrays of any shape are accepted (for
        example a ``(n, 1)`` column vector); the labels are flattened
        before encoding.
    num_classes : int
        Number of columns in the result. Every label must lie in
        ``[0, num_classes)``.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(number_of_labels, num_classes)`` with a
        single 1 per row, in the column given by that row's label.

    Raises
    ------
    IndexError
        If any label is negative or ``>= num_classes``.
    """
    # Flatten first: indexing with a (n, 1) label array would broadcast
    # against the row indices and mark every label's column in every row.
    labels = np.asarray(y).ravel()
    one_hot = np.zeros((labels.size, num_classes))
    if labels.size == 0:
        return one_hot

    # Negative indices would otherwise wrap around silently to the last classes.
    if labels.min() < 0 or labels.max() >= num_classes:
        raise IndexError(
            f"labels must be in [0, {num_classes}); "
            f"got values in [{labels.min()}, {labels.max()}]"
        )

    one_hot[np.arange(labels.size), labels] = 1
    return one_hot

# Neural Network class with backpropagation and gradient descent
class NeuralNetwork:
    """Fully connected classifier: two ReLU hidden layers and a softmax output.

    Parameters are updated by plain gradient descent on the cross-entropy
    loss, using the whole batch passed to ``train`` at every step.
    """

    def __init__(self, input_size, hidden1_size, hidden2_size, output_size, learning_rate=0.01):
        """Create small random weights and zero biases for the three layers."""
        # Seeds NumPy's *global* RNG so the initial weights are reproducible.
        np.random.seed(42)
        init_scale = 0.01

        # The draw order (layer 1, layer 2, output) fixes the values; keep it.
        self.weights_input_hidden1 = np.random.randn(input_size, hidden1_size) * init_scale
        self.bias_hidden1 = np.zeros((1, hidden1_size))

        self.weights_hidden1_hidden2 = np.random.randn(hidden1_size, hidden2_size) * init_scale
        self.bias_hidden2 = np.zeros((1, hidden2_size))

        self.weights_hidden2_output = np.random.randn(hidden2_size, output_size) * init_scale
        self.bias_output = np.zeros((1, output_size))

        self.learning_rate = learning_rate

    def forward(self, X):
        """Compute class probabilities for ``X``.

        The pre-activations (``z1``..``z3``), activations (``a1``, ``a2``)
        and the result (``output``) are stored on the instance because
        ``backward`` reads them.
        """
        # Layer 1: input -> hidden 1
        self.z1 = np.dot(X, self.weights_input_hidden1) + self.bias_hidden1
        self.a1 = relu(self.z1)

        # Layer 2: hidden 1 -> hidden 2
        self.z2 = np.dot(self.a1, self.weights_hidden1_hidden2) + self.bias_hidden2
        self.a2 = relu(self.z2)

        # Layer 3: hidden 2 -> class probabilities
        self.z3 = np.dot(self.a2, self.weights_hidden2_output) + self.bias_output
        self.output = softmax(self.z3)

        return self.output

    def backward(self, X, y_true):
        """Backpropagate from the most recent ``forward`` call and update all parameters.

        ``y_true`` holds the targets in the same shape as ``self.output``.
        """
        batch_size = X.shape[0]
        step = self.learning_rate

        # Output layer: for softmax combined with cross-entropy the error
        # signal is simply (prediction - target).
        delta_out = self.output - y_true
        grad_w_out = np.dot(self.a2.T, delta_out) / batch_size
        grad_b_out = np.sum(delta_out, axis=0, keepdims=True) / batch_size

        # Hidden layer 2
        delta_h2 = np.dot(delta_out, self.weights_hidden2_output.T) * relu_derivative(self.z2)
        grad_w_h2 = np.dot(self.a1.T, delta_h2) / batch_size
        grad_b_h2 = np.sum(delta_h2, axis=0, keepdims=True) / batch_size

        # Hidden layer 1
        delta_h1 = np.dot(delta_h2, self.weights_hidden1_hidden2.T) * relu_derivative(self.z1)
        grad_w_h1 = np.dot(X.T, delta_h1) / batch_size
        grad_b_h1 = np.sum(delta_h1, axis=0, keepdims=True) / batch_size

        # Apply the updates only after every gradient has been computed, so
        # each gradient is based on the pre-update weights.
        self.weights_hidden2_output -= step * grad_w_out
        self.bias_output -= step * grad_b_out

        self.weights_hidden1_hidden2 -= step * grad_w_h2
        self.bias_hidden2 -= step * grad_b_h2

        self.weights_input_hidden1 -= step * grad_w_h1
        self.bias_hidden1 -= step * grad_b_h1

    def train(self, X, y, epochs=1000):
        """Run ``epochs`` gradient-descent steps on ``(X, y)``.

        The loss is printed every 100th epoch; it is measured before that
        epoch's parameter update.
        """
        for epoch_number in range(1, epochs + 1):
            y_pred = self.forward(X)
            loss = cross_entropy_loss(y, y_pred)
            self.backward(X, y)

            if epoch_number % 100 == 0:
                print(f'Epoch {epoch_number}/{epochs}, Loss: {loss}')

# Example usage:
if __name__ == "__main__":
    # Hyperparameters
    input_size = 14
    hidden_layer1_size = 100
    hidden_layer2_size = 40
    output_size = 4
    learning_rate = 0.01
    epochs = 1000
    num_samples = 5  # size of the synthetic demo batch

    # Create a neural network. Its constructor seeds NumPy's global RNG, so
    # the random data generated below is reproducible as well.
    nn = NeuralNetwork(input_size, hidden_layer1_size, hidden_layer2_size, output_size, learning_rate)

    # Generate random input data (X) and labels (y).
    # The labels are drawn from [0, output_size) rather than a literal range so
    # they always agree with the one-hot width and the network's output layer.
    X = np.random.randn(num_samples, input_size)
    y = np.random.randint(0, output_size, size=num_samples)

    # One-hot encode the labels
    y_encoded = one_hot_encode(y, output_size)

    # Train the neural network
    nn.train(X, y_encoded, epochs=epochs)

    # Perform a forward pass on the input data to get the predicted probabilities
    predictions = nn.forward(X)

    # Convert predictions to a pandas DataFrame (one column per class) for easier reading
    predictions_df = pd.DataFrame(predictions, columns=[f'Class_{i}' for i in range(output_size)])

    print("\nPredicted Probabilities:")
    print(predictions_df)


Epoch 100/1000, Loss: 1.2995414654035606
Epoch 200/1000, Loss: 1.2440732431471346
Epoch 300/1000, Loss: 1.2073306817754061
Epoch 400/1000, Loss: 1.1818556261554292
Epoch 500/1000, Loss: 1.1634265664822026
Epoch 600/1000, Loss: 1.1495810392073493
Epoch 700/1000, Loss: 1.138801922226387
Epoch 800/1000, Loss: 1.1301310937694344
Epoch 900/1000, Loss: 1.122936027495496
Epoch 1000/1000, Loss: 1.1167728951532916

Predicted Probabilities:
    Class_0   Class_1   Class_2   Class_3
0  0.374872  0.061664  0.376161  0.187303
1  0.374468  0.061598  0.376819  0.187116
2  0.375829  0.061723  0.375026  0.187421
3  0.375613  0.061686  0.375376  0.187324
4  0.375180  0.061741  0.375214  0.187865


In [4]:
import numpy as np
import pandas as pd

# ReLU activation function
def relu(x):
    """Element-wise ReLU: negative entries of ``x`` become 0, the rest are kept."""
    lower_bound = 0
    rectified = np.maximum(lower_bound, x)
    return rectified

# ReLU derivative
def relu_derivative(x):
    """Return 1 where ``x`` is strictly positive and 0 elsewhere (including at 0)."""
    is_active = x > 0
    return np.where(is_active, 1, 0)

# Softmax function
def softmax(z):
    """Softmax along axis 1, shifted by each row's maximum to avoid overflow in ``exp``."""
    row_max = np.max(z, axis=1, keepdims=True)
    unnormalized = np.exp(z - row_max)
    row_totals = np.sum(unnormalized, axis=1, keepdims=True)
    return unnormalized / row_totals

# Cross-entropy loss function
def cross_entropy_loss(y_true, y_pred):
    """Cross-entropy of ``y_pred`` against ``y_true``, divided by the number of rows.

    1e-8 is added inside the logarithm to keep it finite for zero probabilities.
    """
    num_rows = y_true.shape[0]
    log_probs = np.log(y_pred + 1e-8)
    summed = np.sum(y_true * log_probs)
    return -summed / num_rows

In [51]:
# Fixed input sample (defined inline), plus weights, biases and reference
# gradients read from the Task_1 CSV files.
data_point = np.array([-1, 1, 1, 1, -1, -1, 1, -1, 1, 1, -1, -1, 1, 1])


def _read_csv_rows(path, first_row, n_rows, n_values=None, labelled=False):
    """Read a run of consecutive rows from a headerless CSV file as a NumPy array.

    The files read below hold several arrays stacked on top of each other,
    with a different number of values per row in each array, so each array is
    addressed by its first row, its row count and its row width.

    Parameters
    ----------
    path : str
        CSV file to read. Forward slashes are used so the path resolves on
        Windows and POSIX alike.
    first_row : int
        Number of leading rows to skip.
    n_rows : int
        Number of rows to read after the skipped ones.
    n_values : int or None
        Number of numeric values to keep per row. ``None`` keeps every column
        pandas finds.
    labelled : bool
        ``True`` when column 0 is a row label rather than data; it is then
        used as the index and excluded from the returned values.

    Returns
    -------
    numpy.ndarray
        The selected values (``DataFrame.values``).
    """
    label_columns = 1 if labelled else 0
    return pd.read_csv(
        path,
        header=None,
        index_col=0 if labelled else None,
        skiprows=range(first_row),
        nrows=n_rows,
        usecols=None if n_values is None else range(n_values + label_columns),
    ).values


# Task (a): weights and biases. Rows are labelled in column 0.
w0_input_hidden1 = _read_csv_rows('Task_1/a/w.csv', 0, 14, labelled=True)
w0_hidden1_hidden2 = _read_csv_rows('Task_1/a/w.csv', 14, 100, 40, labelled=True)
w0_hidden2_output = _read_csv_rows('Task_1/a/w.csv', 114, 40, 4, labelled=True)

b0_hidden1 = _read_csv_rows('Task_1/a/b.csv', 0, 1, labelled=True)
b0_hidden2 = _read_csv_rows('Task_1/a/b.csv', 1, 1, 40, labelled=True)
b0_output = _read_csv_rows('Task_1/a/b.csv', 2, 1, 4, labelled=True)

# Task (b): weights and biases, same layout as task (a).
w1_input_hidden1 = _read_csv_rows('Task_1/b/w-100-40-4.csv', 0, 14, labelled=True)
w1_hidden1_hidden2 = _read_csv_rows('Task_1/b/w-100-40-4.csv', 14, 100, 40, labelled=True)
w1_hidden2_output = _read_csv_rows('Task_1/b/w-100-40-4.csv', 114, 40, 4, labelled=True)

b1_hidden1 = _read_csv_rows('Task_1/b/b-100-40-4.csv', 0, 1, labelled=True)
b1_hidden2 = _read_csv_rows('Task_1/b/b-100-40-4.csv', 1, 1, 40, labelled=True)
b1_output = _read_csv_rows('Task_1/b/b-100-40-4.csv', 2, 1, 4, labelled=True)

# Task (a): reference gradients. These files are read without a label column.
correct_grad_w0_input_hidden1 = _read_csv_rows('Task_1/a/true-dw.csv', 0, 14)
correct_grad_w0_hidden1_hidden2 = _read_csv_rows('Task_1/a/true-dw.csv', 14, 100, 40)
correct_grad_w0_hidden2_output = _read_csv_rows('Task_1/a/true-dw.csv', 114, 40, 4)

correct_grad_b0_hidden1 = _read_csv_rows('Task_1/a/true-db.csv', 0, 1)
correct_grad_b0_hidden2 = _read_csv_rows('Task_1/a/true-db.csv', 1, 1, 40)
correct_grad_b0_output = _read_csv_rows('Task_1/a/true-db.csv', 2, 1, 4)


In [52]:
# Shape check: the first-layer reference gradient should have the same shape
# as the first-layer weight matrix w0_input_hidden1.
correct_grad_w0_input_hidden1.shape

(14, 100)

In [47]:
# Shape check for the w0_* weight matrices and b0_* bias rows, layer by layer.
w0_input_hidden1.shape, w0_hidden1_hidden2.shape, w0_hidden2_output.shape, b0_hidden1.shape, b0_hidden2.shape, b0_output.shape

((14, 100), (100, 40), (40, 4), (1, 100), (1, 40), (1, 4))

In [14]:
# NOTE(review): none of the cells shown defines a top-level `W0` (the name only
# exists as a local inside load_data()/main()), so unless it is defined
# elsewhere this raises NameError on a fresh kernel — TODO remove this cell or
# point it at one of the arrays that is actually loaded.
W0.shape

(153, 100)

In [None]:
import numpy as np
import pandas as pd

# ReLU activation function
def relu(x):
    """Element-wise ReLU: negative entries of ``x`` become 0, the rest are kept."""
    lower_bound = 0
    rectified = np.maximum(lower_bound, x)
    return rectified

# ReLU derivative
def relu_derivative(x):
    """Return 1 where ``x`` is strictly positive and 0 elsewhere (including at 0)."""
    is_active = x > 0
    return np.where(is_active, 1, 0)

# Softmax function
def softmax(z):
    """Softmax along axis 1, shifted by each row's maximum to avoid overflow in ``exp``."""
    row_max = np.max(z, axis=1, keepdims=True)
    unnormalized = np.exp(z - row_max)
    row_totals = np.sum(unnormalized, axis=1, keepdims=True)
    return unnormalized / row_totals

# Cross-entropy loss function
def cross_entropy_loss(y_true, y_pred):
    """Cross-entropy of ``y_pred`` against ``y_true``, divided by the number of rows.

    1e-8 is added inside the logarithm to keep it finite for zero probabilities.
    """
    num_rows = y_true.shape[0]
    log_probs = np.log(y_pred + 1e-8)
    summed = np.sum(y_true * log_probs)
    return -summed / num_rows

# Load data point, weights, and biases from CSV files
def load_data():
    """Read the input sample, two weight/bias pairs and the reference gradients from disk.

    Returns
    -------
    tuple
        ``(data_point, W0, b0, W1, b1, correct_grad_W0, correct_grad_b0)``,
        each taken from ``DataFrame.values``.
    """
    def read_values(path, **read_options):
        # Thin wrapper: parse the CSV and hand back the underlying array.
        return pd.read_csv(path, **read_options).values

    # NOTE(review): the weight/bias files are read with pandas' default header
    # handling (the first row becomes column names) while the data point and
    # gradient files use header=None — confirm the weight files really start
    # with a header row, otherwise their first data row is dropped.
    first_column_is_index = {'index_col': 0}
    no_header = {'header': None}

    data_point = read_values('data_point.txt', **no_header)

    W0 = read_values('Task_1/a/W0.csv', **first_column_is_index)
    b0 = read_values('Task_1/a/b0.csv', **first_column_is_index)

    W1 = read_values('Task_1/b/W1.csv', **first_column_is_index)
    b1 = read_values('Task_1/b/b1.csv', **first_column_is_index)

    correct_grad_W0 = read_values('Task_1/a/grad_W0.csv', **no_header)
    correct_grad_b0 = read_values('Task_1/a/grad_b0.csv', **no_header)

    return data_point, W0, b0, W1, b1, correct_grad_W0, correct_grad_b0

# Perform forward pass
def forward_pass(X, W0, b0, W1, b1):
    """Forward pass through one ReLU hidden layer followed by a softmax output layer.

    Returns
    -------
    tuple
        ``(z1, a1, output)``: the hidden pre-activation, the hidden
        activation and the softmax probabilities.
    """
    hidden_pre = np.dot(X, W0) + b0
    hidden_act = relu(hidden_pre)

    # Output scores go straight into softmax; they are not returned.
    probabilities = softmax(np.dot(hidden_act, W1) + b1)

    return hidden_pre, hidden_act, probabilities

# Perform backpropagation to calculate gradients
def backward_pass(X, y_true, z1, a1, output, W1):
    """Backpropagate through the network evaluated by ``forward_pass``.

    All gradients are divided by ``X.shape[0]``.

    Returns
    -------
    tuple
        ``(dW0, db0, dW1, db1)``: gradients for the hidden-layer weights and
        bias, then for the output-layer weights and bias.
    """
    batch_size = X.shape[0]

    def column_sums(delta):
        # Bias gradient: sum the error signal over rows, keeping a row-vector shape.
        return np.sum(delta, axis=0, keepdims=True)

    # Output layer: softmax combined with cross-entropy gives (prediction - target).
    delta_out = output - y_true
    grad_W1 = np.dot(a1.T, delta_out) / batch_size
    grad_b1 = column_sums(delta_out) / batch_size

    # Hidden layer: push the error back through W1 and gate it with the ReLU mask.
    delta_hidden = np.dot(delta_out, W1.T) * relu_derivative(z1)
    grad_W0 = np.dot(X.T, delta_hidden) / batch_size
    grad_b0 = column_sums(delta_hidden) / batch_size

    return grad_W0, grad_b0, grad_W1, grad_b1

# Save gradients to CSV files
def save_gradients(dW1, db1, out_dir='Task_1/b'):
    """Write the output-layer gradients to headerless, index-free CSV files.

    Parameters
    ----------
    dW1 : array-like
        Weight gradient, written to ``<out_dir>/grad_W1.csv``.
    db1 : array-like
        Bias gradient, written to ``<out_dir>/grad_b1.csv``.
    out_dir : str, optional
        Destination directory. It is created if it does not exist yet.
        Defaults to ``'Task_1/b'``, the location that was previously
        hard-coded.
    """
    # Local import: this cell's import block only provides numpy and pandas.
    import os

    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    for filename, gradient in (('grad_W1.csv', dW1), ('grad_b1.csv', db1)):
        # header=False is the documented way to omit the header row.
        pd.DataFrame(gradient).to_csv(os.path.join(out_dir, filename), header=False, index=False)

# Main function
def main():
    """Load the inputs, run one forward/backward pass, check and save the gradients.

    The computed ``dW0``/``db0`` are compared against the reference gradients
    with ``np.allclose`` and the results are printed; ``dW1``/``db1`` are
    written to disk by ``save_gradients``.
    """
    X, W0, b0, W1, b1, correct_grad_W0, correct_grad_b0 = load_data()

    # No true label is provided, so class 0 out of 4 classes is assumed:
    # a single one-hot row [1, 0, 0, 0].
    y_true = np.eye(1, 4)

    # Forward pass, then gradients of all four parameter arrays.
    hidden_pre, hidden_act, probabilities = forward_pass(X, W0, b0, W1, b1)
    grad_W0, grad_b0, grad_W1, grad_b1 = backward_pass(
        X, y_true, hidden_pre, hidden_act, probabilities, W1
    )

    # Sanity check against the reference gradients.
    w0_matches = np.allclose(grad_W0, correct_grad_W0)
    print("Gradient Check for W0:", w0_matches)
    b0_matches = np.allclose(grad_b0, correct_grad_b0)
    print("Gradient Check for b0:", b0_matches)

    # Persist the output-layer gradients.
    save_gradients(grad_W1, grad_b1)

# Run the gradient check when this code is executed directly; the guard also
# passes when the cell is run inside a notebook kernel.
if __name__ == "__main__":
    main()
