In [2]:
import numpy as np

# Sigmoid activation function and its derivative
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of *x*, element-wise.

    The value is evaluated as ``exp(-log(1 + exp(-x)))``, with the inner
    logarithm computed by ``np.logaddexp(0, -x)``. That way ``exp`` is never
    called on a large positive argument; the direct formula overflows inside
    ``np.exp(-x)`` (emitting a RuntimeWarning) once ``x`` falls below roughly
    -709 in float64.

    Args:
        x: Scalar or NumPy array of pre-activation values.

    Returns:
        Sigmoid values in the closed interval [0, 1], with the same shape
        as *x*.
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x)).
    return np.exp(-np.logaddexp(0, -x))

def sigmoid_derivative(x):
    """Return ``x * (1 - x)``, element-wise.

    This is the derivative of the sigmoid expressed in terms of its output:
    if ``s = sigmoid(z)`` then ``ds/dz = s * (1 - s)``. The function does not
    apply the sigmoid itself, so *x* should already be a sigmoid activation
    rather than a pre-activation value.
    """
    one_minus_x = 1 - x
    return x * one_minus_x

# Mean Squared Error Loss and its derivative
def mse_loss(y_true, y_pred):
    """Return the mean squared error between targets and predictions.

    The difference ``y_true - y_pred`` is squared element-wise and then
    averaged over every element, producing a single scalar.
    """
    residual = y_true - y_pred
    return np.mean(np.square(residual))

def mse_loss_derivative(y_true, y_pred):
    """Return the gradient of the mean squared error with respect to *y_pred*.

    Each element of the result is ``2 * (y_pred - y_true) / N`` where ``N``
    is ``y_true.size``, i.e. the total number of target elements.
    """
    n_elements = y_true.size
    doubled_error = 2 * (y_pred - y_true)
    return doubled_error / n_elements

# Neural Network Class
class NeuralNetwork:
    """Two-layer fully connected network with sigmoid activations.

    The network has one hidden layer and one output layer, both followed by
    a sigmoid, and is trained with full-batch gradient descent on the mean
    squared error.

    Parameters held on the instance:
        weights_input_hidden: array of shape (input_size, hidden_size).
        bias_hidden: array of shape (hidden_size,).
        weights_hidden_output: array of shape (hidden_size, output_size).
        bias_output: array of shape (output_size,).
    """

    def __init__(self, input_size, hidden_size, output_size, *, seed=None):
        """Create the network with parameters drawn uniformly from [-0.5, 0.5).

        Args:
            input_size: Number of input features.
            hidden_size: Number of hidden units.
            output_size: Number of output units.
            seed: Optional seed for reproducible initialisation. With the
                default ``None`` the parameters are drawn from NumPy's
                global random state, so ``np.random.seed`` still controls
                them. With any other value a private
                ``np.random.RandomState(seed)`` is used and the global
                state is left untouched.
        """
        # The ``np.random`` module and a RandomState instance both expose
        # ``rand``, so either can serve as the source of random numbers.
        rng = np.random if seed is None else np.random.RandomState(seed)

        # ``rand`` samples from [0, 1); subtracting 0.5 centres it on zero.
        self.weights_input_hidden = rng.rand(input_size, hidden_size) - 0.5
        self.bias_hidden = rng.rand(hidden_size) - 0.5
        self.weights_hidden_output = rng.rand(hidden_size, output_size) - 0.5
        self.bias_output = rng.rand(output_size) - 0.5

    def forward(self, X):
        """Run a forward pass and return the network output.

        The pre-activations and activations of both layers are stored on
        the instance (``hidden_input``, ``hidden_output``, ``output_input``,
        ``output``); ``backward`` reads ``hidden_output`` from there.

        Args:
            X: Input array whose last dimension is ``input_size``.

        Returns:
            The sigmoid output of the final layer.
        """
        # Compute hidden layer
        self.hidden_input = np.dot(X, self.weights_input_hidden) + self.bias_hidden
        self.hidden_output = sigmoid(self.hidden_input)

        # Compute output layer
        self.output_input = np.dot(self.hidden_output, self.weights_hidden_output) + self.bias_output
        self.output = sigmoid(self.output_input)

        return self.output

    def backward(self, X, y, output, learning_rate):
        """Backpropagate the MSE loss and update all parameters in place.

        ``forward(X)`` must have been called beforehand, because this method
        uses the hidden activations cached in ``self.hidden_output``.

        Args:
            X: The 2-D input batch, one sample per row, that produced *output*.
            y: Target array matching *output* in shape.
            output: Network output for *X*, as returned by ``forward``.
            learning_rate: Step size applied to every gradient.
        """
        # Output layer error and gradient (loss gradient times sigmoid slope)
        output_error = mse_loss_derivative(y, output)
        output_gradient = output_error * sigmoid_derivative(output)

        # Hidden layer error and gradient; computed before any weight update
        # so it uses the same weights as the forward pass.
        hidden_error = np.dot(output_gradient, self.weights_hidden_output.T)
        hidden_gradient = hidden_error * sigmoid_derivative(self.hidden_output)

        # Update weights and biases; bias gradients are summed over samples.
        self.weights_hidden_output -= learning_rate * np.dot(self.hidden_output.T, output_gradient)
        self.bias_output -= learning_rate * np.sum(output_gradient, axis=0)
        self.weights_input_hidden -= learning_rate * np.dot(X.T, hidden_gradient)
        self.bias_hidden -= learning_rate * np.sum(hidden_gradient, axis=0)

    def train(self, X, y, epochs, learning_rate):
        """Train with full-batch gradient descent for a fixed number of epochs.

        Every epoch runs one forward pass over all of *X*, evaluates the MSE
        loss and applies one parameter update. The loss is printed every 100
        epochs (starting at epoch 0) and is the value measured before that
        epoch's update.

        Args:
            X: The 2-D input batch, one sample per row.
            y: Target array matching the network output in shape.
            epochs: Number of update steps to perform.
            learning_rate: Step size passed to ``backward``.
        """
        for epoch in range(epochs):
            # Forward pass
            output = self.forward(X)

            # Compute loss
            loss = mse_loss(y, output)

            # Backward pass
            self.backward(X, y, output, learning_rate)

            # Print loss every 100 epochs
            if epoch % 100 == 0:
                print(f"Epoch {epoch}, Loss: {loss}")

# XOR truth table: each row of X is one input pair, y holds the matching label.
X = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
y = np.array([
    [0],
    [1],
    [1],
    [0],
])

# Network dimensions and training settings.
input_size, hidden_size, output_size = 2, 4, 1
epochs = 50000
learning_rate = 0.2

# Build the network and fit it to the XOR data.
nn = NeuralNetwork(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
)
nn.train(X, y, epochs=epochs, learning_rate=learning_rate)

# Show the trained network's outputs for the four XOR inputs.
print("Final Predictions:", nn.forward(X), sep="\n")


Epoch 0, Loss: 0.2513673360948589
Epoch 100, Loss: 0.2500124624027848
Epoch 200, Loss: 0.2500057123792562
Epoch 300, Loss: 0.2500047207608552
Epoch 400, Loss: 0.25000376701954014
Epoch 500, Loss: 0.2500028262810234
Epoch 600, Loss: 0.25000189770575987
Epoch 700, Loss: 0.250000980585594
Epoch 800, Loss: 0.25000007424178644
Epoch 900, Loss: 0.24999917802267643
Epoch 1000, Loss: 0.2499982913018494
Epoch 1100, Loss: 0.24999741347642732
Epoch 1200, Loss: 0.24999654396547308
Epoch 1300, Loss: 0.2499956822084994
Epoch 1400, Loss: 0.24999482766407513
Epoch 1500, Loss: 0.24999397980851862
Epoch 1600, Loss: 0.24999313813467428
Epoch 1700, Loss: 0.2499923021507627
Epoch 1800, Loss: 0.249991471379301
Epoch 1900, Loss: 0.24999064535608617
Epoch 2000, Loss: 0.24998982362923672
Epoch 2100, Loss: 0.2499890057582893
Epoch 2200, Loss: 0.24998819131334393
Epoch 2300, Loss: 0.24998737987425562
Epoch 2400, Loss: 0.24998657102986796
Epoch 2500, Loss: 0.24998576437728554
Epoch 2600, Loss: 0.24998495952118202