In [1]:
import numpy as np
import math

In [2]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)

def binary_cross_entropy(y_true, y_pred):
    return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))


In [3]:
X = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1]
])

y = np.array([[0], [1], [1], [0]])  

input_neurons = 2      
hidden_neurons = 2    
output_neurons = 1     

learning_rate = 0.1    
epochs = 10000        

np.random.seed(42)
W1 = np.random.randn(input_neurons, hidden_neurons)  
b1 = np.zeros((1, hidden_neurons))                   
W2 = np.random.randn(hidden_neurons, output_neurons) 
b2 = np.zeros((1, output_neurons))                   


In [4]:
# Training the MLP
for epoch in range(epochs):
    # Forward Propagation
    Z1 = np.dot(X, W1) + b1
    A1 = sigmoid(Z1)  # Hidden layer activation

    Z2 = np.dot(A1, W2) + b2
    A2 = sigmoid(Z2)  # Output layer activation

    # Compute Loss
    loss = binary_cross_entropy(y, A2)

    #  Backpropagation
    # Output layer error
    dL_dA2 = A2 - y  # Derivative of BCE loss w.r.t A2
    dA2_dZ2 = sigmoid_derivative(A2)
    dL_dZ2 = dL_dA2 * dA2_dZ2  # Gradient of Loss w.r.t Z2

    # Hidden layer error
    dL_dA1 = np.dot(dL_dZ2, W2.T)  # Backpropagate error from output to hidden layer
    dA1_dZ1 = sigmoid_derivative(A1)
    dL_dZ1 = dL_dA1 * dA1_dZ1  # Gradient of Loss w.r.t Z1

    # Compute Gradients
    dL_dW2 = np.dot(A1.T, dL_dZ2)  # Gradient for W2
    dL_db2 = np.sum(dL_dZ2, axis=0, keepdims=True)  # Gradient for b2

    dL_dW1 = np.dot(X.T, dL_dZ1)  # Gradient for W1
    dL_db1 = np.sum(dL_dZ1, axis=0, keepdims=True)  # Gradient for b1

    # Update Weights and Biases
    W2 -= learning_rate * dL_dW2
    b2 -= learning_rate * dL_db2
    W1 -= learning_rate * dL_dW1
    b1 -= learning_rate * dL_db1

    # Print loss every 1000 epochs
    if epoch % 1000 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")


Epoch 0, Loss: 0.7049
Epoch 1000, Loss: 0.6920
Epoch 2000, Loss: 0.6840
Epoch 3000, Loss: 0.5989
Epoch 4000, Loss: 0.4697
Epoch 5000, Loss: 0.4222
Epoch 6000, Loss: 0.4023
Epoch 7000, Loss: 0.3916
Epoch 8000, Loss: 0.3848
Epoch 9000, Loss: 0.3801


In [6]:
print("\nFinal Predictions after training:")
for i in range(len(X)):
    Z1 = np.dot(X[i], W1) + b1
    A1 = sigmoid(Z1)
    Z2 = np.dot(A1, W2) + b2
    A2 = sigmoid(Z2)
    
    # Apply threshold
    predicted_output = 1 if A2[0][0] >= 0.5 else 0
    print(f"Input: {X[i]} -> Predicted Output: {predicted_output}")



Final Predictions after training:
Input: [0 0] -> Predicted Output: 0
Input: [0 1] -> Predicted Output: 0
Input: [1 0] -> Predicted Output: 1
Input: [1 1] -> Predicted Output: 1
