## **AIM**

**Implement an Ex-OR (XOR) gate, or any other problem, using a backpropagation neural network written from scratch (self-implementation).**


In [None]:
import numpy as np

# Activation Functions

def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` element-wise.

    The value is computed as ``exp(-log(1 + exp(-z)))`` using
    ``np.logaddexp`` so that very negative inputs do not overflow inside
    ``exp`` (the naive formula emits a RuntimeWarning there).

    Args:
        z: A scalar, NumPy array, or array-like of real values.

    Returns:
        A NumPy float (scalar input) or array with values in [0, 1].
    """
    # logaddexp(0, -z) == log(exp(0) + exp(-z)) == log(1 + exp(-z)),
    # evaluated without ever forming the potentially huge exp(-z).
    return np.exp(-np.logaddexp(0.0, np.negative(z)))

def sigmoid_derivative(a):
    """Return the sigmoid slope expressed through the sigmoid output.

    Args:
        a: The sigmoid activation value(s), i.e. the *output* of the
            sigmoid rather than its pre-activation input.

    Returns:
        ``a * (1 - a)``, computed element-wise.
    """
    complement = 1 - a
    return complement * a

def tanh(z):
    """Apply the hyperbolic tangent element-wise (thin wrapper over NumPy).

    Args:
        z: A scalar or array of pre-activation values.

    Returns:
        ``np.tanh(z)``.
    """
    activation = np.tanh(z)
    return activation

def tanh_derivative(a):
    """Return the tanh slope expressed through the tanh output.

    Args:
        a: The tanh activation value(s), i.e. the *output* of tanh rather
            than its pre-activation input.

    Returns:
        ``1 - a**2``, computed element-wise.
    """
    squared = a ** 2
    return 1 - squared



# Forward Propagation
def forward(X, W1, b1, W2, b2):
    """Run one forward pass through the two-layer network.

    The hidden layer uses tanh and the output layer uses sigmoid.

    Args:
        X: Input matrix fed to the first layer.
        W1: Hidden-layer weight matrix (left-multiplies ``X``).
        b1: Hidden-layer bias, added to ``W1 . X``.
        W2: Output-layer weight matrix (left-multiplies the hidden output).
        b2: Output-layer bias.

    Returns:
        A pair ``(A2, cache)`` where ``A2`` is the sigmoid output and
        ``cache`` is the tuple ``(Z1, A1, Z2, A2)`` of intermediate values.
    """
    # Hidden layer: affine transform followed by tanh.
    hidden_pre = np.dot(W1, X) + b1
    hidden_act = tanh(hidden_pre)

    # Output layer: affine transform followed by sigmoid.
    output_pre = np.dot(W2, hidden_act) + b2
    output_act = sigmoid(output_pre)

    return output_act, (hidden_pre, hidden_act, output_pre, output_act)


# Cost Function
def compute_cost(A2, Y):
    """Compute the mean binary cross-entropy between predictions and labels.

    Args:
        A2: Predicted probabilities, same shape as ``Y``.
        Y: Target labels; its second dimension is used as the number of
            examples to average over.

    Returns:
        The summed cross-entropy divided by ``Y.shape[1]``.
    """
    eps = 1e-8  # keeps log() finite when a prediction is exactly 0 or 1
    num_examples = Y.shape[1]

    positive_term = Y * np.log(A2 + eps)
    negative_term = (1 - Y) * np.log(1 - A2 + eps)

    return -(1 / num_examples) * np.sum(positive_term + negative_term)



# Backward Propagation
def backward(X, Y, cache, W2):
    """Compute gradients of the cost with respect to all parameters.

    Args:
        X: Input matrix; its second dimension is used as the number of
            examples the gradients are averaged over.
        Y: Target labels, same shape as the network output.
        cache: The 4-tuple ``(Z1, A1, Z2, A2)`` of forward-pass values;
            only ``A1`` and ``A2`` are used here.
        W2: Output-layer weights, needed to propagate the error back to
            the hidden layer.

    Returns:
        The tuple ``(dW1, db1, dW2, db2)``.
    """
    _, hidden_act, _, output_act = cache
    scale = 1 / X.shape[1]

    # Error at the output pre-activation (prediction minus target).
    output_error = output_act - Y
    # Push the error back through W2, then through the tanh nonlinearity.
    hidden_error = np.dot(W2.T, output_error) * tanh_derivative(hidden_act)

    dW2 = scale * np.dot(output_error, hidden_act.T)
    db2 = scale * np.sum(output_error, axis=1, keepdims=True)
    dW1 = scale * np.dot(hidden_error, X.T)
    db1 = scale * np.sum(hidden_error, axis=1, keepdims=True)

    return dW1, db1, dW2, db2

# Update Parameters
def update_parameters(W1, b1, W2, b2, dW1, db1, dW2, db2, learning_rate):
    """Apply one gradient-descent step to every parameter.

    Each parameter is updated with ``-=``, so NumPy arrays passed in are
    modified in place and the same objects are returned.

    Args:
        W1, b1, W2, b2: Current parameters.
        dW1, db1, dW2, db2: Gradients matching each parameter.
        learning_rate: Step size multiplying each gradient.

    Returns:
        The tuple ``(W1, b1, W2, b2)`` after the update.
    """
    stepped = []
    for param, grad in ((W1, dW1), (b1, db1), (W2, dW2), (b2, db2)):
        param -= learning_rate * grad
        stepped.append(param)
    return tuple(stepped)



# Prediction

def predict(X, W1, b1, W2, b2):
    """Return hard 0/1 predictions for the inputs in ``X``.

    Runs a forward pass and thresholds the output: values strictly
    greater than 0.5 become 1, everything else becomes 0.

    Args:
        X: Input matrix.
        W1, b1, W2, b2: Network parameters passed through to ``forward``.

    Returns:
        An integer array with the same shape as the network output.
    """
    probabilities, _cache = forward(X, W1, b1, W2, b2)
    is_positive = np.greater(probabilities, 0.5)
    return is_positive.astype(int)


# Training Loop 
def train(X, Y, hidden_units=2, learning_rate=1.0, num_iterations=10000,
          seed=3, print_every=1):
    """Train the two-layer network with full-batch gradient descent.

    Args:
        X: Input matrix; ``X.shape[0]`` sets the input layer size.
        Y: Target matrix; ``Y.shape[0]`` sets the output layer size.
        hidden_units: Number of units in the hidden layer.
        learning_rate: Gradient-descent step size.
        num_iterations: Number of update steps to run.
        seed: Value passed to ``np.random.seed`` before the weights are
            drawn. The default (3) reproduces the previously hard-coded
            initialisation. Note that this reseeds NumPy's global RNG.
        print_every: Log a progress line every ``print_every`` iterations.
            The default (1) logs every iteration, as before; pass ``0`` or
            ``None`` to silence logging entirely.

    Returns:
        The trained parameters as the tuple ``(W1, b1, W2, b2)``.
    """
    n_x = X.shape[0]
    n_h = hidden_units
    n_y = Y.shape[0]

    # Small random weights break the symmetry between hidden units;
    # biases can safely start at zero.
    np.random.seed(seed)
    W1 = np.random.randn(n_h, n_x) * 0.5
    b1 = np.zeros((n_h, 1))
    W2 = np.random.randn(n_y, n_h) * 0.5
    b2 = np.zeros((n_y, 1))

    for i in range(num_iterations):
        # Forward + Cost (cost is measured before this iteration's update)
        A2, cache = forward(X, W1, b1, W2, b2)
        cost = compute_cost(A2, Y)

        # Backpropagation
        dW1, db1, dW2, db2 = backward(X, Y, cache, W2)

        # Update parameters
        W1, b1, W2, b2 = update_parameters(W1, b1, W2, b2, dW1, db1, dW2, db2, learning_rate)

        # Only pay for the extra forward pass in predict() when the
        # result is actually going to be printed.
        if print_every and i % print_every == 0:
            preds = predict(X, W1, b1, W2, b2)

            print(f"Iteration {i:5d} | Cost: {cost:.4f} | "
                  f"W1: {np.round(W1,3).tolist()} | b1: {np.round(b1.T,3).tolist()} | "
                  f"W2: {np.round(W2,3).tolist()} | b2: {np.round(b2.T,3).tolist()} | "
                  f"Pred: {preds.tolist()}")

    return W1, b1, W2, b2


# XOR truth table: each column of X is one input pair, and the matching
# column of Y is the expected XOR result.

X = np.array([
    [0, 0, 1, 1],
    [0, 1, 0, 1],
])
Y = np.array([[0, 1, 1, 0]])


# Fit the network on the four XOR examples.

trained_params = train(
    X,
    Y,
    hidden_units=2,
    learning_rate=1.0,
    num_iterations=500000,
)
W1, b1, W2, b2 = trained_params


# Compare the trained network's predictions with the expected outputs.
preds = predict(X, W1, b1, W2, b2)
print("\n Final Predictions for XOR gate:")
print(preds)
print("Expected Output:", Y)


Iteration     0 | Cost: 0.6965 | W1: [[0.891, 0.209], [0.036, -0.93]] | b1: [[-0.001, 0.0]] | W2: [[-0.121, -0.169]] | b2: [[-0.0]] | Pred: [[0, 1, 0, 1]]
Iteration     1 | Cost: 0.6959 | W1: [[0.889, 0.2], [0.025, -0.929]] | b1: [[-0.002, 0.0]] | W2: [[-0.104, -0.161]] | b2: [[-0.002]] | Pred: [[0, 1, 0, 1]]
Iteration     2 | Cost: 0.6954 | W1: [[0.887, 0.193], [0.014, -0.928]] | b1: [[-0.003, 0.0]] | W2: [[-0.089, -0.153]] | b2: [[-0.005]] | Pred: [[0, 1, 0, 1]]
Iteration     3 | Cost: 0.6949 | W1: [[0.886, 0.187], [0.003, -0.928]] | b1: [[-0.004, 0.001]] | W2: [[-0.076, -0.147]] | b2: [[-0.008]] | Pred: [[0, 1, 0, 1]]
Iteration     4 | Cost: 0.6946 | W1: [[0.885, 0.182], [-0.007, -0.927]] | b1: [[-0.004, 0.0]] | W2: [[-0.063, -0.141]] | b2: [[-0.011]] | Pred: [[0, 1, 0, 1]]
Iteration     5 | Cost: 0.6942 | W1: [[0.884, 0.178], [-0.017, -0.927]] | b1: [[-0.005, 0.0]] | W2: [[-0.051, -0.136]] | b2: [[-0.015]] | Pred: [[0, 1, 0, 1]]
Iteration     6 | Cost: 0.6939 | W1: [[0.883, 0.175],