In [1]:
#️⃣ Cell 1: Imports
import numpy as np
import matplotlib.pyplot as plt


In [2]:
#️⃣ Cell 2: Dataset (XOR logic)
# Input data (XOR truth table)
X = np.array([[0,0],
              [0,1],
              [1,0],
              [1,1]])

# Expected output
y = np.array([[0], [1], [1], [0]])


In [3]:
#️⃣ Cell 3: Initialize network parameters
np.random.seed(42)

input_size = 2
hidden_size = 2
output_size = 1
learning_rate = 0.1

# Random weights and biases
W1 = np.random.randn(input_size, hidden_size)
b1 = np.zeros((1, hidden_size))
W2 = np.random.randn(hidden_size, output_size)
b2 = np.zeros((1, output_size))


In [4]:
#️⃣ Cell 4: Define activation functions
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)


In [5]:
#️⃣ Cell 5: Training loop
epochs = 10000
losses = []

for epoch in range(epochs):
    # ---- Forward Propagation ----
    z1 = np.dot(X, W1) + b1
    a1 = sigmoid(z1)
    z2 = np.dot(a1, W2) + b2
    a2 = sigmoid(z2)
    
    # ---- Loss (Mean Squared Error) ----
    loss = np.mean((y - a2) ** 2)
    losses.append(loss)
    
    # ---- Backward Propagation ----
    d_loss_a2 = 2 * (a2 - y)
    d_a2_z2 = sigmoid_derivative(a2)
    d_z2_W2 = a1.T
    d_z2_a1 = W2.T
    
    # Gradients for output layer
    dW2 = np.dot(d_z2_W2, d_loss_a2 * d_a2_z2)
    db2 = np.sum(d_loss_a2 * d_a2_z2, axis=0, keepdims=True)
    
    # Gradients for hidden layer
    d_a1_z1 = sigmoid_derivative(a1)
    dW1 = np.dot(X.T, np.dot(d_loss_a2 * d_a2_z2, d_z2_a1.T) * d_a1_z1)
    db1 = np.sum(np.dot(d_loss_a2 * d_a2_z2, d_z2_a1.T) * d_a1_z1, axis=0, keepdims=True)
    
    # ---- Update weights ----
    W1 -= learning_rate * dW1
    b1 -= learning_rate * db1
    W2 -= learning_rate * dW2
    b2 -= learning_rate * db2
    
    # ---- Print progress ----
    if epoch % 2000 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")


ValueError: shapes (4,1) and (2,1) not aligned: 1 (dim 1) != 2 (dim 0)

In [6]:
epochs = 10000
losses = []

for epoch in range(epochs):
    # ---- Forward Propagation ----
    z1 = np.dot(X, W1) + b1
    a1 = sigmoid(z1)
    z2 = np.dot(a1, W2) + b2
    a2 = sigmoid(z2)
    
    # ---- Loss (Mean Squared Error) ----
    loss = np.mean((y - a2) ** 2)
    losses.append(loss)
    
    # ---- Backward Propagation ----
    # Output layer error
    d_a2 = a2 - y
    d_z2 = d_a2 * sigmoid_derivative(a2)
    
    dW2 = np.dot(a1.T, d_z2)
    db2 = np.sum(d_z2, axis=0, keepdims=True)
    
    # Hidden layer error
    d_a1 = np.dot(d_z2, W2.T)
    d_z1 = d_a1 * sigmoid_derivative(a1)
    
    dW1 = np.dot(X.T, d_z1)
    db1 = np.sum(d_z1, axis=0, keepdims=True)
    
    # ---- Update weights ----
    W1 -= learning_rate * dW1
    b1 -= learning_rate * db1
    W2 -= learning_rate * dW2
    b2 -= learning_rate * db2
    
    # ---- Print progress ----
    if epoch % 2000 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")


Epoch 0, Loss: 0.2558
Epoch 2000, Loss: 0.2454
Epoch 4000, Loss: 0.1532
Epoch 6000, Loss: 0.1336
Epoch 8000, Loss: 0.1297
