In [1]:
import random
import math

# ----------------------------
# Activation (ReLU)
# ----------------------------
def relu(x):
    """Rectified linear unit: pass positive values through, clamp the rest to 0."""
    if x > 0:
        return x
    # Non-positive input (and anything that does not compare greater than 0)
    # collapses to the integer 0.
    return 0

def relu_derivative(x):
    """Slope of ReLU at pre-activation x: 1 for x > 0, otherwise 0."""
    if x > 0:
        return 1
    # At x == 0 the slope is taken to be 0.
    return 0

# ----------------------------
# Initialize weights
# ----------------------------
def init_weights(in_size, out_size):
    """Create an in_size x out_size weight matrix as a list of row lists.

    Every entry is drawn with random.uniform(-1, 1). Rows are filled one
    after another, so the sequence of random draws is row-major.
    """
    weights = []
    for _ in range(in_size):
        row = []
        for _ in range(out_size):
            row.append(random.uniform(-1, 1))
        weights.append(row)
    return weights

def init_bias(size):
    """Return a new list holding `size` zero-valued (0.0) biases."""
    return [0.0] * size

# ----------------------------
# Forward pass
# ----------------------------
def forward(x, W1, b1, W2, b2):
    """Run one sample through the hidden layer and the linear output unit.

    W1 is indexed as W1[input_index][hidden_index] and W2 as
    W2[hidden_index][0]; b1 holds one bias per hidden unit and b2[0] is the
    output bias.

    Returns a tuple (z1, a1, z2): the hidden pre-activations, the hidden
    activations after ReLU, and the scalar output.
    """
    # Hidden layer: one weighted sum per hidden unit, plus that unit's bias.
    z1 = []
    for j, bias in enumerate(b1):
        weighted = sum(x_i * W1[i][j] for i, x_i in enumerate(x))
        z1.append(weighted + bias)
    a1 = list(map(relu, z1))

    # Output layer: plain linear combination, no activation (regression).
    z2 = sum(act * W2[i][0] for i, act in enumerate(a1)) + b2[0]

    return z1, a1, z2

# ----------------------------
# Training
# ----------------------------
# Example: 3 input → 4 hidden → 1 output
# The layer sizes are named once so that weight initialisation, gradient
# construction and the update loops all agree; previously the literals 3 and 4
# were repeated in each of those places.
n_inputs = 3
n_hidden = 4

W1 = init_weights(n_inputs, n_hidden)
b1 = init_bias(n_hidden)

W2 = init_weights(n_hidden, 1)
b2 = init_bias(1)

# Data (single sample); x must have n_inputs entries.
x = [2.0, 3.0, -1.0]
y_true = 1.0

lr = 0.01

for epoch in range(20):

    # Forward
    z1, a1, y_pred = forward(x, W1, b1, W2, b2)

    # Loss (squared error on the single sample), computed before this
    # epoch's parameter update.
    loss = (y_pred - y_true) ** 2

    # ----------------------------
    # Backpropagation
    # ----------------------------

    # Output gradient: d(loss)/d(y_pred)
    dL_dy = 2 * (y_pred - y_true)

    # Gradients for W2 and b2 (the output is linear in a1 and b2)
    dW2 = [dL_dy * a for a in a1]
    db2 = dL_dy

    # Backprop to hidden layer. This reads W2 before it is modified below,
    # so the gradient corresponds to the weights used in the forward pass.
    d_hidden = [
        dL_dy * W2[i][0] * relu_derivative(z1[i])
        for i in range(n_hidden)
    ]

    # Gradients for W1 and b1
    dW1 = [[d_hidden[j] * x[i] for j in range(n_hidden)] for i in range(n_inputs)]
    db1 = d_hidden

    # ----------------------------
    # Update weights (plain gradient descent step of size lr)
    # ----------------------------
    for i in range(n_inputs):
        for j in range(n_hidden):
            W1[i][j] -= lr * dW1[i][j]

    for j in range(n_hidden):
        b1[j] -= lr * db1[j]

    for i in range(n_hidden):
        W2[i][0] -= lr * dW2[i]

    b2[0] -= lr * db2

    print(f"Epoch {epoch}: Loss = {loss}")

Epoch 0: Loss = 0.10551463430625277
Epoch 1: Loss = 0.06375638573285247
Epoch 2: Loss = 0.039474055227084894
Epoch 3: Loss = 0.02488235465579641
Epoch 4: Loss = 0.01589836626970618
Epoch 5: Loss = 0.010264392159697114
Epoch 6: Loss = 0.006680896612870513
Epoch 7: Loss = 0.004376305491547365
Epoch 8: Loss = 0.0028812422544535303
Epoch 9: Loss = 0.0019046202033193265
Epoch 10: Loss = 0.0012631268370864328
Epoch 11: Loss = 0.0008398891214921463
Epoch 12: Loss = 0.0005596491915048381
Epoch 13: Loss = 0.0003735552335048917
Epoch 14: Loss = 0.0002496888081602089
Epoch 15: Loss = 0.0001670844250172542
Epoch 16: Loss = 0.00011191141009809426
Epoch 17: Loss = 7.501366293818819e-05
Epoch 18: Loss = 5.0312272215159006e-05
Epoch 19: Loss = 3.376184457114822e-05
