In [13]:
# Import required library
import numpy as np

# Single training example: one 1x2 input row and its 1x1 target
X = np.array([0.6, 0.4]).reshape(1, 2)  # Input data
Y = np.full((1, 1), 0.875)  # Target value

# Draw every weight matrix and bias row from a standard normal distribution.
# Weight shapes are (inputs, outputs) per layer; each bias is a single row
# matching its layer's output width.
W = [np.random.randn(n_in, n_out) for n_in, n_out in ((2, 2), (2, 2), (2, 1))]
B = [np.random.randn(1, n_out) for n_out in (2, 2, 1)]

# Activation function (sigmoid) and its derivative
def sig(x):
    """Return the logistic sigmoid 1 / (1 + exp(-x)) of *x*, element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator


def dsig(A):
    """Return A * (1 - A).

    This equals the sigmoid's derivative when *A* is already the sigmoid
    output (not the pre-activation value).
    """
    return A * (1 - A)

# Loss function and its derivative.
# NOTE: despite the name, `mse` is half the SUM of squared errors (no
# averaging); `dmse` is the matching gradient with respect to the prediction.
def mse(x, y):
    """Return half the sum of squared differences between *x* and *y*."""
    residual = x - y
    return 0.5 * np.square(residual).sum()


def dmse(x, y):
    """Return x - y, the gradient of `mse` with respect to *x*."""
    return x - y

# Forward propagation function
def forward_pass(X, W, B):
    """Run X through every layer and cache what backpropagation needs.

    Each layer computes ``sig(np.dot(input, W[i]) + B[i])``.

    Returns a 3-tuple:
      * the output of the last layer,
      * a list holding the input fed to each layer (so its first entry is X),
      * a list holding ``dsig`` of each layer's output.
    """
    layer_inputs = []
    local_grads = []
    signal = X
    for layer, weights in enumerate(W):
        layer_inputs.append(signal)  # what this layer receives
        pre_activation = np.dot(signal, weights) + B[layer]
        signal = sig(pre_activation)  # what this layer emits
        local_grads.append(dsig(signal))  # derivative taken from the output
    return signal, layer_inputs, local_grads

# Backward propagation function
def backward_pass(W, B, A, dA, pred, Y, learning_rate=0.5):
    """Backpropagate the loss gradient and take one gradient-descent step.

    Args:
        W: list of per-layer weight arrays; updated in place.
        B: list of per-layer bias arrays; updated in place.
        A: list of the inputs each layer received, as returned by
            ``forward_pass``.
        dA: list of activation derivatives for each layer's output, as
            returned by ``forward_pass``.
        pred: network output for the current input.
        Y: target value(s) that ``pred`` is compared against.
        learning_rate: step size applied to every gradient.

    Returns:
        None. ``W`` and ``B`` (and the arrays they hold) are modified in place.
    """
    # Output-layer error: loss gradient times the local activation derivative
    E = dmse(pred, Y) * dA[-1]
    for i in reversed(range(len(W))):
        dw = np.dot(A[i].T, E)  # Gradient for this layer's weights
        db = E.sum(axis=0, keepdims=True)  # Bias gradient: sum over rows
        # Propagate the error BEFORE touching W[i]. `W[i] -= ...` mutates the
        # array in place, so propagating afterwards would push the error
        # through the already-updated weights rather than the ones that
        # produced this forward pass.
        E_prev = np.dot(E, W[i].T) * dA[i - 1] if i > 0 else None
        W[i] -= dw * learning_rate  # Update weights
        B[i] -= db * learning_rate  # Update biases
        E = E_prev

# Train on private copies so the initial W and B stay untouched
updated_W = [weights.copy() for weights in W]
updated_B = [bias.copy() for bias in B]

# Training loop
for epoch in range(501):
    # Forward propagation: prediction plus the cached per-layer values
    pred, A, dA = forward_pass(X, updated_W, updated_B)

    # Backward propagation: one in-place update of the copied parameters
    backward_pass(updated_W, updated_B, A, dA, pred, Y)

    # Only report every 20th epoch
    if epoch % 20:
        continue
    print(f"Epoch: {epoch}, Prediction: {pred}, Loss: {mse(pred, Y):.6f}")


Epoch: 0, Prediction: [[0.52946706]], Loss: 0.059697
Epoch: 20, Prediction: [[0.71272151]], Loss: 0.013167
Epoch: 40, Prediction: [[0.77830423]], Loss: 0.004675
Epoch: 60, Prediction: [[0.8098765]], Loss: 0.002121
Epoch: 80, Prediction: [[0.82816845]], Loss: 0.001097
Epoch: 100, Prediction: [[0.83996418]], Loss: 0.000614
Epoch: 120, Prediction: [[0.84810007]], Loss: 0.000362
Epoch: 140, Prediction: [[0.85397103]], Loss: 0.000221
Epoch: 160, Prediction: [[0.85834477]], Loss: 0.000139
Epoch: 180, Prediction: [[0.86167989]], Loss: 0.000089
Epoch: 200, Prediction: [[0.86426787]], Loss: 0.000058
Epoch: 220, Prediction: [[0.86630316]], Loss: 0.000038
Epoch: 240, Prediction: [[0.86792054]], Loss: 0.000025
Epoch: 260, Prediction: [[0.86921641]], Loss: 0.000017
Epoch: 280, Prediction: [[0.87026148]], Loss: 0.000011
Epoch: 300, Prediction: [[0.87110872]], Loss: 0.000008
Epoch: 320, Prediction: [[0.87179847]], Loss: 0.000005
Epoch: 340, Prediction: [[0.87236194]], Loss: 0.000003
Epoch: 360, Predi