In [1]:
import numpy as np

def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` of *z*, element-wise.

    Accepts a scalar or a NumPy array and returns a value of the matching
    shape.  The result is computed as ``exp(-log(1 + exp(-z)))`` so that
    strongly negative inputs saturate to ``0.0`` instead of overflowing.
    """
    # logaddexp(0, -z) evaluates log(exp(0) + exp(-z)) without ever forming
    # exp(-z) on its own, which is what overflowed for z below roughly -709.
    return np.exp(-np.logaddexp(0, -z))

def train_neural_network(X, Y, n_hidden=2, learning_rate=0.1, epochs=10000):
    """Train a one-hidden-layer sigmoid network with full-batch gradient descent.

    Both layers use the sigmoid activation.  The reported objective is half
    the mean squared error between the network output and ``Y``; it is
    printed every 1000 epochs.

    Args:
        X: Inputs of shape ``(n_samples, n_features)``.
        Y: Targets of shape ``(n_samples, n_output)``.  A 1-D array of
            length ``n_samples`` is treated as a single output column.
        n_hidden: Number of hidden units.
        learning_rate: Step size applied to every parameter update.
        epochs: Number of full passes over ``X``.

    Returns:
        Tuple ``(W1, b1, W2, b2)``: hidden-layer weights and bias followed by
        output-layer weights and bias.
    """
    X = np.asarray(X)
    Y = np.asarray(Y)
    if Y.ndim == 1:
        # Accept a flat target vector by viewing it as one output column.
        Y = Y.reshape(-1, 1)

    n_samples, n_features = X.shape
    n_output = Y.shape[1]

    # A private generator gives the same initial weights as seeding the
    # global NumPy RNG with 42, without clobbering the caller's random state.
    rng = np.random.RandomState(42)
    W1 = rng.randn(n_features, n_hidden)
    b1 = np.zeros((n_hidden,))
    W2 = rng.randn(n_hidden, n_output)
    b2 = np.zeros((n_output,))

    for epoch in range(epochs):
        # Forward pass.
        Z1 = X @ W1 + b1
        A1 = sigmoid(Z1)
        Z2 = A1 @ W2 + b2
        A2 = sigmoid(Z2)

        # The loss is only needed for logging, so compute it on demand.
        if epoch % 1000 == 0:
            loss = 0.5 * np.mean((A2 - Y) ** 2)
            print(f"Epoch {epoch}, Loss: {loss:.6f}")

        # Backward pass through the output layer; sigmoid'(z) = a * (1 - a).
        dL_dA2 = A2 - Y
        dA2_dZ2 = A2 * (1 - A2)
        dL_dZ2 = dL_dA2 * dA2_dZ2

        # Gradients are averaged over samples only, so for n_output > 1 the
        # step is n_output times the exact gradient of the printed loss.
        dL_dW2 = A1.T @ dL_dZ2 / n_samples
        dL_db2 = np.sum(dL_dZ2, axis=0) / n_samples

        # Backward pass through the hidden layer; uses W2 before its update.
        dL_dA1 = dL_dZ2 @ W2.T
        dA1_dZ1 = A1 * (1 - A1)
        dL_dZ1 = dL_dA1 * dA1_dZ1

        dL_dW1 = X.T @ dL_dZ1 / n_samples
        dL_db1 = np.sum(dL_dZ1, axis=0) / n_samples

        # Gradient-descent step.
        W2 -= learning_rate * dL_dW2
        b2 -= learning_rate * dL_db2
        W1 -= learning_rate * dL_dW1
        b1 -= learning_rate * dL_db1

    return W1, b1, W2, b2

def predict(X, W1, b1, W2, b2):
    """Run the forward pass of the two-layer sigmoid network on *X*.

    Args:
        X: Inputs with one row per sample.
        W1, b1: Hidden-layer weights and bias.
        W2, b2: Output-layer weights and bias.

    Returns:
        The output-layer sigmoid activations, one row per sample.
    """
    hidden_activation = sigmoid(X @ W1 + b1)
    return sigmoid(hidden_activation @ W2 + b2)

# XOR truth table: each row of X is an input pair, the matching row of Y is
# the target output.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
Y = np.array([[0], [1], [1], [0]])

# Fit the network, then evaluate it on the same four training points.
W1, b1, W2, b2 = train_neural_network(
    X,
    Y,
    n_hidden=2,
    learning_rate=0.1,
    epochs=10000,
)
predictions = predict(X, W1, b1, W2, b2)

# Each report is a blank line, a heading, then the array on its own line(s).
print("\nPredictions:", predictions, sep="\n")
print("\nTargets:", Y, sep="\n")

Epoch 0, Loss: 0.127915
Epoch 1000, Loss: 0.124984
Epoch 2000, Loss: 0.124913
Epoch 3000, Loss: 0.124824
Epoch 4000, Loss: 0.124703
Epoch 5000, Loss: 0.124519
Epoch 6000, Loss: 0.124221
Epoch 7000, Loss: 0.123699
Epoch 8000, Loss: 0.122717
Epoch 9000, Loss: 0.120793

Predictions:
[[0.45920139]
 [0.44385503]
 [0.58575287]
 [0.49494702]]

Targets:
[[0]
 [1]
 [1]
 [0]]
