In [1]:
import numpy as np

In [2]:
def initialize_parameters(input_size, hidden_size, output_size, seed=42):
    """Create the initial weights and biases of a two-layer network.

    Weights are drawn from a standard normal distribution and scaled by 0.01;
    biases start at zero.

    Args:
        input_size: Number of input features (columns of ``W1``).
        hidden_size: Number of hidden units (rows of ``W1``, columns of ``W2``).
        output_size: Number of output units (rows of ``W2``).
        seed: Seed for the private random generator. The default of 42 yields
            the same values as seeding NumPy's global generator with 42.

    Returns:
        dict with keys ``"W1"`` (hidden_size, input_size), ``"b1"``
        (hidden_size, 1), ``"W2"`` (output_size, hidden_size) and ``"b2"``
        (output_size, 1).
    """
    # A private RandomState produces the same stream as np.random.seed(seed)
    # followed by np.random.randn, but leaves NumPy's global random state
    # untouched for whatever else the caller is doing with it.
    rng = np.random.RandomState(seed)
    parameters = {
        "W1": rng.randn(hidden_size, input_size) * 0.01,
        "b1": np.zeros((hidden_size, 1)),
        "W2": rng.randn(output_size, hidden_size) * 0.01,
        "b2": np.zeros((output_size, 1)),
    }
    return parameters

In [3]:
def sigmoid(Z):
    """Element-wise logistic sigmoid, 1 / (1 + exp(-Z)).

    Evaluated as ``exp(-log(1 + exp(-Z)))`` through ``np.logaddexp`` so that
    large negative inputs do not overflow ``np.exp``; the direct formula
    triggers an overflow RuntimeWarning there.

    Args:
        Z: Scalar or NumPy array of pre-activations.

    Returns:
        Values in [0, 1] with the same shape as ``Z``.
    """
    # logaddexp(0, -Z) == log(exp(0) + exp(-Z)) == log(1 + exp(-Z)),
    # computed without forming exp(-Z) when -Z is large.
    return np.exp(-np.logaddexp(0, -Z))

def relu(Z):
    """Rectified linear unit: element-wise maximum of 0 and ``Z``."""
    floor = 0
    rectified = np.maximum(floor, Z)
    return rectified

def relu_derivative(Z):
    """Return an integer mask that is 1 where ``Z > 0`` and 0 elsewhere."""
    positive_mask = Z > 0
    return positive_mask.astype(int)

In [4]:
def forward_propagation(X, parameters):
    """Run one forward pass through the two-layer network.

    Args:
        X: Input array; it is left-multiplied by ``parameters["W1"]``.
        parameters: dict holding ``"W1"``, ``"b1"``, ``"W2"`` and ``"b2"``.

    Returns:
        Tuple ``(A2, cache)`` where ``A2`` is the sigmoid output and ``cache``
        is a dict with the intermediate values ``"Z1"``, ``"A1"``, ``"Z2"``
        and ``"A2"``.
    """
    W1, b1 = parameters["W1"], parameters["b1"]
    W2, b2 = parameters["W2"], parameters["b2"]

    # Hidden layer: affine transform followed by ReLU.
    hidden_pre = np.dot(W1, X) + b1
    hidden_act = relu(hidden_pre)

    # Output layer: affine transform followed by sigmoid.
    output_pre = np.dot(W2, hidden_act) + b2
    output_act = sigmoid(output_pre)

    return output_act, {
        "Z1": hidden_pre,
        "A1": hidden_act,
        "Z2": output_pre,
        "A2": output_act,
    }

In [5]:
def compute_cost(Y, A2):
    """Mean binary cross-entropy between labels ``Y`` and predictions ``A2``.

    Args:
        Y: Label array; the number of examples is read from ``Y.shape[1]``.
        A2: Predicted probabilities, broadcast-compatible with ``Y``.

    Returns:
        The cost, summed over all entries and divided by ``Y.shape[1]``,
        passed through ``np.squeeze``.
    """
    m = Y.shape[1]
    # Keep probabilities strictly inside (0, 1): a saturated prediction of
    # exactly 0 or 1 would otherwise give log(0) = -inf, and 0 * -inf = nan.
    # Values already inside the range are unchanged by the clip.
    eps = 1e-15
    A2_safe = np.clip(A2, eps, 1 - eps)
    cost = -np.sum(Y * np.log(A2_safe) + (1 - Y) * np.log(1 - A2_safe)) / m
    return np.squeeze(cost)

In [6]:
def backward_propagation(X, Y, parameters, cache):
    """Compute the gradients of the cost for both layers.

    Args:
        X: Input array; the number of examples is read from ``X.shape[1]``.
        Y: Label array, subtracted from the cached output ``A2``.
        parameters: dict of network parameters; only ``"W2"`` is read here.
        cache: dict from the forward pass holding ``"Z1"``, ``"A1"`` and
            ``"A2"``.

    Returns:
        dict with the gradients ``"dW1"``, ``"db1"``, ``"dW2"`` and ``"db2"``.
    """
    n_examples = X.shape[1]
    output_weights = parameters["W2"]
    output_act, hidden_act, hidden_pre = cache["A2"], cache["A1"], cache["Z1"]

    # Output layer: the error signal is the prediction minus the label.
    output_delta = output_act - Y
    grad_W2 = np.dot(output_delta, hidden_act.T) / n_examples
    grad_b2 = np.sum(output_delta, axis=1, keepdims=True) / n_examples

    # Hidden layer: push the error back through W2 and gate it by the ReLU mask.
    hidden_delta = np.dot(output_weights.T, output_delta) * relu_derivative(hidden_pre)
    grad_W1 = np.dot(hidden_delta, X.T) / n_examples
    grad_b1 = np.sum(hidden_delta, axis=1, keepdims=True) / n_examples

    return {"dW1": grad_W1, "db1": grad_b1, "dW2": grad_W2, "db2": grad_b2}

In [7]:
def update_parameters(parameters, grads, learning_rate):
    """Apply one gradient-descent step to every entry of ``parameters``.

    Each parameter ``p`` is decreased by ``learning_rate * grads["d" + p]``.
    The update uses ``-=``, so the dict passed in is modified and then
    returned.
    """
    for name in parameters:
        step = learning_rate * grads["d" + name]
        parameters[name] -= step
    return parameters

In [8]:
def train_neural_network(X, Y, input_size, hidden_size, output_size, epochs=1000, learning_rate=0.01):
    """Train the two-layer network with gradient descent on the full input.

    Every iteration runs a forward pass on all of ``X``, evaluates the cost,
    back-propagates and updates the parameters. The cost is printed on every
    100th iteration, starting with iteration 0.

    Args:
        X: Training inputs.
        Y: Training labels.
        input_size: Number of input features.
        hidden_size: Number of hidden units.
        output_size: Number of output units.
        epochs: Number of gradient-descent iterations.
        learning_rate: Step size used in the parameter update.

    Returns:
        The dict of trained parameters.
    """
    log_every = 100
    parameters = initialize_parameters(input_size, hidden_size, output_size)

    for i in range(epochs):
        output, cache = forward_propagation(X, parameters)
        cost = compute_cost(Y, output)
        parameters = update_parameters(
            parameters,
            backward_propagation(X, Y, parameters, cache),
            learning_rate,
        )

        if i % log_every == 0:
            print(f"Epoch {i}: Cost = {cost}")

    return parameters

In [9]:
def predict(X, parameters):
    """Return 0/1 predictions by thresholding the network output at 0.5.

    An output strictly greater than 0.5 maps to 1, anything else to 0.
    """
    threshold = 0.5
    probabilities = forward_propagation(X, parameters)[0]
    is_positive = probabilities > threshold
    return is_positive.astype(int)

In [10]:
# Truth table of logical AND: each column of X is one example and the matching
# column of Y is its label.
X = np.array([
    [0, 0, 1, 1],
    [0, 1, 0, 1],
])
Y = np.array([[0, 0, 0, 1]])

# Layer sizes are read from the data: 2 input rows, 1 output row.
trained_parameters = train_neural_network(
    X,
    Y,
    input_size=X.shape[0],
    hidden_size=4,
    output_size=Y.shape[0],
    epochs=10000,
    learning_rate=0.1,
)

predictions = predict(X, trained_parameters)
print("Predictions:", predictions)

Epoch 0: Cost = 0.693146369355556
Epoch 100: Cost = 0.5639151894233227
Epoch 200: Cost = 0.5565925189740488
Epoch 300: Cost = 0.4644741172097931
Epoch 400: Cost = 0.29516280613637336
Epoch 500: Cost = 0.1579776667730769
Epoch 600: Cost = 0.08563555993338258
Epoch 700: Cost = 0.05288251365109446
Epoch 800: Cost = 0.03704356618451611
Epoch 900: Cost = 0.02905583377309042
Epoch 1000: Cost = 0.02401034841169598
Epoch 1100: Cost = 0.020422312372831625
Epoch 1200: Cost = 0.017694655817974164
Epoch 1300: Cost = 0.015501749759281246
Epoch 1400: Cost = 0.013686620557187959
Epoch 1500: Cost = 0.01220429968875079
Epoch 1600: Cost = 0.010974235562573388
Epoch 1700: Cost = 0.009922426517793716
Epoch 1800: Cost = 0.009037597840418245
Epoch 1900: Cost = 0.008262495265921054
Epoch 2000: Cost = 0.007595727088792033
Epoch 2100: Cost = 0.007011782061884308
Epoch 2200: Cost = 0.006492342970863621
Epoch 2300: Cost = 0.006032762228198714
Epoch 2400: Cost = 0.005624169059161951
Epoch 2500: Cost = 0.005260068