Binary classification


In [4]:
import numpy as np

# Sigmoid function (numerically stable)
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))``, element-wise.

    The naive formula evaluates ``exp(-x)``, which overflows for large
    negative ``x``. This version only ever exponentiates ``-|x|``, so the
    intermediate value stays in (0, 1] for every finite input.

    Args:
        x: A real scalar or array-like.

    Returns:
        The sigmoid of ``x``: a NumPy scalar for scalar input, otherwise an
        array with the same shape as ``x``.
    """
    x = np.asarray(x)
    # exp(-|x|) is at most 1, so it cannot overflow.
    decay = np.exp(-np.abs(x))
    # x >= 0:  1 / (1 + exp(-x))
    # x <  0:  exp(x) / (1 + exp(x))   (same value, overflow-free form)
    return np.where(x >= 0, 1.0, decay) / (1.0 + decay)

# Sigmoid derivative, written in terms of the sigmoid's output
def sigmoid_derivative(x):
    """Return ``x * (1 - x)``, element-wise.

    For s = sigmoid(z) the slope is d/dz sigmoid(z) = s * (1 - s), so this
    gives the sigmoid's derivative when ``x`` is an already-activated value
    (a sigmoid output). It does not apply the sigmoid itself, so passing a
    raw pre-activation will not yield the derivative.

    Args:
        x: Sigmoid output(s); a scalar or NumPy array.

    Returns:
        ``x * (1 - x)`` with the same shape as ``x``.
    """
    complement = 1 - x
    return x * complement

# Mean Squared Error loss function
def mean_squared_error_loss(y_true, y_pred):
    """Return the mean of the squared differences between targets and predictions.

    Both arguments are converted with ``np.asarray`` so plain Python
    sequences and scalars are accepted as well as NumPy arrays. The
    difference follows NumPy broadcasting rules, so the two inputs should
    have matching shapes; the mean is taken over every element.

    Args:
        y_true: Target values (array-like).
        y_pred: Predicted values (array-like).

    Returns:
        A single floating-point value: ``mean((y_true - y_pred) ** 2)``.
    """
    residual = np.asarray(y_true) - np.asarray(y_pred)
    return np.mean(residual ** 2)

# XOR truth table: every pair of binary inputs, and the XOR of each pair
inputs = np.array([[a, b] for a in (0, 1) for b in (0, 1)])
outputs = np.array([[a ^ b] for a, b in inputs.tolist()])

# Seed NumPy's global generator so each run draws the same initial parameters
np.random.seed(42)

# Layer widths: 2 inputs -> 4 hidden units -> 1 output
input_size, hidden_size, output_size = 2, 4, 1

# Initial parameters. Weights are drawn uniformly from [0, 1) (rand) and
# biases from a standard normal (randn). The values depend on the order of
# these four draws after seeding, so keep the statements in this order.
weights_input_hidden = np.random.rand(input_size, hidden_size)
bias_hidden = np.random.randn(hidden_size)
weights_hidden_output = np.random.rand(hidden_size, output_size)
bias_output = np.random.randn(output_size)

# Training hyperparameters
learning_rate, epochs = 0.1, 30000

# Training loop: full-batch gradient descent over all rows of `inputs`
for epoch in range(epochs):
    # Forward pass: inputs -> hidden layer -> output layer
    hidden_input = inputs.dot(weights_input_hidden) + bias_hidden
    hidden_output = sigmoid(hidden_input)

    final_input = hidden_output.dot(weights_hidden_output) + bias_output
    final_output = sigmoid(final_input)

    # Loss is computed for reporting only; the updates below do not read it
    loss = mean_squared_error_loss(outputs, final_output)

    # Backpropagation. sigmoid_derivative is fed the activations
    # (final_output / hidden_output), not the pre-activation sums.
    error_output = final_output - outputs
    gradient_output = sigmoid_derivative(final_output) * error_output

    error_hidden = gradient_output.dot(weights_hidden_output.T)
    gradient_hidden = sigmoid_derivative(hidden_output) * error_hidden

    # Parameter updates (in place).
    # NOTE(review): the weight steps use a gradient summed over samples
    # (matrix product), while the bias steps use the per-sample mean, so the
    # biases take a proportionally smaller step -- confirm this is intended.
    weights_hidden_output -= learning_rate * hidden_output.T.dot(gradient_output)
    bias_output -= learning_rate * gradient_output.mean(axis=0)

    weights_input_hidden -= learning_rate * inputs.T.dot(gradient_hidden)
    bias_hidden -= learning_rate * gradient_hidden.mean(axis=0)

    # Report the loss once every 1000 epochs
    if not (epoch + 1) % 1000:
        print(f"Epoch: {epoch + 1}, Loss: {loss:.6f}")

# Evaluate the trained network on each input row, one sample at a time
result = []
for input_pair in inputs:
    # Same forward pass as in training, applied to a single 1-D sample
    hidden_input = input_pair.dot(weights_input_hidden) + bias_hidden
    hidden_output = sigmoid(hidden_input)

    final_input = hidden_output.dot(weights_hidden_output) + bias_output
    final_output = sigmoid(final_input)

    # Take the first output element, rounded to two decimals
    prediction = np.round(final_output[0], 2)
    result.append((input_pair, prediction))

# Show the (input, prediction) pairs
print(result)


Epoch: 1000, Loss: 0.235709
Epoch: 2000, Loss: 0.198913
Epoch: 3000, Loss: 0.147844
Epoch: 4000, Loss: 0.080936
Epoch: 5000, Loss: 0.032734
Epoch: 6000, Loss: 0.015943
Epoch: 7000, Loss: 0.009609
Epoch: 8000, Loss: 0.006612
Epoch: 9000, Loss: 0.004940
Epoch: 10000, Loss: 0.003898
Epoch: 11000, Loss: 0.003195
Epoch: 12000, Loss: 0.002694
Epoch: 13000, Loss: 0.002320
Epoch: 14000, Loss: 0.002032
Epoch: 15000, Loss: 0.001804
Epoch: 16000, Loss: 0.001620
Epoch: 17000, Loss: 0.001468
Epoch: 18000, Loss: 0.001340
Epoch: 19000, Loss: 0.001232
Epoch: 20000, Loss: 0.001139
Epoch: 21000, Loss: 0.001059
Epoch: 22000, Loss: 0.000989
Epoch: 23000, Loss: 0.000927
Epoch: 24000, Loss: 0.000872
Epoch: 25000, Loss: 0.000822
Epoch: 26000, Loss: 0.000778
Epoch: 27000, Loss: 0.000738
Epoch: 28000, Loss: 0.000702
Epoch: 29000, Loss: 0.000669
Epoch: 30000, Loss: 0.000639
[(array([0, 0]), 0.02), (array([0, 1]), 0.98), (array([1, 0]), 0.97), (array([1, 1]), 0.03)]
