A toy problem. As a dataset, we chose a fairly standard dataset that is not linearly separable, made of two classes, "0" and "1". I use the Mean Squared Error (MSE) as the loss function.

In [None]:
import numpy as np
import math
import matplotlib.pyplot as plt
import copy


# Seed NumPy's global random generator so that every draw made through
# `np.random` after a fresh kernel start produces the same values on each run.
np.random.seed(0)

I fixed the random seed with `np.random.seed(0)` so that the dataset generation and the visualizations are reproducible.

In [None]:
def _sample_ring(n_samples, r_min, r_max):
    """Draw ``n_samples`` 2-D points with radius uniform in [r_min, r_max) and a uniform angle."""
    # Radii are drawn before angles: this matches the historical draw order, so
    # results for a given seed do not change.
    radiuses = np.random.uniform(r_min, r_max, size=n_samples)
    angles = np.random.uniform(0, 2*math.pi, size=n_samples)

    x_1 = (radiuses * np.cos(angles)).reshape(n_samples, 1)
    x_2 = (radiuses * np.sin(angles)).reshape(n_samples, 1)
    return np.concatenate((x_1, x_2), axis=1)


def generate_dataset(N_points):
    """Generate a two-class dataset that is not linearly separable.

    Class 1 is sampled inside a disc (radius in [0, 0.5)) and class 0 inside a
    surrounding ring (radius in [0.6, 1)), both centred on the origin.

    Parameters
    ----------
    N_points : int
        Total number of samples. Class 1 receives ``N_points // 2`` samples and
        class 0 receives the remainder, so an odd count is honoured instead of
        silently dropping one sample.

    Returns
    -------
    X : ndarray of shape (N_points, 2)
        Point coordinates; all class-1 rows come first, then all class-0 rows.
    Y : ndarray of shape (N_points,)
        Integer labels (1 or 0) aligned with the rows of ``X``.

    Raises
    ------
    ValueError
        If ``N_points`` is negative.
    """
    if N_points < 0:
        raise ValueError(f"N_points must be non-negative, got {N_points}")

    n_class_1 = N_points // 2
    # Give the leftover sample (odd N_points) to class 0 so that the first
    # N_points // 2 rows are still exactly the class-1 rows.
    n_class_0 = N_points - n_class_1

    # Class 1 must be sampled before class 0 to keep seeded runs reproducible.
    X_class_1 = _sample_ring(n_class_1, 0, 0.5)
    Y_class_1 = np.full((n_class_1,), 1)

    X_class_0 = _sample_ring(n_class_0, 0.6, 1)
    Y_class_0 = np.full((n_class_0,), 0)

    X = np.concatenate((X_class_1, X_class_0), axis=0)
    Y = np.concatenate((Y_class_1, Y_class_0), axis=0)
    return X, Y

Forward propagation

In [None]:
N_points = 1000
X, Y = generate_dataset(N_points)

# Plot dataset
# Each class is selected through its label in Y rather than by row position,
# so the colours stay correct whatever the ordering or size of the dataset.
plt.scatter(X[Y == 1, 0], X[Y == 1, 1], color='red', label='Class 1')
plt.scatter(X[Y == 0, 0], X[Y == 0, 1], color='blue', label='Class 0')
plt.title('Toy dataset')
plt.xlabel('x1')
plt.ylabel('x2')
plt.legend(loc='upper right')
plt.show()

In [None]:
# Network parameters: W1/b1 feed the hidden layer, W2/b2 feed the output unit.
# The random weights are scaled down to small values; both biases start at zero.
# W1 is drawn before W2, which fixes the order of the random draws.
weights = dict(
    W1=0.01 * np.random.randn(3, 2),
    b1=np.zeros(shape=3),
    W2=0.01 * np.random.randn(3),
    b2=0,
)

The sigmoid function squashes any real input into a value between 0 and 1; for an input of 0 it returns 0.5.

In [None]:
def sigmoid(x):
    """Numerically stable logistic function ``1 / (1 + exp(-x))``.

    Parameters
    ----------
    x : scalar or array-like
        Input value(s).

    Returns
    -------
    NumPy float scalar for scalar input, otherwise an ndarray with the shape
    of ``x``; every value lies in [0, 1].

    Notes
    -----
    The naive formula evaluates ``exp(-x)``, which overflows for large negative
    ``x``. Here the exponential is only ever taken of a non-positive number, so
    it cannot overflow.
    """
    x = np.asarray(x, dtype=float)
    decay = np.exp(-np.abs(x))  # always in (0, 1]
    # For x >= 0 this is the textbook formula; for x < 0 it is the algebraically
    # equivalent exp(x) / (1 + exp(x)).
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d array back into a scalar and leaves arrays
    # of higher rank unchanged.
    return result[()]

In [None]:
def forward_propagation(X, weights):
    """Run the network forward on ``X`` and return every intermediate value.

    Parameters
    ----------
    X : ndarray
        Input samples, one per row.
    weights : dict
        Parameters stored under the keys ``'W1'``, ``'b1'``, ``'W2'`` and ``'b2'``.

    Returns
    -------
    tuple
        ``(Y_pred, Z2, H, Z1)``: the output activation, the output
        pre-activation, the hidden activation and the hidden pre-activation.
    """
    hidden_weights, hidden_bias = weights['W1'], weights['b1']
    output_weights, output_bias = weights['W2'], weights['b2']

    # Hidden layer: affine map followed by the sigmoid non-linearity.
    pre_hidden = np.dot(X, hidden_weights.T) + hidden_bias
    hidden = sigmoid(pre_hidden)

    # Output unit: same pattern applied to the hidden activations.
    pre_output = np.dot(hidden, output_weights.T) + output_bias
    return sigmoid(pre_output), pre_output, hidden, pre_hidden

    # The loss is computed in back_propagation using the Mean Squared Error (MSE).

In [None]:
def back_propagation(X, Y_T, weights):
    """Compute the MSE loss and its gradients with respect to every parameter.

    Parameters
    ----------
    X : ndarray
        Input samples, one per row.
    Y_T : array-like
        Target labels, one per sample. Any shape holding exactly one value per
        prediction is accepted (e.g. ``(N,)`` or ``(N, 1)``).
    weights : dict
        Parameters stored under ``'W1'``, ``'b1'``, ``'W2'`` and ``'b2'``.
        The hidden-layer width is taken from the arrays themselves.

    Returns
    -------
    gradients : dict
        Gradient of the loss for each key of ``weights``.
    loss : float
        Mean squared error between predictions and targets.

    Raises
    ------
    ValueError
        If ``Y_T`` does not contain one value per prediction.
    """
    N_points = X.shape[0]
    Y_pred, _, H, _ = forward_propagation(X, weights)

    # Give the targets the same shape as the predictions. Without this, a
    # column vector (N, 1) would silently broadcast against (N,) into (N, N).
    Y_T = np.asarray(Y_T).reshape(Y_pred.shape)

    residual = Y_pred - Y_T
    loss = np.mean(residual ** 2)

    # Output layer. The sigmoid derivative is s * (1 - s); forward_propagation
    # already returned s as Y_pred, so it is not recomputed.
    dLdY = 2 * residual / N_points
    dLdZ2 = dLdY * (Y_pred * (1 - Y_pred))
    dLdW2 = np.dot(H.T, dLdZ2)
    dLdb2 = np.sum(dLdZ2)

    # Hidden layer. The outer product spreads each sample's output gradient
    # over the hidden units and works for any hidden-layer width.
    dLdH = np.outer(dLdZ2, weights['W2'])
    dLdZ1 = dLdH * (H * (1 - H))
    dLdW1 = np.dot(dLdZ1.T, X)
    dLdb1 = np.sum(dLdZ1, axis=0)

    gradients = {
        'W1': dLdW1,
        'b1': dLdb1,
        'W2': dLdW2,
        'b2': dLdb2,
    }
    return gradients, loss

In [None]:
# Keep an independent copy of every untrained parameter, taken before the
# training loop updates `weights`, so the two states can be compared later.
initial_weights = {name: copy.deepcopy(value) for name, value in weights.items()}

# Gradient-descent settings: step size and number of full passes over the data.
learning_rate = 0.1
epochs = 2000

In [None]:
# Always restart from the untrained snapshot: re-running this cell then
# reproduces the same training run instead of continuing from weights that
# were already trained (while `losses` is reset and plotted from epoch 0).
weights = copy.deepcopy(initial_weights)

losses = []
for epoch in range(epochs):
    # Full-batch gradient descent: one gradient evaluation over the whole
    # dataset, then one update of every parameter.
    gradients, loss = back_propagation(X, Y, weights)
    for weight_name in weights:
        weights[weight_name] -= learning_rate * gradients[weight_name]
    losses.append(loss)

In [None]:
# Loss recorded at each epoch of the training loop, drawn on an explicit Axes.
fig, ax = plt.subplots()
ax.plot(range(epochs), losses)
ax.set(title='Training Loss', xlabel='Epoch', ylabel='Loss')
plt.show()

In [None]:
def visualization(weights, X_data, title, superposed_training=False):
    """Plot the network output over a square grid that covers ``X_data``.

    Parameters
    ----------
    weights : dict
        Network parameters, passed unchanged to ``forward_propagation``.
    X_data : ndarray
        Two-column array of points. Its overall minimum and maximum define the
        plotted square. When overlaid, the first half of the rows is drawn as
        class 1 (red) and the second half as class 0 (blue), which is the row
        order produced by ``generate_dataset``.
    title : str
        Figure title.
    superposed_training : bool, optional
        If True, scatter ``X_data`` on top of the prediction map.
    """
    N_test_points = 1000

    # Pad the range by 5% of the data span on each side. For data centred on
    # zero this equals the former 1.1*min / 1.1*max extents, but it also pads
    # correctly when the data does not straddle zero.
    lowest, highest = np.min(X_data), np.max(X_data)
    padding = 0.05 * (highest - lowest)
    xs = np.linspace(lowest - padding, highest + padding, N_test_points)

    # Every (x1, x2) pair of the grid; x1 varies fastest, matching the layout
    # of np.meshgrid(xs, xs) used for plotting below.
    datapoints = np.transpose([np.tile(xs, len(xs)), np.repeat(xs, len(xs))])
    grid_predictions = forward_propagation(datapoints, weights)[0].reshape(N_test_points, N_test_points)

    X1, X2 = np.meshgrid(xs, xs)
    plt.pcolormesh(X1, X2, grid_predictions, shading='auto', cmap='coolwarm')
    plt.colorbar(label='P(Class 1)')

    if superposed_training:
        # Split on the size of the data actually passed in, not on a global.
        n_class_1 = X_data.shape[0] // 2
        plt.scatter(X_data[:n_class_1, 0], X_data[:n_class_1, 1], color='red')
        plt.scatter(X_data[n_class_1:, 0], X_data[n_class_1:, 1], color='blue')

    plt.title(title)
    plt.show()

In [None]:

# Predicted probabilities on the training inputs, first with the untrained
# snapshot and then with the trained parameters. Only the first element
# returned by forward_propagation (the output activation) is kept.
initial_predictions, trained_predictions = (
    forward_propagation(X, parameters)[0]
    for parameters in (initial_weights, weights)
)

In [None]:
# Turn probabilities into hard 0/1 labels: values of at least 0.5 become 1.
initial_preds_binary = np.asarray(initial_predictions >= 0.5, dtype=int)
trained_preds_binary = np.asarray(trained_predictions >= 0.5, dtype=int)

In [None]:
# Accuracy is the fraction of samples whose hard prediction equals its label.
initial_accuracy = (initial_preds_binary == Y).mean()
trained_accuracy = (trained_preds_binary == Y).mean()

In [None]:
# Report both accuracies as percentages with two decimals, so the effect of
# training can be read off directly.
print(f"accuracy before training (Forward Pass Only): {initial_accuracy * 100:.2f}%")
print(f"accuracy after training (Forward + Backward): {trained_accuracy * 100:.2f}%")

In [None]:
# Prediction maps for the untrained and the trained parameters, each with the
# training points drawn on top.
visualization(
    weights=initial_weights,
    X_data=X,
    title="Before Training (Forward Pass Only)",
    superposed_training=True,
)
visualization(
    weights=weights,
    X_data=X,
    title="After Training (Forward + Backpropagation)",
    superposed_training=True,
)