# In [43]:
import numpy as np
import matplotlib.pyplot as plt

# Seed NumPy's global random generator so that every np.random.randn call
# below (data sampling and weight initialisation) is reproducible across runs.
np.random.seed(69)

def get_data(N = 300):
    """Sample a toy "inside vs. outside the unit circle" data set.

    Draws ``N`` two-dimensional points from a standard normal distribution
    (using NumPy's global random state) and labels each point ``0`` when it
    lies strictly inside the unit circle and ``1`` otherwise.

    Args:
        N: Number of points to draw.

    Returns:
        A tuple ``(X, Y)`` where ``X`` has shape ``(N, 2)`` and ``Y`` is a
        float column vector of shape ``(N, 1)`` holding the 0/1 labels.
    """
    points = np.random.randn(N, 2)
    first, second = points[:, 0], points[:, 1]
    # Negating the "inside" test (rather than testing ">= 1") keeps the
    # labelling identical to a where(inside, 0, 1) selection.
    is_outside = ~(first ** 2 + second ** 2 < 1.0)
    labels = is_outside.astype(np.float64).reshape(N, 1)
    return points, labels

class NN():
    """Fully connected feed-forward network for binary classification.

    The network is described by a list of layer dicts and is trained with
    full-batch gradient descent using an exponential moving average of the
    gradients (momentum) and L2 weight decay.

    Conventions used throughout:
        * inputs/activations are row-major: ``(n_samples, n_features)``;
        * ``W{i}`` has shape ``(output_dim, input_dim)`` and ``b{i}`` has
          shape ``(1, output_dim)`` for layer ``i``;
        * activations are looked up by name as methods on this class, with
          the derivative being the method named ``d<activation>``.
    """

    def __init__(self, architecture):
        """Build the layers and randomly initialise their parameters.

        Weights are drawn from a standard normal distribution (NumPy's global
        random state) and scaled by ``1 / sqrt(input_dim)``; biases and the
        momentum buffers start at zero.

        Args:
            architecture: Sequence of dicts, one per layer, each providing
                ``"input_dim"``, ``"output_dim"`` and ``"activation"``
                (``"relu"`` or ``"sigmoid"``).

        Raises:
            ValueError: If an activation has no matching forward/derivative
                method pair, or if a layer's ``input_dim`` does not equal the
                previous layer's ``output_dim``.
        """
        self.activations = []
        self.params_values = {}
        self.layers = len(architecture)
        self.grads_momentum = {}
        previous_output = None
        for i, layer in enumerate(architecture):
            input_size, output_size, activation = layer["input_dim"], layer["output_dim"], layer["activation"]

            # Fail early with a clear message instead of an AttributeError or
            # a matmul shape error in the middle of training.
            has_forward = callable(getattr(self, activation, None))
            has_derivative = callable(getattr(self, f"d{activation}", None))
            if not (has_forward and has_derivative):
                raise ValueError(
                    f"Layer {i}: unsupported activation {activation!r}"
                )
            if previous_output is not None and input_size != previous_output:
                raise ValueError(
                    f"Layer {i}: input_dim {input_size} does not match the "
                    f"previous layer's output_dim {previous_output}"
                )
            previous_output = output_size

            self.activations.append(activation)
            self.params_values[f"W{i}"] = np.random.randn(
                output_size, input_size
            ) / np.sqrt(input_size)
            self.params_values[f"b{i}"] = np.zeros((1, output_size))
            self.grads_momentum[f"W{i}"] = np.zeros_like(self.params_values[f"W{i}"])
            self.grads_momentum[f"b{i}"] = np.zeros_like(self.params_values[f"b{i}"])

        self.reset()

    def reset(self):
        """Drop the forward-pass cache and the gradients of the last step."""
        self.cache = {}
        self.grads = {}

    def relu(self, x):
        """Return the element-wise rectified linear unit ``max(0, x)``."""
        return np.maximum(0, x)

    def drelu(self, dA, z):
        """Back-propagate ``dA`` through ReLU given its pre-activation ``z``.

        Returns a copy of ``dA`` with entries zeroed wherever ``z <= 0``.
        """
        dA_ = np.copy(dA)
        dA_[z <= 0] = 0
        return dA_

    def sigmoid(self, x):
        """Return the element-wise logistic function ``1 / (1 + exp(-x))``.

        The input is clipped to ``[-500, 500]`` before exponentiation so that
        very negative pre-activations cannot overflow ``np.exp``; inside that
        range the result is unchanged.
        """
        return 1. / (1. + np.exp(-np.clip(x, -500., 500.)))

    def dsigmoid(self, dA, z):
        """Back-propagate ``dA`` through the sigmoid given pre-activation ``z``."""
        s = self.sigmoid(z)
        return s * (1. - s) * dA

    def bce(self, yhat, y):
        """Return the mean binary cross-entropy between ``yhat`` and ``y``.

        Both arrays are flattened first. A small epsilon (1e-8) is added
        inside each logarithm to avoid ``log(0)``.

        Args:
            yhat: Predicted probabilities.
            y: 0/1 targets with the same number of elements as ``yhat``.

        Returns:
            The loss averaged over all samples, as a scalar.
        """
        yhat, y = yhat.flatten(), y.flatten()
        # Average the per-sample terms explicitly: taking the mean of two
        # 1-D dot products would be a no-op on an already-summed scalar and
        # make the reported loss grow with the batch size.
        log_likelihood = y * np.log(yhat+1e-8) + (1 - y) * np.log(1 - yhat+1e-8)
        return -np.mean(log_likelihood)

    def dbce(self, yhat, y):
        """Return the element-wise derivative of the per-sample BCE w.r.t. ``yhat``.

        No averaging over the batch is applied here.
        """
        return -(y / (yhat+1e-8) - (1 - y) / (1 - yhat+1e-8))

    def single_forward(self, x, W, b, activation):
        """Apply one layer: affine map followed by the named activation.

        Args:
            x: Layer input of shape ``(n_samples, input_dim)``.
            W: Weights of shape ``(output_dim, input_dim)``.
            b: Bias of shape ``(1, output_dim)``.
            activation: Name of the activation method on this class.

        Returns:
            Tuple ``(A, Z)`` of the activated output and the pre-activation.
        """
        Z = x @ W.T + b
        A = getattr(self, activation)(Z)
        return A, Z

    def forward(self, x):
        """Run ``x`` through every layer and return the final activations.

        For each layer the pair ``(Z, A_prev)`` is stored in ``self.cache``
        under the layer index (as a string) for use by :meth:`backward`.
        """
        A_curr = x
        for i in range(self.layers):
            W, b = self.params_values[f"W{i}"], self.params_values[f"b{i}"]
            A_prev = A_curr
            A_curr, Z_curr = self.single_forward(A_prev, W, b, self.activations[i])
            self.cache[str(i)] = (Z_curr, A_prev)
        return A_curr

    def single_backward(self, dA_curr, W, Z_curr, A_prev, activation):
        """Back-propagate through one layer.

        Args:
            dA_curr: Gradient w.r.t. this layer's activated output.
            W: This layer's weights.
            Z_curr: Cached pre-activation of this layer.
            A_prev: Cached input of this layer.
            activation: Name of this layer's activation.

        Returns:
            Tuple ``(dA_prev, dW, db)``: gradient w.r.t. the layer input and
            the parameter gradients.
        """
        # NOTE(review): A_prev is (n_samples, input_dim), so shape[1] is the
        # layer's input width, not the batch size. The parameter gradients are
        # therefore the batch *sum* divided by the input width. This only
        # rescales the step size per layer; it is kept as-is because callers'
        # learning rates are tuned against it. Switching to shape[0] would
        # give a true batch mean but requires retuning the hyperparameters.
        m = A_prev.shape[1]
        dactivation = getattr(self, f"d{activation}")
        dZ = dactivation(dA_curr, Z_curr)
        dW = np.dot(dZ.T, A_prev) / m
        db = np.sum(dZ, axis = 0, keepdims = True) / m
        dA_prev = np.dot(dZ, W)
        return dA_prev, dW, db

    def backward(self, yhat, y):
        """Compute gradients for all layers from the last forward pass.

        Requires :meth:`forward` to have been called since the last
        :meth:`reset`. Results are written to ``self.grads`` under the keys
        ``W{i}`` and ``b{i}``.
        """
        dA_curr = self.dbce(yhat, y)
        for i in range(self.layers - 1, -1, -1):
            W = self.params_values[f"W{i}"]
            Z_curr, A_prev = self.cache[str(i)]
            dA_curr, dW, db = self.single_backward(dA_curr, W, Z_curr, A_prev, self.activations[i])
            self.grads[f"W{i}"] = dW
            self.grads[f"b{i}"] = db

    def accuracy(self, yhat, y):
        """Return the fraction of predictions (thresholded at 0.5) equal to ``y``."""
        prediction = np.where(yhat > 0.5, 1, 0)
        return np.mean(prediction == y)

    def train(self, x, y, learning_rate, epochs, momentum = 0.9, weight_decay = 0.0001):
        """Train on the full batch ``(x, y)`` for a number of epochs.

        Each epoch performs one forward pass, records loss and accuracy,
        back-propagates and applies one parameter update.

        Args:
            x: Inputs of shape ``(n_samples, input_dim of the first layer)``.
            y: 0/1 targets of shape ``(n_samples, 1)``.
            learning_rate: Step size of the parameter update.
            epochs: Number of full-batch update steps.
            momentum: Decay factor of the gradient moving average.
            weight_decay: L2 penalty coefficient added to the gradients.

        Returns:
            Tuple ``(losses, accuracies)`` with one entry per epoch, measured
            before that epoch's update.
        """
        losses = []
        accuracies = []
        for _ in range(epochs):
            yhat = self.forward(x)
            losses.append(self.bce(yhat, y))
            accuracies.append(self.accuracy(yhat, y))
            self.backward(yhat, y)
            self.update_params(weight_decay, momentum, learning_rate)

        return losses, accuracies

    def update_params(self, weight_decay, momentum, learning_rate):
        """Apply one momentum step with L2 weight decay, then clear the cache.

        For every parameter ``p`` with gradient ``g``:
        ``v = momentum * v + (1 - momentum) * (g + weight_decay * p)`` and
        ``p -= learning_rate * v``. The decay term is applied to weights and
        biases alike.
        """
        for i in range(self.layers):
            for key in (f"W{i}", f"b{i}"):
                grad = self.grads[key] + weight_decay * self.params_values[key]
                self.grads_momentum[key] = momentum * self.grads_momentum[key] + (1 - momentum) * grad
                self.params_values[key] -= learning_rate * self.grads_momentum[key]
        self.reset()

def _save_and_close(path):
    """Write the current matplotlib figure to ``path`` and close it."""
    plt.savefig(path)
    plt.close()


def _plot_circle_and_points(angles, points, colors):
    """Draw the dashed unit circle and a scatter of 2-D ``points``."""
    plt.plot(np.cos(angles), np.sin(angles), 'k--')
    plt.scatter(points[:, 0], points[:, 1], c=colors, s=50, cmap='RdBu')


def _save_training_curve(values, ylabel, path):
    """Plot a per-epoch series against 'Epochs' and save it to ``path``."""
    plt.plot(values)
    plt.xlabel('Epochs')
    plt.ylabel(ylabel)
    _save_and_close(path)


def _hard_predictions(model, inputs):
    """Return the model's outputs on ``inputs`` thresholded at 0.5 as ints."""
    return (model.forward(inputs) > 0.5).astype(int)


# Hyperparameters shared by every training run in this script.
_TRAIN_SETTINGS = {
    "learning_rate": 0.005,
    "epochs": 1000,
    "momentum": 0.9,
    "weight_decay": 0.1,
}

# --- Training data and a picture of it --------------------------------------
X, Y = get_data(100)

theta = np.linspace(0, 2*np.pi, 300)
_plot_circle_and_points(theta, X, Y)
_save_and_close('data.png')

# --- Network definition: 2 -> 6 -> 3 -> 10 -> 4 -> 1 -------------------------
nn_architecture = [
    {"input_dim": n_in, "output_dim": n_out, "activation": name}
    for n_in, n_out, name in (
        (2, 6, "relu"),
        (6, 3, "relu"),
        (3, 10, "relu"),
        (10, 4, "relu"),
        (4, 1, "sigmoid"),
    )
]

# --- Train once on the 100-point set and save the learning curves ------------
nn = NN(nn_architecture)
losses, accuracies = nn.train(X, Y, **_TRAIN_SETTINGS)

_save_training_curve(losses, 'Loss', 'loss.png')
_save_training_curve(accuracies, 'Accuracy', 'accuracy.png')

# --- Evaluate on a large, freshly sampled test set ---------------------------
X_test, Y_test = get_data(100000)

yhat = _hard_predictions(nn, X_test)
accuracy = np.mean(yhat == Y_test)
print(f"Accuracy: {accuracy * 100:.2f}%")

theta = np.linspace(0, 2*np.pi, 360)
_plot_circle_and_points(theta, X_test, yhat.flatten())
_save_and_close('decision_boundary.png')

# --- Test accuracy as a function of the training-set size --------------------
training_sizes = [50, 100, 200, 300, 500, 1000]
test_accuracies = []

for size in training_sizes:
    # Sample the data before building the network so the global random
    # stream is consumed in the same order on every run.
    X_train, Y_train = get_data(size)
    nn = NN(nn_architecture)
    nn.train(X_train, Y_train, **_TRAIN_SETTINGS)
    yhat_test = _hard_predictions(nn, X_test)
    test_accuracy = np.mean(yhat_test == Y_test)
    test_accuracies.append(test_accuracy * 100)
    print(f"Training Size: {size}, Test Accuracy: {test_accuracy * 100:.2f}%")

plt.plot(training_sizes, test_accuracies, marker='o')
plt.xlabel('Training Size')
plt.ylabel('Test Accuracy')
plt.title('Test Accuracy vs. Training Size')
_save_and_close('accuracy_vs_training_size.png')


# Recorded output of the run above:
# Accuracy: 95.71%
# Training Size: 50, Test Accuracy: 89.55%
# Training Size: 100, Test Accuracy: 96.03%
# Training Size: 200, Test Accuracy: 98.09%
# Training Size: 300, Test Accuracy: 97.56%
# Training Size: 500, Test Accuracy: 98.38%
# Training Size: 1000, Test Accuracy: 98.71%
