# CSC173 Activity 01 - Neural Network from Scratch

## Import required libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from numpy.typing import NDArray
from numpy import float64

## Define possible activation functions

In [None]:
def sigmoid(x: NDArray[float64]) -> NDArray[float64]:
    """Apply the logistic function 1 / (1 + exp(-x)) element-wise.

    The result is built from exp(-|x|), which is always in (0, 1], so inputs
    of large magnitude cannot overflow the exponential (the naive form
    overflows in exp(-x) for very negative x and emits a RuntimeWarning).

    Args:
        x: Pre-activation values.

    Returns:
        Array of the same shape as ``x`` with values in [0, 1].
    """
    decay = np.exp(-np.abs(x))
    # For x >= 0 use 1 / (1 + e^-x); for x < 0 use the algebraically equal
    # e^x / (1 + e^x). Both branches only ever see the bounded `decay` term.
    return np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))

def sigmoid_derivative(x: NDArray[float64]) -> NDArray[float64]:
    """Return x * (1 - x) element-wise.

    This equals the sigmoid's slope when ``x`` is already the sigmoid output
    (a = sigmoid(z)), which is how ``backward`` calls the derivative
    functions: it passes the cached activations, not the pre-activations.
    """
    complement = 1 - x
    return x * complement

def tanh(x: NDArray[float64]) -> NDArray[float64]:
    """Apply the hyperbolic tangent element-wise (delegates to ``np.tanh``)."""
    activated = np.tanh(x)
    return activated

def tanh_derivative(x: NDArray[float64]) -> NDArray[float64]:
    """Return the tanh slope given the tanh *output*: 1 - x**2.

    ``backward`` calls the derivative functions with the cached activations
    (a = tanh(z)), the same convention ``sigmoid_derivative`` follows.
    Applying ``np.tanh`` to ``x`` again here would evaluate
    1 - tanh(tanh(z))**2, which is not the derivative of tanh at z.

    Args:
        x: Activation values, i.e. the result of ``tanh(z)``.

    Returns:
        Array of the same shape as ``x`` holding d tanh(z) / dz.
    """
    return 1 - x ** 2

def relu(x: NDArray[float64]) -> NDArray[float64]:
    """Rectified linear unit: element-wise maximum of 0 and ``x``."""
    floor = 0
    rectified = np.maximum(floor, x)
    return rectified

def relu_derivative(x: NDArray[float64]) -> NDArray[float64]:
    """Return 1.0 where ``x`` is strictly positive and 0.0 elsewhere.

    Because relu(z) > 0 exactly when z > 0, the result is the same whether
    ``x`` is the pre-activation or the ReLU output.
    """
    is_active = np.greater(x, 0)
    return is_active.astype(float64)

## Select activation functions for hidden and output layers

In [None]:
# Activation used by the hidden layer, paired with its derivative.
hidden_activation = relu
hidden_activation_derivative = relu_derivative

# Activation used by the output layer, paired with its derivative.
output_activation = sigmoid
output_activation_derivative = sigmoid_derivative

## Neural Network Core Functions

### Function for parameter initialization

In [None]:
def initialize_parameters(input_size: int, hidden_size: int, output_size: int) -> dict[str, NDArray[float64]]:
    """Create the weights and biases of a one-hidden-layer network.

    Weights are standard-normal samples scaled by sqrt(2 / fan_in); biases
    start at zero. Samples come from NumPy's global RNG, W1 first and then
    W2, so seeding ``np.random`` beforehand makes the result reproducible.

    Returns:
        Dict with 'W1' (input_size × hidden_size), 'b1' (1 × hidden_size),
        'W2' (hidden_size × output_size) and 'b2' (1 × output_size).
    """
    def scaled_normal(fan_in: int, fan_out: int) -> NDArray[float64]:
        # One randn draw per weight matrix, scaled by sqrt(2 / fan_in).
        return np.random.randn(fan_in, fan_out) * np.sqrt(2. / fan_in)

    hidden_weights = scaled_normal(input_size, hidden_size)
    hidden_bias = np.zeros((1, hidden_size))

    output_weights = scaled_normal(hidden_size, output_size)
    output_bias = np.zeros((1, output_size))

    return {
        'W1': hidden_weights,
        'b1': hidden_bias,
        'W2': output_weights,
        'b2': output_bias,
    }

### Function implementing forward propagation

In [None]:
def forward(
    X: NDArray[float64], 
    parameters: dict[str, NDArray[float64]]
) -> tuple[NDArray[float64], dict[str, NDArray[float64]]]:
    """Run one forward pass through the two-layer network.

    Args:
        X: Input batch; assumed to be (N × input_size) so that ``X @ W1``
            is defined.
        parameters: Dict holding 'W1', 'b1', 'W2' and 'b2'.

    Returns:
        A pair ``(A2, cache)`` where ``A2`` is the output-layer activation
        and ``cache`` maps 'Z1', 'A1', 'Z2', 'A2' to the intermediate
        pre-activations and activations.
    """
    # Hidden layer: affine map followed by the module-level hidden activation.
    hidden_pre = X @ parameters['W1'] + parameters['b1']        # (N × hidden_size)
    hidden_out = hidden_activation(hidden_pre)                  # (N × hidden_size)

    # Output layer: affine map followed by the module-level output activation.
    output_pre = hidden_out @ parameters['W2'] + parameters['b2']   # (N × output_size)
    output_out = output_activation(output_pre)                      # (N × output_size)

    cache: dict[str, NDArray[float64]] = {
        'Z1': hidden_pre,
        'A1': hidden_out,
        'Z2': output_pre,
        'A2': output_out,
    }
    return output_out, cache

### Functions for loss computations

In [None]:
def compute_loss(A2: NDArray[float64], Y: NDArray[float64]) -> float:
    """Return the mean squared error between ``A2`` and ``Y`` as a Python float."""
    residual = A2 - Y
    squared_error = residual ** 2
    return float(np.mean(squared_error))

def compute_loss_derivative(A2: NDArray[float64], Y: NDArray[float64]) -> NDArray[float64]:
    """Per-sample gradient of the half squared error 1/2 (A2 - Y)^2 w.r.t. ``A2``.

    The result is simply ``A2 - Y``; no averaging over the batch happens here.
    """
    return np.subtract(A2, Y)

### Function implementing backward propagation

In [None]:
def backward(
    X: NDArray[float64], 
    Y: NDArray[float64], 
    cache: dict[str, NDArray[float64]], 
    parameters: dict[str, NDArray[float64]]
) -> dict[str, NDArray[float64]]:
    """Back-propagate the loss through both layers.

    The activation-derivative functions are called with the cached
    *activations* ('A1', 'A2'), not the pre-activations. All gradients are
    averaged over the batch (the number of rows of ``X``).

    Args:
        X: Input batch; assumed (m × input_size).
        Y: Targets with the same shape as the network output.
        cache: Intermediate values from ``forward``; 'A1' and 'A2' are read.
        parameters: Network parameters; only 'W2' is read.

    Returns:
        Dict with 'dW1', 'db1', 'dW2', 'db2', shaped like the matching parameters.
    """
    hidden_out, output_out = cache['A1'], cache['A2']
    batch_size: int = X.shape[0]

    # Output layer: chain the loss gradient through the output activation.
    loss_grad = compute_loss_derivative(output_out, Y)                      # (m × output_size)
    delta_out = loss_grad * output_activation_derivative(output_out)        # (m × output_size)
    grad_W2 = hidden_out.T @ delta_out / batch_size                         # (hidden_size × output_size)
    grad_b2 = np.sum(delta_out, axis=0, keepdims=True) / batch_size         # (1 × output_size)

    # Hidden layer: push the output delta back through W2, then through the
    # hidden activation.
    hidden_grad = delta_out @ parameters['W2'].T                            # (m × hidden_size)
    delta_hidden = hidden_grad * hidden_activation_derivative(hidden_out)   # (m × hidden_size)
    grad_W1 = X.T @ delta_hidden / batch_size                               # (input_size × hidden_size)
    grad_b1 = np.sum(delta_hidden, axis=0, keepdims=True) / batch_size      # (1 × hidden_size)

    return {'dW1': grad_W1, 'db1': grad_b1, 'dW2': grad_W2, 'db2': grad_b2}

### Function to update the parameters given the new gradients

In [None]:
def update_parameters(
    parameters: dict[str, NDArray[float64]], 
    gradients: dict[str, NDArray[float64]], 
    learning_rate: float
) -> dict[str, NDArray[float64]]:
    """Apply one gradient-descent step to every parameter.

    Each array in ``parameters`` is modified in place
    (``p -= learning_rate * dp``) and the same dict object is returned.

    Args:
        parameters: Dict with 'W1', 'b1', 'W2', 'b2'.
        gradients: Dict with the matching 'dW1', 'db1', 'dW2', 'db2'.
        learning_rate: Step size.
    """
    for name in ('W1', 'b1', 'W2', 'b2'):
        step = learning_rate * gradients['d' + name]
        parameters[name] -= step

    return parameters

## Training and Prediction

### Training Function

In [None]:
def train_network(
    X: NDArray[float64], 
    Y: NDArray[float64], 
    learning_rate: float = 0.1, 
    epochs: int = 1000,
    input_size: int = 2,
    hidden_size: int = 4,
    output_size: int = 1,
) -> tuple[dict[str, NDArray[float64]], list[float]]:
    """Train the network with full-batch gradient descent.

    Every epoch runs a forward pass on all of ``X``, records the loss that
    was measured *before* that epoch's update, back-propagates, and updates
    the parameters.

    Args:
        X: Training inputs.
        Y: Training targets.
        learning_rate: Step size passed to ``update_parameters``.
        epochs: Number of full-batch iterations.
        input_size, hidden_size, output_size: Layer widths used to
            initialize the parameters.

    Returns:
        ``(parameters, losses)``: the trained parameter dict and one loss
        value per epoch.
    """
    parameters: dict[str, NDArray[float64]] = initialize_parameters(input_size, hidden_size, output_size)
    losses: list[float] = []

    for _epoch in range(epochs):
        outputs, cache = forward(X, parameters)

        # Loss of the current parameters, logged before they are updated.
        losses.append(compute_loss(outputs, Y))

        step_gradients = backward(X, Y, cache, parameters)
        parameters = update_parameters(parameters, step_gradients, learning_rate)

    return parameters, losses

### Prediction function using forward propagation with the parameters from the trained model

In [None]:
def predict(X: NDArray[float64], parameters: dict[str, NDArray[float64]]) -> NDArray[float64]:
    """Return the network's output activations for ``X``.

    Runs ``forward`` and discards the cache.
    """
    outputs, _cache = forward(X, parameters)
    return outputs

## Visualization and Utility Functions

### Print the matrices containing the weights and the biases of the network

In [None]:
def print_matrices(parameters: dict[str, NDArray[float64]]) -> None:
    """Pretty-print every parameter array as a bracketed matrix.

    A single-row array is printed on one line inside ``[ ]``. Arrays with
    more rows use box-drawing brackets: ``⎡ ⎤`` on the first row, ``⎢ ⎥`` on
    interior rows and ``⎣ ⎦`` on the last row. Values are centered in nine
    characters with five decimals.

    Args:
        parameters: Mapping from a display name to a 1-D or 2-D array
            (1-D arrays are shown as a single row).
    """
    for name, values in parameters.items():
        matrix = np.atleast_2d(values)
        row_texts = [' '.join(f'{value:^9.5f}' for value in row) for row in matrix]
        last = len(row_texts) - 1

        label = f'{name:2} ='
        # Continuation rows are indented to line up under the first row.
        padding = ' ' * len(label)

        for index, text in enumerate(row_texts):
            if last == 0:
                left, right = '[', ']'
            elif index == 0:
                left, right = '⎡', '⎤'
            elif index == last:
                left, right = '⎣', '⎦'
            else:
                # Interior rows: left extension on the left, right extension on the right.
                left, right = '⎢', '⎥'

            prefix = label if index == 0 else padding
            print(f'{prefix} {left} {text} {right}')

### Function for plotting the loss of the network throughout its training

In [None]:
def plot_loss(losses: list[float]) -> None:
    """Show a line chart of the recorded loss values, one point per epoch."""
    figure_size = (10, 6)
    plt.figure(figsize=figure_size)
    plt.plot(losses)

    # Titles and axis labels
    plt.title('Training Loss')
    plt.xlabel('Epoch')
    plt.ylabel('MSE Loss')

    plt.grid(True, alpha=0.3)
    plt.show()

### Function for plotting the decision boundary of the network

In [None]:
def plot_decision_boundary(
    basis_df: pd.DataFrame,
    pair: tuple[str, str],
    label: str,
    parameters: dict[str, NDArray[float64]], 
    resolution: int = 500
) -> None:
    """Plot the network's prediction surface over two features with the data on top.

    Args:
        basis_df: Data frame containing the two feature columns and the label
            column; it is copied, not modified.
        pair: Names of the feature columns used for the x and y axes.
        label: Name of the label column, whose values are mapped with
            ``{'M': 1, 'B': 0}``.
        parameters: Trained network parameters passed to ``predict``.
        resolution: Number of grid samples along each axis.
    """
    feature_x, feature_y = pair[0], pair[1]

    df = basis_df.copy()
    df[label] = basis_df[label].map({'M': 1, 'B': 0})

    X: NDArray[float64] = df[[feature_x, feature_y]].values.astype(float64)

    # `where` keeps the frame's shape and masks out rows of the other class.
    malignant = df[[feature_x, feature_y]].where(df[label] > 0.5)
    benign = df[[feature_x, feature_y]].where(df[label] <= 0.5)

    # Grid covering the data range with a margin of 0.5 on every side.
    x_min = X[:, 0].min() - 0.5
    x_max = X[:, 0].max() + 0.5
    y_min = X[:, 1].min() - 0.5
    y_max = X[:, 1].max() + 0.5
    x_axis = np.linspace(x_min, x_max, resolution)
    y_axis = np.linspace(y_min, y_max, resolution)
    xx, yy = np.meshgrid(x_axis, y_axis)

    # Evaluate the network on every grid point and fold back to the grid shape.
    grid_points = np.c_[xx.ravel(), yy.ravel()]
    surface = predict(grid_points, parameters).reshape(xx.shape)

    plt.figure(figsize=(10, 8))
    # Black line where the prediction equals 0.5, then the shaded surface.
    plt.contour(xx, yy, surface, levels=[0.5], colors='black', linewidths=2)
    plt.contourf(xx, yy, surface, levels=25, alpha=0.3)

    # Data points, one scatter call per class.
    plt.scatter(malignant[feature_x], malignant[feature_y], c='red', edgecolors='black', label='Malignant (M)', s=50)
    plt.scatter(benign[feature_x], benign[feature_y], c='blue', edgecolors='black', label='Benign (B)', s=50)

    plt.xlabel(feature_x)
    plt.ylabel(feature_y)
    plt.legend()
    plt.title(f'Decision Boundary: {feature_x} vs {feature_y}')
    plt.show()

### Function for calculating the accuracy of the network

In [None]:
def calculate_accuracy(y_true: NDArray[float64], y_pred: NDArray[float64], threshold: float = 0.5) -> float:
    """Return the fraction of predictions that match the true binary labels.

    Both inputs are flattened before comparison. Without this, a column
    vector (N × 1) compared against a flat vector (N,) broadcasts to an
    N × N matrix and yields a meaningless accuracy.

    Args:
        y_true: Ground-truth labels encoded as 0.0 / 1.0.
        y_pred: Predicted probabilities.
        threshold: Predictions strictly greater than this count as class 1.

    Returns:
        Accuracy in [0, 1] as a Python float.

    Raises:
        ValueError: If the inputs do not contain the same number of elements.
    """
    labels = np.asarray(y_true).ravel()
    probabilities = np.asarray(y_pred).ravel()
    if labels.shape != probabilities.shape:
        raise ValueError(
            f'y_true has {labels.size} elements but y_pred has {probabilities.size}'
        )

    # Convert probabilities to hard 0.0 / 1.0 predictions.
    predicted_labels: NDArray[float64] = (probabilities > threshold).astype(float64)

    # Boolean hit mask; its mean is the accuracy.
    hits = predicted_labels == labels

    return float(np.mean(hits))

### Function for printing the predictions from the prediction portion of the dataset

In [None]:
def print_predictions(
    X_test: NDArray[float64], 
    test_predictions: NDArray[float64], 
    y_test: NDArray[float64], 
    diag_rev_map: dict[int, str]
) -> None:
    """Print one line per test sample comparing prediction and expectation.

    Each line shows the 1-based sample number, the feature values, the
    predicted probability, the predicted class label, the expected class
    label, and a check mark or cross for whether the two agree at the 0.50
    threshold.

    Args:
        X_test: Feature rows, one per sample.
        test_predictions: Predicted probabilities; each entry must hold a
            single value (``.item()`` is called on it).
        y_test: Expected labels; each entry must hold a single value.
        diag_rev_map: Mapping from the integer class (0 / 1) to its label.
    """
    samples = zip(X_test, test_predictions, y_test)
    for number, (features, prediction, expected) in enumerate(samples, start=1):
        # Extract the scalars once instead of calling .item() repeatedly.
        probability = float(prediction.item())
        expected_value = expected.item()

        predicted_positive = probability > 0.50
        expected_positive = expected_value > 0.50

        features_text = ', '.join(f'{float(value):7.4f}' for value in features)
        predicted_label = diag_rev_map[int(predicted_positive)]
        expected_label = diag_rev_map[int(expected_value)]
        mark = '✓' if predicted_positive == expected_positive else '✗'

        print(
            f'{number:2d}. [{features_text}] -> {probability:.4f} '
            f'=> {predicted_label} | {expected_label} {mark}'
        )

## Main Execution

In [None]:
def main() -> None:
    """Train the network on two WDBC features and report how it performs.

    Steps: seed NumPy's global RNG, load '../data/wdbc.csv', keep the label
    and the two chosen feature columns, split 95% / 5% into train and test
    rows, train, print the learned parameters, plot the loss curve and the
    decision boundary, then print accuracies and per-sample test predictions.
    """
    # Seed first: it precedes both the train/test sampling and the weight
    # initialization below.
    np.random.seed(67)

    # Load the dataset
    dataset = pd.read_csv('../data/wdbc.csv')

    # Feature columns used as network inputs, and the label column
    feature_x = 'concavity3'
    feature_y = 'radius3'
    label_col = 'Diagnosis'

    # Diagnosis letter <-> binary class
    diag_map = {'M': 1, 'B': 0}
    diag_rev_map = {1: 'M', 0: 'B'}

    # Keep only the columns that are actually used
    selected_df: pd.DataFrame = dataset[[label_col, feature_x, feature_y]]

    # 95% of the rows train the model; the rows that were not sampled form
    # the test set
    train_df: pd.DataFrame = selected_df.sample(frac=0.95)
    in_training = selected_df.index.isin(train_df.index)
    tests_df: pd.DataFrame = selected_df[~in_training]

    def to_arrays(frame: pd.DataFrame) -> tuple[NDArray[float64], NDArray[float64]]:
        # Features as an (N × 2) float matrix, labels as an (N × 1) 0/1 column.
        features = frame[[feature_x, feature_y]].values.astype(float64)
        labels = frame[label_col].map(diag_map).to_numpy().astype(float64).reshape(-1, 1)
        return features, labels

    X_train, y_train = to_arrays(train_df)
    X_test, y_test = to_arrays(tests_df)

    # Train and collect the per-epoch loss
    parameters, losses = train_network(
        X_train,
        y_train,
        learning_rate=0.05,
        epochs=1000,
        input_size=2,
        hidden_size=3,
        output_size=1,
    )

    # Show the learned weights and biases
    print_matrices(parameters)

    # Loss curve
    plot_loss(losses)

    # Decision boundary over the full (train + test) selection
    plot_decision_boundary(selected_df, (feature_x, feature_y), label_col, parameters)

    # Predictions and accuracy on the test and training sets
    test_predictions = predict(X_test, parameters)
    train_predictions = predict(X_train, parameters)
    train_accuracy = calculate_accuracy(y_train, train_predictions)
    test_accuracy = calculate_accuracy(y_test, test_predictions)

    # Summary of the model's accuracy
    print(f"\n{' MODEL EVALUATION ':=^56}\n")
    print(f"Training Accuracy: {train_accuracy:.2%}")
    print(f"Test Accuracy:     {test_accuracy:.2%}")

    # Individual predictions on the test rows
    print(f"\n{' PREDICTIONS ':=^56}\n")
    print_predictions(X_test, test_predictions, y_test, diag_rev_map)

# Run the experiment only when this file is executed directly (as a script or
# as a notebook cell, where __name__ is also '__main__'), not when imported.
if __name__ == '__main__':
    main()