<a href="https://colab.research.google.com/github/zzc029498-max/nec-/blob/main/A1_part2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

class NeuralNet:
    """
    Implementation of a Back-Propagation (BP) Neural Network from scratch.

    Follows the variable naming and structure defined in the
    NEC 2025/26 Activity 1 PDF.

    Layers are numbered 1..L (layer 1 is the input layer, layer L the output
    layer); index 0 of every per-layer list is an unused placeholder so the
    code can use the same 1-based indices. Training is online (one weight
    update per sample) gradient descent with momentum on the squared error.
    The same activation function is applied to every non-input layer,
    including the output layer.
    """

    # Activation names accepted by the constructor.
    _ACTIVATIONS = ('sigmoid', 'relu', 'linear', 'tanh')

    def __init__(self, network_architecture, n_epochs, learning_rate, momentum, activation_function, validation_split):
        """
        Initializes the Neural Network.

        Args:
            network_architecture (list): Units per layer, e.g., [3, 9, 5, 1]
            n_epochs (int): Number of training epochs
            learning_rate (float): Learning rate (eta)
            momentum (float): Momentum (mu)
            activation_function (str): 'sigmoid', 'relu', 'linear', 'tanh'
            validation_split (float): Percentage of data for validation (0.0 to 1.0)

        Raises:
            ValueError: If the activation name is unknown or the architecture
                has fewer than two layers.
        """

        # Fail fast on configuration errors instead of on the first forward pass.
        if activation_function not in self._ACTIVATIONS:
            raise ValueError(f"Unknown activation function: {activation_function}")

        # Copy into a list so tuples (or other sequences) are accepted too.
        network_architecture = list(network_architecture)
        if len(network_architecture) < 2:
            raise ValueError("network_architecture needs at least an input and an output layer")

        # --- 1. Initialize parameters ---
        self.n_epochs = n_epochs
        self.lr = learning_rate
        self.mu = momentum
        self.fact_name = activation_function
        self.validation_split = validation_split

        # --- 2. Initialize network structure (using 1-based indexing) ---

        self.L = len(network_architecture)
        self.n = [0] + network_architecture

        self.h = [None] * (self.L + 1)       # fields (pre-activations)
        self.xi = [None] * (self.L + 1)      # activations
        self.w = [None] * (self.L + 1)       # w[l] has shape (n[l], n[l-1])
        self.theta = [None] * (self.L + 1)   # additive bias terms
        self.delta = [None] * (self.L + 1)   # back-propagated error terms

        self.dw = [None] * (self.L + 1)      # last applied weight change
        self.dtheta = [None] * (self.L + 1)  # last applied bias change

        self.d_w_prev = [None] * (self.L + 1)
        self.d_theta_prev = [None] * (self.L + 1)

        # --- 3. Initialize weights and arrays ---
        for l in range(1, self.L + 1):
            n_units = self.n[l]

            self.h[l] = np.zeros(n_units)
            self.xi[l] = np.zeros(n_units)
            self.delta[l] = np.zeros(n_units)

            if l > 1:
                n_units_prev = self.n[l-1]

                # Uniform init in [-limit, limit] scaled by fan-in + fan-out.
                limit = np.sqrt(6 / (n_units_prev + n_units))
                self.w[l] = np.random.uniform(-limit, limit, (n_units, n_units_prev))

                self.theta[l] = np.zeros(n_units)

                self.dw[l] = np.zeros((n_units, n_units_prev))
                self.dtheta[l] = np.zeros(n_units)
                self.d_w_prev[l] = np.zeros((n_units, n_units_prev))
                self.d_theta_prev[l] = np.zeros(n_units)

        # --- 4. Loss history ---
        self.train_loss_history = []
        self.val_loss_history = []

    def fit(self, X, y):
        """
        Trains the network using input data X and target y.

        Args:
            X: Array-like of shape (n_samples, n_inputs).
            y: Array-like of shape (n_samples,) or (n_samples, n_outputs).

        Raises:
            ValueError: If X is not 2-D, is empty, or X and y disagree on
                the number of samples.

        One entry per epoch is appended to the train/validation loss
        histories; the validation entry is NaN when no validation split
        is used.
        """

        # Coerce once so lists / DataFrames can be indexed with index arrays.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-dimensional (n_samples, n_features), got shape {X.shape}")
        if y.ndim == 0 or y.shape[0] != X.shape[0]:
            raise ValueError(f"X and y have inconsistent numbers of samples: {X.shape[0]} vs {y.shape}")
        if X.shape[0] == 0:
            raise ValueError("Cannot fit on an empty data set")

        # --- 1. Split training/validation data ---
        if self.validation_split > 0:
            X_train, X_val, y_train, y_val = train_test_split(
                X, y, test_size=self.validation_split, shuffle=True
            )
        else:
            X_train, y_train = X, y
            X_val, y_val = None, None

        n_samples = X_train.shape[0]

        # --- 2. Epoch loop ---
        for epoch in range(self.n_epochs):

            train_epoch_losses = []

            # Fresh sample order every epoch.
            indices = np.random.permutation(n_samples)
            X_train_shuffled = X_train[indices]
            y_train_shuffled = y_train[indices]

            # --- 3. Sample loop (Stochastic Gradient Descent) ---
            for i in range(n_samples):
                x_sample = X_train_shuffled[i]
                y_sample = y_train_shuffled[i]

                self._forward_pass(x_sample)

                # Loss is recorded before this sample's weight update.
                prediction = self.xi[self.L]
                train_epoch_losses.append(self._mse_loss(y_sample, prediction))

                self._backward_pass(y_sample)

                self._update_weights()

            # --- 4. Calculate and store epoch losses ---
            avg_train_loss = np.mean(train_epoch_losses)
            self.train_loss_history.append(avg_train_loss)

            if X_val is not None:
                y_val_pred = self.predict(X_val)
                avg_val_loss = self._mse_loss(y_val, y_val_pred)
                self.val_loss_history.append(avg_val_loss)
            else:
                self.val_loss_history.append(np.nan)

            if (epoch + 1) % 10 == 0:
                print(f"Epoch {epoch + 1}/{self.n_epochs} - "
                      f"Train Loss: {avg_train_loss:.6f} - "
                      f"Val Loss: {self.val_loss_history[-1]:.6f}")

    def predict(self, X):
        """
        Performs predictions on input data X.

        Args:
            X: Array-like of shape (n_samples, n_inputs).

        Returns:
            np.array: Array of shape (n_samples, n_outputs).
        """
        predictions = []
        for x_sample in np.asarray(X, dtype=float):
            self._forward_pass(x_sample)
            # Copy: the output buffer is replaced on the next forward pass.
            predictions.append(self.xi[self.L].copy())

        # The reshape only matters for empty input, giving (0, n_outputs).
        return np.array(predictions).reshape(-1, self.n[self.L])

    def loss_epochs(self):
        """
        Returns the evolution of training and validation error.

        Returns:
            np.array: Array of size (n_epochs, 2),
                      col 0 is train loss, col 1 is val loss.
        """
        return np.column_stack((self.train_loss_history, self.val_loss_history))

    # --------------- Internal Helper Methods ---------------

    def _forward_pass(self, x):
        """
        Executes the forward pass for a single sample.

        Stores the fields in self.h and the activations in self.xi.

        Raises:
            ValueError: If x is not a vector of the input layer size.
        """

        x = np.asarray(x, dtype=float)
        if x.ndim != 1 or x.shape[0] != self.n[1]:
            raise ValueError(f"Input sample size ({x.size}) does not match input layer size ({self.n[1]})")
        self.xi[1] = x

        for l in range(2, self.L + 1):
            self.h[l] = self.w[l] @ self.xi[l-1] + self.theta[l]
            self.xi[l] = self._activation(self.h[l])

    def _backward_pass(self, y_true):
        """
        Executes the backward pass for a single sample.

        Must be called right after _forward_pass for the same sample, since
        it reads the fields/activations stored there.

        Args:
            y_true: Scalar (single-output networks) or vector with one
                target value per output unit.

        Raises:
            ValueError: If the target size differs from the output layer size.
        """

        target = np.asarray(y_true, dtype=float).reshape(-1)
        if target.size != self.n[self.L]:
            raise ValueError(f"Target size ({target.size}) does not match output layer size ({self.n[self.L]})")

        # delta is defined as (target - output) * f'(h), i.e. the negative
        # error gradient, so the update step ADDS lr * delta * xi.
        error_signal = target - self.xi[self.L]

        f_prime_h_L = self._activation_derivative(self.h[self.L])

        self.delta[self.L] = error_signal * f_prime_h_L

        for l in range(self.L - 1, 1, -1):
            f_prime_h_l = self._activation_derivative(self.h[l])
            # sum_i delta[l+1]_i * w[l+1]_ij for every unit j of layer l
            sum_term = self.delta[l+1] @ self.w[l+1]
            self.delta[l] = sum_term * f_prime_h_l

    def _update_weights(self):
        """
        Updates weights and thresholds using momentum.

        The applied change is
            change(t) = lr * gradient_step + mu * change(t-1)
        and the FULL change (momentum term included) is what is remembered
        for the next step, so the momentum term accumulates across updates.
        """

        for l in range(2, self.L + 1):

            self.dw[l] = (self.lr * np.outer(self.delta[l], self.xi[l-1])
                          + self.mu * self.d_w_prev[l])
            self.dtheta[l] = self.lr * self.delta[l] + self.mu * self.d_theta_prev[l]

            self.w[l] += self.dw[l]
            self.theta[l] += self.dtheta[l]

            self.d_w_prev[l] = self.dw[l]
            self.d_theta_prev[l] = self.dtheta[l]

    # --------------- Activation Functions & Derivatives ---------------

    @staticmethod
    def _sigmoid(h):
        """Logistic function; the argument is clipped so np.exp cannot overflow."""
        return 1 / (1 + np.exp(-np.clip(h, -500, 500)))

    def _activation(self, h):
        """Applies the selected activation function."""
        if self.fact_name == 'sigmoid':
            return self._sigmoid(h)
        elif self.fact_name == 'relu':
            return np.maximum(0, h)
        elif self.fact_name == 'linear':
            return h
        elif self.fact_name == 'tanh':
            return np.tanh(h)
        else:
            raise ValueError(f"Unknown activation function: {self.fact_name}")

    def _activation_derivative(self, h):
        """Calculates the derivative f'(h) of the activation function."""
        if self.fact_name == 'sigmoid':
            f_h = self._sigmoid(h)
            return f_h * (1 - f_h)
        elif self.fact_name == 'relu':
            # Derivative at h == 0 is taken as 0.
            return (h > 0) * 1.0
        elif self.fact_name == 'linear':
            return np.ones_like(h)
        elif self.fact_name == 'tanh':
            f_h = np.tanh(h)
            return 1 - f_h**2
        else:
            raise ValueError(f"Unknown activation function: {self.fact_name}")

    def _mse_loss(self, y_true, y_pred):
        """
        Calculates Mean Squared Error (MSE)

        Both inputs are flattened before comparison, so (n,), (n, 1) and
        (n, k) layouts are all accepted as long as the element counts match.

        Raises:
            ValueError: If the inputs are empty or differ in element count.
        """
        # Plain numpy: this runs once per training sample, so it must be cheap.
        y_true = np.asarray(y_true, dtype=float).ravel()
        y_pred = np.asarray(y_pred, dtype=float).ravel()
        if y_true.size == 0 or y_true.size != y_pred.size:
            raise ValueError(f"Cannot compute MSE for sizes {y_true.size} and {y_pred.size}")
        return float(np.mean((y_true - y_pred) ** 2))

# --- Example Usage (for testing) ---
if __name__ == "__main__":
    # Smoke test: train a small sigmoid network on the XOR truth table
    # (treated as a regression problem) and print what it learned.

    print("--- Testing NeuralNet.py ---")

    # 1. Toy data set: the four XOR input pairs and their targets
    X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    y = np.array([0, 1, 1, 0])

    print(f"Test data X shape: {X.shape}")
    print(f"Test data y shape: {y.shape}")

    # 2. Hyper-parameters (no validation split: every sample is used for training)
    architecture, act_func = [2, 4, 1], 'sigmoid'
    epochs = 5000
    lr, momentum = 0.1, 0.9
    val_split = 0.0

    # 3. Build the network (arguments in constructor order) and train it
    nn = NeuralNet(architecture, epochs, lr, momentum, act_func, val_split)

    print("\nStarting training...")
    nn.fit(X, y)
    print("Training complete.")

    # 4. Compare the network output with the target for every input pair
    predictions = nn.predict(X)

    print("\n--- Predictions ---")
    for x_in, y_t, y_p in zip(X, y, predictions):
        print(f"Input: {x_in} | Target: {y_t} | Predicted: {y_p[0]:.4f}")

    # 5. Per-epoch losses: column 0 is training, column 1 is validation
    loss_history = nn.loss_epochs()
    print(f"\nLoss history shape: {loss_history.shape}")
    final_train_loss, final_val_loss = loss_history[-1]
    print("Final training loss:", final_train_loss)
    print("Final validation loss:", final_val_loss)