<a href="https://colab.research.google.com/github/zzc029498-max/nec-/blob/main/Back_Propagation_Neural_Network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

class NeuralNet:
    """
    Back-Propagation (BP) neural network for regression, built on NumPy.

    Layers use 0-based indexing: index 0 is the input layer and index
    ``L - 1`` is the output layer. Hidden layers use the activation chosen at
    construction time; the output layer is always linear. Training is online
    (one weight update per sample) and uses a momentum term.
    """

    # Activation names accepted by the constructor.
    _SUPPORTED_ACTIVATIONS = ('sigmoid', 'relu', 'linear', 'tanh')

    def __init__(self, network_architecture, n_epochs, learning_rate, momentum, activation_function, validation_split):
        """
        Initializes the Neural Network.

        Args:
            network_architecture (sequence of int): Units per layer, e.g., [3, 9, 5, 1].
                                         Index 0 is input layer, last index is output layer.
                                         Any sequence (list, tuple, array) is accepted.
            n_epochs (int): Number of training epochs.
            learning_rate (float): Learning rate (eta).
            momentum (float): Momentum (mu).
            activation_function (str): 'sigmoid', 'relu', 'linear', 'tanh'.
            validation_split (float): Fraction of data held out for validation
                                      (0.0 disables validation).

        Raises:
            ValueError: If the activation name is not supported, or the
                        architecture has fewer than two layers or a layer
                        with fewer than one unit.
        """

        # --- 1. Validate and store hyperparameters ---
        if activation_function not in self._SUPPORTED_ACTIVATIONS:
            # Without this check a misspelled name would silently behave as 'linear'.
            raise ValueError(
                f"Unknown activation function '{activation_function}'. "
                f"Supported: {', '.join(self._SUPPORTED_ACTIVATIONS)}"
            )

        # list(...) instead of .copy() so tuples and arrays are accepted too
        architecture = [int(units) for units in network_architecture]
        if len(architecture) < 2 or min(architecture) < 1:
            raise ValueError(
                "network_architecture needs at least an input and an output layer, "
                "each with one or more units"
            )

        self.n_epochs = n_epochs
        self.lr = learning_rate
        self.mu = momentum
        self.fact_name = activation_function
        self.validation_split = validation_split

        # --- 2. Network structure (0-based layer indexing) ---
        self.L = len(architecture)  # Total number of layers
        self.n = architecture       # Units per layer

        # Per-layer activations (xi), weighted input sums (h) and delta errors
        self.xi = [np.zeros(units) for units in self.n]
        self.h = [np.zeros(units) for units in self.n]
        self.delta = [np.zeros(units) for units in self.n]

        # Weights (w) and biases (theta): entry l connects layer l-1 to layer l,
        # so entry 0 is unused and stays None.
        self.w = [None] * self.L
        self.theta = [None] * self.L

        # Weight/bias changes applied in the current and in the previous step
        self.dw = [None] * self.L
        self.dtheta = [None] * self.L
        self.d_w_prev = [None] * self.L
        self.d_theta_prev = [None] * self.L

        # --- 3. Initialize weights, biases and change buffers ---
        for l in range(1, self.L):
            n_units = self.n[l]
            n_units_prev = self.n[l - 1]

            # Biases start at zero
            self.theta[l] = np.zeros(n_units)
            self.dtheta[l] = np.zeros(n_units)
            self.d_theta_prev[l] = np.zeros(n_units)

            if self.fact_name == 'relu':
                # He initialization: normal distribution with std sqrt(2 / fan_in)
                std = np.sqrt(2 / n_units_prev)
                self.w[l] = np.random.normal(0, std, (n_units, n_units_prev))
            else:
                # Glorot/Xavier initialization: uniform in [-limit, limit]
                limit = np.sqrt(6 / (n_units_prev + n_units))
                self.w[l] = np.random.uniform(-limit, limit, (n_units, n_units_prev))

            self.dw[l] = np.zeros((n_units, n_units_prev))
            self.d_w_prev[l] = np.zeros((n_units, n_units_prev))

        # --- 4. Loss history (one entry per epoch) ---
        self.train_loss_history = []
        self.val_loss_history = []

    def fit(self, X, y):
        """
        Trains the network using input data X and target y.

        Args:
            X (array-like): Samples, shape (n_samples, n_features).
            y (array-like): Targets, one row/value per sample.

        Raises:
            ValueError: If X is not 2-D, is empty, or X and y disagree on the
                        number of samples.

        Side effects:
            Updates weights/biases in place and appends one value per epoch
            to ``train_loss_history`` and ``val_loss_history`` (NaN when no
            validation split is used). Prints progress every 100 epochs.
        """

        # Convert up front so lists and DataFrames can be indexed by row below
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-D (n_samples, n_features), got shape {X.shape}")
        if X.shape[:1] != y.shape[:1]:
            raise ValueError(
                f"X and y have different numbers of samples: {X.shape[0]} vs {y.shape[:1]}"
            )
        if X.shape[0] == 0:
            raise ValueError("X must contain at least one sample")

        # --- 1. Split training/validation data ---
        if self.validation_split > 0:
            X_train, X_val, y_train, y_val = train_test_split(
                X, y, test_size=self.validation_split, shuffle=True
            )
        else:
            X_train, y_train = X, y
            X_val, y_val = None, None

        n_samples = X_train.shape[0]
        output_layer_index = self.L - 1

        # --- 2. Epoch loop ---
        for epoch in range(self.n_epochs):

            train_epoch_losses = []

            # --- 3. Sample loop (online gradient descent, new random order each epoch) ---
            for i in np.random.permutation(n_samples):
                x_sample = X_train[i]
                y_sample = y_train[i]

                self._forward_pass(x_sample)

                # The loss is recorded BEFORE this sample's weight update
                prediction = self.xi[output_layer_index]
                train_epoch_losses.append(self._mse_loss(np.atleast_1d(y_sample), prediction))

                self._backward_pass(y_sample)
                self._update_weights()

            # --- 4. Calculate and store epoch losses ---
            avg_train_loss = np.mean(train_epoch_losses)
            self.train_loss_history.append(avg_train_loss)

            if X_val is not None:
                y_val_pred = self.predict(X_val)
                # Align the targets with the (n_samples, n_outputs) predictions;
                # flattening the predictions instead breaks multi-output networks.
                avg_val_loss = self._mse_loss(y_val.reshape(y_val_pred.shape), y_val_pred)
                self.val_loss_history.append(avg_val_loss)
            else:
                self.val_loss_history.append(np.nan)

            if (epoch + 1) % 100 == 0:
                print(f"Epoch {epoch + 1}/{self.n_epochs} - "
                      f"Train Loss: {avg_train_loss:.6f} - "
                      f"Val Loss: {self.val_loss_history[-1]:.6f}")

    def predict(self, X):
        """
        Performs predictions on input data X.

        Args:
            X (array-like): Samples, shape (n_samples, n_features).

        Returns:
            np.ndarray: Output-layer activations, shape (n_samples, n_outputs).
        """
        predictions = []
        for x_sample in np.asarray(X, dtype=float):
            self._forward_pass(x_sample)
            # Copy: the internal buffer is replaced on the next forward pass
            predictions.append(self.xi[self.L - 1].copy())

        # The reshape keeps the result 2-D even when X has no samples
        return np.array(predictions).reshape(-1, self.n[-1])

    def loss_epochs(self):
        """
        Returns the evolution of training and validation error.

        Returns:
            np.ndarray: Shape (n_epochs_trained, 2); column 0 is the training
            loss, column 1 the validation loss (NaN without validation data).
        """
        return np.column_stack((self.train_loss_history, self.val_loss_history))

    # --------------- Internal Helper Methods ---------------

    def _forward_pass(self, x):
        """
        Executes the forward pass for a single sample.

        Stores the weighted sums in ``self.h`` and the activations in
        ``self.xi``; the prediction ends up in ``self.xi[self.L - 1]``.

        Raises:
            ValueError: If x is not a vector of ``self.n[0]`` values.
        """

        output_layer_index = self.L - 1

        x = np.atleast_1d(np.asarray(x, dtype=float))
        if x.ndim != 1 or len(x) != self.n[0]:
            raise ValueError(f"Input sample size ({len(x)}) does not match input layer size ({self.n[0]})")

        self.xi[0] = x  # Input layer activation is the sample itself

        # From the first hidden layer (index 1) up to the output layer (index L-1)
        for l in range(1, self.L):

            # Weighted input sum: h[l] = w[l] * xi[l-1] + theta[l]
            self.h[l] = self.w[l] @ self.xi[l - 1] + self.theta[l]

            if l < output_layer_index:
                # Hidden layers use the configured activation function
                self.xi[l] = self._activation(self.h[l])
            else:
                # Output layer is linear (regression)
                self.xi[l] = self.h[l]

    def _backward_pass(self, y_true):
        """
        Executes the backward pass for a single sample.

        Must be called right after ``_forward_pass`` for the same sample: it
        reads ``self.xi`` / ``self.h`` and fills ``self.delta`` for layers
        1..L-1.

        Raises:
            ValueError: If y_true does not hold one value per output unit.
        """

        output_layer_index = self.L - 1

        y_true_arr = np.asarray(y_true, dtype=float).reshape(-1)
        if y_true_arr.shape != self.xi[output_layer_index].shape:
            # Guard against a short target silently broadcasting over all outputs
            raise ValueError(
                f"Target size ({y_true_arr.size}) does not match output layer size "
                f"({self.n[output_layer_index]})"
            )

        # --- 1. Output layer delta ---
        # The output layer is linear, so f'(h) = 1 and the delta is the raw error.
        # Sign convention: delta = target - prediction, so updates ADD lr * delta * input.
        self.delta[output_layer_index] = y_true_arr - self.xi[output_layer_index]

        # --- 2. Hidden layer deltas, from the last hidden layer (L-2) down to the first (1) ---
        for l in range(output_layer_index - 1, 0, -1):
            # Error propagated back through the weights leaving layer l
            back_propagated = self.delta[l + 1] @ self.w[l + 1]
            self.delta[l] = back_propagated * self._activation_derivative(self.h[l])

    def _update_weights(self):
        """
        Updates weights and biases using gradient descent with momentum.

        For every connection l (1..L-1):
            dw[l]     = lr * outer(delta[l], xi[l-1]) + mu * d_w_prev[l]
            dtheta[l] = lr * delta[l]                 + mu * d_theta_prev[l]
        The full applied change (including its momentum part) is what gets
        remembered for the next update, so momentum accumulates over steps.
        """

        for l in range(1, self.L):

            # Total change for this step = gradient step + momentum * previous total change
            self.dw[l] = self.lr * np.outer(self.delta[l], self.xi[l - 1]) + self.mu * self.d_w_prev[l]
            self.dtheta[l] = self.lr * self.delta[l] + self.mu * self.d_theta_prev[l]

            self.w[l] += self.dw[l]
            self.theta[l] += self.dtheta[l]

            # Remember the applied change for the next sample's momentum term.
            # No copy needed: dw/dtheta are rebound to fresh arrays on every call.
            self.d_w_prev[l] = self.dw[l]
            self.d_theta_prev[l] = self.dtheta[l]

    # --------------- Activation Functions & Derivatives ---------------

    def _activation(self, h):
        """Applies the selected activation function (used for hidden layers)."""
        if self.fact_name == 'sigmoid':
            # Clipping avoids overflow in exp() for very negative inputs;
            # values inside [-500, 500] are unaffected.
            return 1 / (1 + np.exp(-np.clip(h, -500, 500)))
        if self.fact_name == 'relu':
            return np.maximum(0, h)
        if self.fact_name == 'tanh':
            return np.tanh(h)
        # 'linear': identity
        return h

    def _activation_derivative(self, h):
        """Calculates the derivative f'(h) of the selected activation function."""
        if self.fact_name == 'sigmoid':
            f_h = self._activation(h)
            return f_h * (1 - f_h)
        if self.fact_name == 'relu':
            return (h > 0).astype(float)  # 1 if h > 0, 0 otherwise
        if self.fact_name == 'tanh':
            return 1 - np.tanh(h) ** 2
        # 'linear': derivative is 1 everywhere
        return np.ones_like(h)

    def _mse_loss(self, y_true, y_pred):
        """Calculates Mean Squared Error (MSE) between targets and predictions."""
        return mean_squared_error(y_true, y_pred)

# --- Example Usage (for testing) ---
# Smoke test: fit a small network to the XOR truth table and report the result.
if __name__ == "__main__":

    print("--- Testing NeuralNet.py (XOR Example) ---")

    # XOR truth table: one row of two binary inputs per sample, one target each
    X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    y = np.array([0, 1, 1, 0])

    # Hyperparameters for this run
    architecture = [2, 4, 1] # 2 inputs -> 4 hidden units -> 1 output
    epochs, lr, momentum = 5000, 0.1, 0.9
    act_func = 'sigmoid'
    val_split = 0.0 # no validation split: all four samples are used for training

    # Build the network (arguments given in constructor order)
    nn = NeuralNet(architecture, epochs, lr, momentum, act_func, val_split)

    print(f"\nArchitecture: {architecture}, Activation: {act_func}, LR: {lr}, Momentum: {momentum}")
    print("Starting training...")
    nn.fit(X, y)
    print("Training complete.")

    # Predict on the same four samples the network was trained on
    predictions = nn.predict(X)

    print("\n--- Predictions ---")
    for x_in, y_t, y_p in zip(X, y, predictions):
        print(f"Input: {x_in} | Target: {y_t} | Predicted: {y_p[0]:.4f}")

    # Column 0 of the loss table is the training loss; the last row is the final epoch
    loss_table = nn.loss_epochs()
    final_loss = loss_table[-1, 0]
    print(f"\nFinal training loss: {final_loss:.6f}")

    # Pass/fail threshold on the final training loss
    verdict = (
        "Test Passed: XOR learned successfully."
        if final_loss < 0.01
        else "Test Failed: Loss is too high."
    )
    print(verdict)