In [None]:
import numpy as np
import matplotlib.pyplot as plt

```mermaid
graph TD
    subgraph Inputs
        I1((x1))
        I2((x2))
    end

    subgraph Hidden_Layer
        H1[H1]
        H2[H2]
    end

    subgraph Outputs
        O((y_hat))
        Y((y))
        L((Loss))
    end

    I1 --> H1 & H2
    I2 --> H1 & H2
    H1 & H2 --> O
    O & Y --> L

    subgraph Backpropagation
        L --> BO((dO))
        BO --> BA1((dA1)) & BA2((dA2))
        BA1 --> BW1((dW1))
        BA2 --> BW2((dW2))
        BW1 & BW2 --> BI1((dx1)) & BI2((dx2))
    end

    BI1 -.-> I1
    BI2 -.-> I2

    E1["Inputs:
    x1, x2: Input features"]
    E2["Hidden Layer:
    H1, H2: Hidden neurons
    Each includes:
    - Weighted Sum
    - Activation function"]
    E3["Outputs:
    y_hat: Predicted output
    y: True label
    Loss: Measure of prediction error"]
    E4["Backpropagation:
    dO: Gradient of loss w.r.t. output
    dA1, dA2: Gradients w.r.t. activations
    dW1, dW2: Gradients w.r.t. weighted sums
    dx1, dx2: Gradients w.r.t. inputs"]

    E1 -.-> Inputs
    E2 -.-> Hidden_Layer
    E3 -.-> Outputs
    E4 -.-> Backpropagation

    classDef default fill:#f5f5f5,stroke:#333,stroke-width:1px;
    classDef input fill:#333,stroke:#000,stroke-width:2px,color:#fff;
    classDef hidden fill:#ffeeba,stroke:#d39e00,stroke-width:2px,color:#000;
    classDef output fill:#d5f5e3,stroke:#1e8449,stroke-width:2px,color:#000;
    classDef backprop fill:#f5b7b1,stroke:#922b21,stroke-width:2px,color:#000;
    classDef explanation fill:#e8e8e8,stroke:#333,stroke-width:1px,color:#333;

    class I1,I2 input;
    class H1,H2 hidden;
    class O,Y,L output;
    class BO,BA1,BA2,BW1,BW2,BI1,BI2 backprop;
    class E1,E2,E3,E4 explanation;
```

In [1]:
def init_params(layer_dims, seed=3):
    """Create small random weights and zero biases for every layer of the network.

    Args:
        layer_dims (Sequence[int]): Number of units in each layer, starting with
            the input layer, e.g. ``[n_x, n_h, n_y]``.
        seed (int | None): Value passed to ``np.random.seed`` before sampling so
            that the initialisation is reproducible. Defaults to 3, the value
            that used to be hard-coded. Pass ``None`` to leave NumPy's global
            random state untouched.

    Returns:
        dict: For every ``l`` in ``1 .. len(layer_dims) - 1``:
            ``'W<l>'`` - array of shape ``(layer_dims[l], layer_dims[l-1])``
            sampled from a standard normal and scaled by 0.01;
            ``'b<l>'`` - zero array of shape ``(layer_dims[l], 1)``.
    """
    if seed is not None:
        # This reseeds NumPy's *global* generator, so it also affects any
        # later np.random call made outside this function.
        np.random.seed(seed)

    params = {}
    for layer in range(1, len(layer_dims)):
        n_out, n_in = layer_dims[layer], layer_dims[layer - 1]
        # Small weights keep the sigmoid away from its flat, saturated regions.
        params[f'W{layer}'] = np.random.randn(n_out, n_in) * 0.01
        params[f'b{layer}'] = np.zeros((n_out, 1))  # biases start at zero

    return params

In [2]:
# Z (linear hypothesis): Z = W*X + b, where
# * - dot product,
# W - weight matrix, b - bias vector, X - input.
# Z is a linear function of the input; the sigmoid activation function is then applied to it.
# The sigmoid is used to introduce non-linearity in the model.
# Z can be a scalar, a vector or a matrix.

# The sigmoid is applied element-wise: A = 1 / (1 + exp(-Z))
def sigmoid(Z):
    """Apply the logistic sigmoid ``1 / (1 + exp(-Z))`` element-wise.

    In the network this is the activation function of a neuron; it is what
    introduces non-linearity into the model.

    The value is computed from ``exp(-|Z|)``, which always lies in (0, 1], so
    large-magnitude inputs cannot overflow inside ``exp`` (the naive formula
    emits a RuntimeWarning for very negative ``Z``).

    Args:
        Z (np.ndarray | float): The linear hypothesis; a scalar or an array of
            any shape.

    Returns:
        tuple: ``(A, cache)`` where ``A`` is the sigmoid of ``Z`` (same shape as
            ``Z``; a NumPy scalar for scalar input) and ``cache`` is ``Z``
            itself, returned unchanged for later use by the caller.
    """
    Z_arr = np.asarray(Z)
    decay = np.exp(-np.abs(Z_arr))  # in (0, 1]: never overflows

    # Two algebraically identical forms of the sigmoid, each used on the side
    # where it is numerically safe:
    #   Z >= 0:  1 / (1 + exp(-Z))
    #   Z <  0:  exp(Z) / (1 + exp(Z))
    # Indexing with [()] turns a 0-d result back into a NumPy scalar and is a
    # no-op for arrays with one or more dimensions.
    A = np.where(Z_arr >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))[()]
    cache = Z

    return A, cache

NameError: name 'np' is not defined

In [None]:
def forward_prop(X, params, verbose=False):
    """Run the forward pass of the network.

    For each layer ``l`` the linear hypothesis ``Z = W_l . A_prev + b_l`` is
    computed and passed through ``sigmoid``; the result becomes the input of
    the next layer.

    Args:
        X (np.ndarray): The training data, used as the input to the first
            layer. (Assumed to be laid out as ``(n_features, n_examples)`` so
            that ``np.dot(W1, X)`` is defined - confirm against the caller.)
        params (dict): Network parameters holding one ``'W<l>'`` and one
            ``'b<l>'`` entry per layer, with ``l`` starting at 1.
        verbose (bool): When True, print the shapes involved in each layer.
            Defaults to False so that calling the function does not flood the
            notebook output with full parameter and activation arrays.

    Returns:
        tuple: ``(A, caches)`` where ``A`` is the activation of the last layer
            and ``caches`` is a list with one ``(linear_cache,
            activation_cache)`` pair per layer; ``linear_cache`` is
            ``(A_prev, W, b)`` and ``activation_cache`` is the second value
            returned by ``sigmoid``.
    """
    A = X  # input to the first layer, i.e. the training data
    caches = []
    # Every layer contributes exactly one weight and one bias entry.
    num_layers = len(params) // 2

    for l in range(1, num_layers + 1):
        A_prev = A
        W = params[f'W{l}']
        b = params[f'b{l}']

        if verbose:
            print(f'Layer {l}: W{l} shape {W.shape}, '
                  f'A_prev shape {A_prev.shape}, b{l} shape {b.shape}')

        # Linear hypothesis: Z = W . A_prev + b
        Z = np.dot(W, A_prev) + b
        linear_cache = (A_prev, W, b)

        # Non-linearity applied on top of the linear hypothesis
        A, activation_cache = sigmoid(Z)

        # Keep both caches together, one entry per layer
        caches.append((linear_cache, activation_cache))

    return A, caches
