#### Forward Till MSE Loss Function

In [11]:
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# MSE Function
def mean_square_error(predicted, actual):
    return np.sum(np.square(predicted - actual))/2

# Layer 1
X = np.array([0.5, 0.2, 0.1])
W1 = np.array([[0.1, 0.2, 0.3], 
               [0.4, 0.5, 0.6], 
               [0.7, 0.8, 0.9]])
b1 = np.array([0.1, 0.2, 0.3])

#  Output Layer 2
W2 = np.array([[0.1, 0.4], 
               [0.2, 0.5], 
               [0.3, 0.6]])
b2 = np.array([0.1, 0.2])

Z1 = np.dot(X, W1) + b1
print("Summation at Hidden Layer (Z1):",Z1)
A1 = sigmoid(Z1)
print("Output of Hidden Layer (A1):",A1)

Z2 = np.dot(A1, W2) + b2
print("Summation at Output Layer (Z2):",Z2)
A2 = sigmoid(Z2)
print("Final Output (A2):",A2)

Y = np.array([1, 0])

# Compute MSE Loss
loss = mean_square_error(A2, Y)
print("Mean Squared Error Loss:", loss)

Summation at Hidden Layer (Z1): [0.3  0.48 0.66]
Output of Hidden Layer (A1): [0.57444252 0.61774787 0.65926039]
Summation at Output Layer (Z2): [0.47877194 1.13420718]
Final Output (A2): [0.61745785 0.75661448]
Mean Squared Error Loss: 0.3594019879097132


#### Feed-Forward Propagation
##### 3 input neuron
##### 3 Hidden Layer(4 neuron in each layer)
##### 1 output neuron
##### Use Tanh Activation Function and Sigmoid Activation Function 

In [13]:
import numpy as np

def tanh(X): # Use at Hidden layer
    return (np.exp(X)-np.exp(-X))/(np.exp(X)+np.exp(-X))

def sigmoid(X): # Use on Output layer
    return 1 / (1 + np.exp(-X))

# MSE Function
def mean_square_error(predicted, actual):
    return np.sum(np.square(predicted - actual))/2

# Forward Prapogation
def feed_forward(X, weights, biases):
    input_layer = X
    print("Input (A0) : ", input_layer)
    
    for i in range(len(weights) -1):
        Z = np.dot(input_layer, weights[i]) + biases[i].reshape(-1)
        A = tanh(Z)
        print(f"\nLayer {i+1} (Hidden Layer):")
        print(f"Z{i+1}: {Z}")
        print(f"A{i+1} (after tanh): {A}")
        input_layer = A
        
    # Output layer
    Z_output = np.dot(input_layer, weights[-1]) + biases[-1].reshape(-1)
    A_output = sigmoid(Z_output)
    print(f"\nOutput Layer:")
    print(f"Z_out: {Z_output}")
    print(f"Final Output (after sigmoid): {A_output}")

    return A_output

# Inputs
X = np.array([0.6, 0.8, 0.3])

weights = [
    np.array([[0.2, 0.5, 0.3, 0.7],
              [0.4, 0.1, 0.8, 0.2],
              [0.6, 0.9, 0.5, 0.3]]),
    np.array([[0.1, 0.3, 0.6, 0.8],
              [0.5, 0.2, 0.7, 0.4],
              [0.9, 0.8, 0.4, 0.5],
              [0.3, 0.7, 0.2, 0.1]]),
    np.array([[0.7, 0.5, 0.4, 0.2],
              [0.3, 0.8, 0.6, 0.1],
              [0.9, 0.2, 0.7, 0.3],
              [0.5, 0.4, 0.1, 0.8]]),
    np.array([[0.6],
              [0.2],
              [0.7],
              [0.4]])
]

biases = [
    np.array([0.2, 0.2, 0.2, 0.2]),
    np.array([0.3, 0.3, 0.3, 0.3]),
    np.array([0.1, 0.1, 0.1, 0.1]),
    np.array([0.4])
]


output = feed_forward(X, weights, biases)
print("Final Output:", output)

Y = np.array([1, 0])

# Compute MSE Loss
loss = mean_square_error(output, Y)
print("\nMean Squared Error Loss:", loss)

Input (A0) :  [0.6 0.8 0.3]

Layer 1 (Hidden Layer):
Z1: [0.82 0.85 1.17 0.87]
A1 (after tanh): [0.67506987 0.69106947 0.82427217 0.70137413]

Layer 2 (Hidden Layer):
Z2: [1.66529891 1.79111448 1.65877425 1.59875719]
A2 (after tanh): [0.93092742 0.94587806 0.93005189 0.92148126]

Layer 3 (Hidden Layer):
Z3: [2.33319994 1.87676904 1.78308225 1.39697387]
A3 (after tanh): [0.98136316 0.95420385 0.94502569 0.88469579]

Output Layer:
Z_out: [2.19505496]
Final Output (after sigmoid): [0.89980457]
Final Output: [0.89980457]

Mean Squared Error Loss: 0.4098436902862408


#### Backword Propagation

In [26]:
# FORWARD PROPAGATION

import numpy as np

def tanh(X): # Use at Hidden layer
    return (np.exp(X)-np.exp(-X))/(np.exp(X)+np.exp(-X))

def sigmoid(X): # Use on Output layer
    return 1 / (1 + np.exp(-X))

# MSE Function
def mean_square_error(predicted, actual):
    return np.sum(np.square(predicted - actual))/2

# Forward Propagation
def feed_forward(X, weights, biases):
    input_layer = X
    Zs = []
    As = []
    print("Input (A0) : ", input_layer)
    
    for i in range(len(weights) -1):
        Z = np.dot(input_layer, weights[i]) + biases[i].reshape(-1)
        A = tanh(Z)
        print(f"\nLayer {i+1} (Hidden Layer):")
        print(f"Z{i+1}: {Z}")
        print(f"A{i+1} (after tanh): {A}")
        Zs.append(Z)
        As.append(A)
        input_layer = A
        
    # Output layer
    Z_output = np.dot(input_layer, weights[-1]) + biases[-1].reshape(-1)
    A_output = sigmoid(Z_output)
    print(f"\nOutput Layer:")
    print(f"Z_out: {Z_output}")
    print(f"Final Output (after sigmoid): {A_output}")

    Zs.append(Z_output)
    As.append(A_output)

    return A_output, {"Zs":Zs, "As": As}


# BACK PROPAGATION
def back_propagation(X, Y, weights, biases, forward_cache, learning_rate):
    Zs, As = forward_cache["Zs"], forward_cache["As"]

    grads_W = []
    grads_b = []
    
    # Output Layer Error
    A_output = As[-1]
    Z_output = Zs[-1]
    
    dA = A_output - Y
    dZ = dA * sigmoid(Z_output)

    dW = np.outer(As[-2], dZ)
    db = dZ
    grads_W.insert(0, dW)
    grads_b.insert(0, db)

    # Backprop through hidden layers (reversed)
    for i in reversed(range(len(Zs) - 1)):
        dA = np.dot(dZ, weights[i+1].T)
        dZ = dA * (1-np.tanh(Zs[i])**2)
        
        A_prev = X if i == 0 else As[i-1]
        dW = np.outer(A_prev, dZ)
        db = dZ
        
        grads_W.insert(0, dW)
        grads_b.insert(0, db)
        

    # Update weights and biases
    for i in range(len(weights)):
        weights[i] -= learning_rate * grads_W[i]
        biases[i] -= learning_rate * grads_b[i]

    return grads_W, grads_b, weights, biases

# Inputs
X = np.array([0.6, 0.8, 0.3])

Y = np.array([1, 0])
learning_rate = 0.1 # Given

weights = [
    np.array([[0.2, 0.5, 0.3, 0.7],
              [0.4, 0.1, 0.8, 0.2],
              [0.6, 0.9, 0.5, 0.3]]),
    np.array([[0.1, 0.3, 0.6, 0.8],
              [0.5, 0.2, 0.7, 0.4],
              [0.9, 0.8, 0.4, 0.5],
              [0.3, 0.7, 0.2, 0.1]]),
    np.array([[0.7, 0.5, 0.4, 0.2],
              [0.3, 0.8, 0.6, 0.1],
              [0.9, 0.2, 0.7, 0.3],
              [0.5, 0.4, 0.1, 0.8]]),
    np.array([[0.6, 0.3],
              [0.2, 0.7],
              [0.7, 0.5],
              [0.4, 0.6]])
]

biases = [
    np.array([0.2, 0.2, 0.2, 0.2]),
    np.array([0.3, 0.3, 0.3, 0.3]),
    np.array([0.1, 0.1, 0.1, 0.1]),
    np.array([0.2, 0.2])
]


output, cache = feed_forward(X, weights, biases)
print("Final Output:", output)

# Compute MSE Loss
loss = mean_square_error(output, Y)
print("\nMean Squared Error Loss:", loss)

# Backpropagation and Update
grads_W, grads_b, updated_weights, updated_biases = back_propagation(X, Y, weights, biases, cache, learning_rate)

# Print Gradients
print("\nGradients for Weights:")
for i, g in enumerate(grads_W):
    print(f"dW{i+1}:\n", g)

print("\nGradients for Biases:")
for i, g in enumerate(grads_b):
    print(f"db{i+1}:\n", g)

# Print updated weights
print("\nUpdated Weights:")
for i, w in enumerate(updated_weights):
    print(f"W{i+1}:\n", w)

print("\nUpdated Biases:")
for i, b in enumerate(updated_biases):
    print(f"b{i+1}:\n", b)


Input (A0) :  [0.6 0.8 0.3]

Layer 1 (Hidden Layer):
Z1: [0.82 0.85 1.17 0.87]
A1 (after tanh): [0.67506987 0.69106947 0.82427217 0.70137413]

Layer 2 (Hidden Layer):
Z2: [1.66529891 1.79111448 1.65877425 1.59875719]
A2 (after tanh): [0.93092742 0.94587806 0.93005189 0.92148126]

Layer 3 (Hidden Layer):
Z3: [2.33319994 1.87676904 1.78308225 1.39697387]
A3 (after tanh): [0.98136316 0.95420385 0.94502569 0.88469579]

Output Layer:
Z_out: [1.99505496 2.16568196]
Final Output (after sigmoid): [0.8802769  0.89712513]
Final Output: [0.8802769  0.89712513]

Mean Squared Error Loss: 0.4095835597653488

Gradients for Weights:
dW1:
 [[0.00688561 0.00575783 0.00480007 0.00340393]
 [0.00918081 0.00767711 0.0064001  0.00453857]
 [0.0034428  0.00287892 0.00240004 0.00170197]]
dW2:
 [[0.00559    0.00508069 0.00628497 0.01047436]
 [0.00572249 0.0052011  0.00643393 0.01072261]
 [0.00682549 0.00620361 0.00767406 0.01278937]
 [0.00580781 0.00527866 0.00652986 0.01088249]]
dW3:
 [[0.00612632 0.0451813  0.