# Advanced Exercise Project Manual Neural Networks 1

I found this notebook online earlier in the semester, but unfortunately I didn’t save the link. I still wanted to include it because I learned a lot from going through it.

This notebook contains a simple neural network with two layers:

- The first layer has 4 inputs and 2 neurons.

- The second layer takes those 2 values and passes them to 3 output neurons.

ReLU activation is used after the first layer and SoftMax at the end to get class probabilities.

Then the code uses multiple loss functions, namely:

- Categorical cross-entropy

- Binary cross-entropy

- KL divergence

- Hinge loss

Each one is calculated manually using NumPy, which is very impressive.


I found this useful for comparing different ways of measuring how wrong the model’s predictions were. For example, the categorical cross-entropy loss was about 0.33, and the binary cross-entropy was lower, around 0.20. This helped me understand how the model’s measured performance can change depending on the task and on the loss function used.

What I learned from this notebook is how neural network layers are structured and how they transform input into output. I also noticed that the SoftMax layer was very sensitive to small changes in weights or inputs.



In [None]:
# NumPy provides the random generator and the array maths used by this cell.
# Imported here so the cell runs on a fresh kernel without a NameError.
import numpy as np

# Seed NumPy's global generator so the random weight initialisation below
# produces the same values on every run.
np.random.seed(10)

# Input data: three samples with four features each
X = [[1.0, 2.0, 3.0, 4.0], [2.0, 3.0, 4.0, 5.0], [3.0, 4.0, 5.0, 6.0]]

# Fully connected (dense) layer
class Layer_Dense:
    """Dense layer that computes ``inputs . weights + biases``."""

    def __init__(self, n_inputs, n_neurons):
        """Create the layer's parameters.

        Weights have shape (n_inputs, n_neurons); they are drawn from NumPy's
        global standard-normal generator and scaled by 0.10. Biases start at
        zero with shape (1, n_neurons).
        """
        gaussian_draw = np.random.randn(n_inputs, n_neurons)
        self.weights = 0.10 * gaussian_draw
        self.biases = np.zeros(shape=(1, n_neurons))

    def forward(self, inputs):
        """Store the affine transform Z = XW + B of ``inputs`` in ``self.output``."""
        weighted_sum = np.dot(inputs, self.weights)
        self.output = weighted_sum + self.biases

# Rectified linear unit activation
class Activation_ReLU:
    """Element-wise ReLU: negative values become zero, others pass through."""

    def forward(self, inputs):
        """Store ``max(0, inputs)`` (element-wise) in ``self.output``."""
        rectified = np.maximum(0, inputs)
        self.output = rectified

# SoftMax activation: turns each row of scores into a probability distribution
class Activation_SoftMax:
    """SoftMax over the last axis, computed in a numerically stable way."""

    def forward(self, inputs):
        """Store the SoftMax of ``inputs`` (along the last axis) in ``self.output``."""
        # Subtracting the per-row maximum keeps the exponentials from
        # overflowing; it does not change the normalised result.
        row_max = np.max(inputs, axis=-1, keepdims=True)
        numerators = np.exp(inputs - row_max)
        # Divide by the per-row total so every row sums to one
        denominators = np.sum(numerators, axis=-1, keepdims=True)
        self.output = numerators / denominators

# Define a categorical cross-entropy loss function
class Loss_CategoricalCrossEntropy:
    """Mean categorical cross-entropy between predictions and true labels."""

    def forward(self, predicted_prob, true_labels):
        """Return the mean cross-entropy over the batch.

        Args:
            predicted_prob: Array-like of shape (samples, classes) holding the
                predicted class probabilities.
            true_labels: Either a 1-D array-like of integer class indices
                (one per sample) or a 2-D one-hot array-like with the same
                shape as ``predicted_prob``.

        Returns:
            The mean of the per-sample losses ``-sum(y * log(p))``.

        Raises:
            ValueError: If ``true_labels`` is neither 1-D nor 2-D.
        """
        epsilon = 1e-15  # Small constant to avoid log(0)
        # Clip predicted probabilities so the logarithm stays finite
        clipped = np.clip(predicted_prob, epsilon, 1 - epsilon)
        true_labels = np.asarray(true_labels)

        if true_labels.ndim == 1:
            # Sparse labels: pick each sample's probability for its true class
            true_class_prob = clipped[np.arange(len(clipped)), true_labels]
            loss = -np.log(true_class_prob)
        elif true_labels.ndim == 2:
            # One-hot labels: the logarithm must be applied to the
            # probabilities before weighting by the labels; summing the raw
            # probabilities would yield a negative, meaningless "loss".
            loss = -np.sum(true_labels * np.log(clipped), axis=1)
        else:
            raise ValueError(
                "true_labels must be 1-D class indices or a 2-D one-hot array, "
                f"got {true_labels.ndim} dimensions"
            )
        # Return the mean loss
        return np.mean(loss)

# Step 1: Build the network (4 inputs -> 2 hidden neurons -> 3 outputs).
# layer1 is created before layer2 so the seeded random generator hands out
# weights in the same order on every run.
layer1 = Layer_Dense(4, 2)
layer2 = Layer_Dense(2, 3)
activation1 = Activation_ReLU()     # applied to the hidden layer
activation2 = Activation_SoftMax()  # applied to the output layer

# Step 2: Forward propagation
# Hidden stage: dense layer followed by ReLU
layer1.forward(X)
activation1.forward(layer1.output)

# Output stage: dense layer followed by SoftMax
layer2.forward(activation1.output)
activation2.forward(layer2.output)

# Show the class probabilities produced for each input sample
print("Final Output Probabilities (SoftMax):\n", activation2.output)

# Step 3: Loss calculation
# One-hot targets: every one of the three samples belongs to class index 1
true_labels = np.array([[0, 1, 0]] * 3)

# Score the SoftMax output against the targets and report the result
loss_function = Loss_CategoricalCrossEntropy()
loss = loss_function.forward(activation2.output, true_labels)
print("Categorical Cross-Entropy Loss:", loss)

# Additional: Binary Cross-Entropy Loss Function
def binary_cross_entropy(y, y_hat):
    """Return the mean binary cross-entropy of predictions ``y_hat`` against ``y``.

    ``y_hat`` is clipped to [1e-15, 1 - 1e-15] first so that neither
    logarithm is evaluated at zero.
    """
    tiny = 1e-15
    prob = np.clip(y_hat, tiny, 1 - tiny)
    positive_term = y * np.log(prob)            # contribution where y == 1
    negative_term = (1 - y) * np.log(1 - prob)  # contribution where y == 0
    per_sample = -(positive_term + negative_term)
    return np.mean(per_sample)

# Demo: score four binary samples against their predicted probabilities
y_binary = np.array([1, 0, 1, 0])  # ground-truth classes (0 or 1)
y_hat_binary = np.array([0.9, 0.1, 0.8, 0.3])  # predicted probability of class 1
binary_loss = binary_cross_entropy(y=y_binary, y_hat=y_hat_binary)
print("Binary Cross-Entropy Loss:", binary_loss)

# Additional: KL Divergence
def kl_divergence(p, q):
    """Return the Kullback-Leibler divergence sum(p * log(p / q)).

    Both inputs are clipped to [1e-15, 1 - 1e-15] beforehand so the ratio
    and the logarithm stay finite.
    """
    tiny = 1e-15
    lower, upper = tiny, 1 - tiny
    p_safe = np.clip(p, lower, upper)
    q_safe = np.clip(q, lower, upper)
    log_ratio = np.log(p_safe / q_safe)
    return np.sum(p_safe * log_ratio)

# Demo: divergence of q_distribution from p_distribution (three outcomes each)
p_distribution = np.array([0.2, 0.5, 0.3])
q_distribution = np.array([0.3, 0.4, 0.3])
kl_loss = kl_divergence(p=p_distribution, q=q_distribution)
print("KL Divergence:", kl_loss)

# Additional: Hinge Loss
def hinge_loss(y_true, raw_output):
    """Return the hinge loss ``max(0, 1 - y_true * raw_output)``.

    Args:
        y_true: True label(s); a scalar or an array-like.
            NOTE(review): hinge loss conventionally expects labels in
            {-1, +1} -- confirm callers follow that encoding.
        raw_output: Raw (un-squashed) model score(s); a scalar or an
            array-like that broadcasts against ``y_true``.

    Returns:
        For scalar inputs, the loss of that single sample. For array inputs,
        the mean loss over all samples, matching the other loss functions in
        this file, which also return a mean.
    """
    margin = 1 - np.asarray(y_true) * np.asarray(raw_output)
    if margin.ndim == 0:
        # Scalar case: keep the plain-Python result and its type.
        return max(0, 1 - y_true * raw_output)
    # Batch case: the builtin max() cannot compare arrays, so use the
    # element-wise NumPy maximum and average over the samples.
    return np.mean(np.maximum(0, margin))

# Demo: one positive sample whose raw score (0.5) falls inside the margin
y_true_hinge = 1
raw_output_hinge = 0.5
hinge_loss_value = hinge_loss(y_true=y_true_hinge, raw_output=raw_output_hinge)
print("Hinge Loss:", hinge_loss_value)


Final Output Probabilities (SoftMax):
 [[0.3330081  0.33231371 0.33467819]
 [0.3328193  0.33172567 0.33545502]
 [0.33262964 0.3311377  0.33623267]]
Categorical Cross-Entropy Loss: -0.3317256915856903
Binary Cross-Entropy Loss: 0.19763488164214868
KL Divergence: 0.030478754035472025
Hinge Loss: 0.5
