# Calculating Network Error with Loss

In [5]:
import math
import numpy as np

In [3]:
# an example output from the output layer of the neural network
softmax_output = [0.7, 0.1, 0.2]
# Ground truth
target_output = [1, 0, 0]

loss = -(
    math.log(softmax_output[0]) * target_output[0]
    + math.log(softmax_output[1]) * target_output[1]
    + math.log(softmax_output[2]) * target_output[2]
)

print(loss)


# similarly
loss = -math.log(softmax_output[0])
print(loss)

0.35667494393873245
0.35667494393873245


The Categorical Cross-Entropy Loss accounts for the fact that the output is a probability distribution and outputs a larger loss the lower the confidence is.

In [4]:
print(math.log(1))
print(math.log(0.95))
print(math.log(0.9))
print(math.log(0.8))
print("...")
print(math.log(0.2))
print(math.log(0.1))
print(math.log(0.05))
print(math.log(0.01))

0.0
-0.05129329438755058
-0.10536051565782628
-0.2231435513142097
...
-1.6094379124341003
-2.3025850929940455
-2.995732273553991
-4.605170185988091


In [7]:
b = 5.2
print(np.log(b))
print(math.e ** np.log(b))

1.6486586255873816
5.199999999999999


In [8]:
softmax_output = [[0.7, 0.1, 0.2], [0.1, 0.5, 0.4], [0.02, 0.9, 0.08]]
class_targets = [0, 1, 1]

for targ_idx, distribution in zip(class_targets, softmax_output):
    print(distribution[targ_idx])

0.7
0.5
0.9


In [9]:
# same using numpy
softmax_output = np.array([[0.7, 0.1, 0.2], [0.1, 0.5, 0.4], [0.02, 0.9, 0.08]])
class_targets = [0, 1, 1]

print(softmax_output[[0, 1, 2], class_targets])

[0.7 0.5 0.9]


In [12]:
softmax_output[range(len(softmax_output)), class_targets]

array([0.7, 0.5, 0.9])

In [15]:
# Now we apply the negative log to this list
neg_log = -np.log(softmax_output[range(len(softmax_output)), class_targets])
average_loss = np.mean(neg_log)
print(average_loss)

0.38506088005216804


In [17]:
np.sum(softmax_output * class_targets, axis=1)

array([0.3 , 0.9 , 0.98])

In [21]:
# Handle if target are not sparse but one-hot encoded
softmax_output = np.array([[0.7, 0.1, 0.2], [0.1, 0.5, 0.4], [0.02, 0.9, 0.08]])
class_targets = np.array([[1, 0, 0], [0, 1, 0], [0, 1, 0]])

# Probabilities for target values only if categorical labels
if len(class_targets.shape) == 1:
    correct_confidences = softmax_output[range(len(softmax_output)), class_targets]

# Mask values - only for one-hot encoded labels
elif len(class_targets.shape) == 2:
    correct_confidences = np.sum(softmax_output * class_targets, axis=1)

# Losses
neg_log = -np.log(correct_confidences)
average_loss = np.mean(neg_log)
print(average_loss)

0.38506088005216804


## The Categorical Cross-Entropy Loss Class

In [19]:
# Common loss class
class Loss:
    # Calculates the data and regularization losses given model output and ground truth values
    def calculate(self, output, y):
        # Calculate sample losses
        sample_losses = self.forward(output, y)
        # Calculate mean loss
        data_loss = np.mean(sample_losses)
        # Return loss
        return data_loss


# Cross-entropy loss
class Loss_CategoricalCrossentropy(Loss):
    # Forward pass
    def forward(self, y_pred, y_true):
        # Number of samples in a batch
        samples = len(y_pred)

        # Clip data to prevent division by 0
        # Clip both sides to not drag mean towards any value
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        # Probabilities for target values - only if categorical labels
        if len(y_true.shape) == 1:
            correct_confidences = y_pred_clipped[range(samples), y_true]

        # Mask values - only for one-hot encoded labels
        elif len(y_true.shape) == 2:
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)

        # Losses
        negative_log_likelihoods = -np.log(correct_confidences)
        return negative_log_likelihoods

In [22]:
loss_function = Loss_CategoricalCrossentropy()
loss = loss_function.calculate(softmax_output, class_targets)
print(loss)

0.38506088005216804


## Combining everything up to this point: 

In [23]:
import numpy as np
import nnfs
from nnfs.datasets import spiral_data

nnfs.init()


# Dense layer
class Layer_Dense:
    # Layer initialization
    def __init__(self, n_inputs, n_neurons):
        # Initialize weights and biases
        self.weights = 0.01 * np.random.randn(n_inputs, n_neurons)
        self.biases = np.zeros((1, n_neurons))

    # Forward pass
    def forward(self, inputs):
        # Calculate output values from inputs, weights and biases
        self.output = np.dot(inputs, self.weights) + self.biases


# ReLU activation
class Activation_ReLU:
    # Forward pass
    def forward(self, inputs):
        # Calculate output values from inputs
        self.output = np.maximum(0, inputs)


# Softmax activation
class Activation_Softmax:
    # Forward pass
    def forward(self, inputs):
        # Get unnormalized probabilities
        exp_values = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))
        # Normalize them for each sample
        probabilities = exp_values / np.sum(exp_values, axis=1, keepdims=True)

        self.output = probabilities


# Common loss class
class Loss:
    # Calculates the data and regularization losses
    # given model output and ground truth values
    def calculate(self, output, y):
        # Calculate sample losses
        sample_losses = self.forward(output, y)
        print("Hey Um sample losees: ", sample_losses)

        # Calculate mean loss
        data_loss = np.mean(sample_losses)

        # Return loss
        return data_loss


# Cross-entropy loss
class Loss_CategoricalCrossentropy(Loss):
    # Forward pass
    def forward(self, y_pred, y_true):
        # Number of samples in a batch
        samples = len(y_pred)

        # Clip data to prevent division by 0
        # Clip both sides to not drag mean towards any value
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        # Probabilities for target values -
        # only if categorical labels
        if len(y_true.shape) == 1:
            correct_confidences = y_pred_clipped[range(samples), y_true]

        # Mask values - only for one-hot encoded labels
        elif len(y_true.shape) == 2:
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)

        # Losses
        negative_log_likelihoods = -np.log(correct_confidences)
        return negative_log_likelihoods


# Create dataset
X, y = spiral_data(samples=100, classes=3)

# Create Dense layer with 2 input features and 3 output values
dense1 = Layer_Dense(2, 3)

# Create ReLU activation (to be used with Dense layer):
activation1 = Activation_ReLU()

# Create second Dense layer with 3 input features (as we take output
# of previous layer here) and 3 output values
dense2 = Layer_Dense(3, 3)

# Create Softmax activation (to be used with Dense layer):
activation2 = Activation_Softmax()

# Create loss function
loss_function = Loss_CategoricalCrossentropy()

# Perform a forward pass of our training data through this layer
dense1.forward(X)

# Perform a forward pass through activation function
# it takes the output of first dense layer here
activation1.forward(dense1.output)


# Perform a forward pass through second Dense layer
# it takes outputs of activation function of first layer as inputs
dense2.forward(activation1.output)

# Perform a forward pass through activation function
# it takes the output of second dense layer here
activation2.forward(dense2.output)

# Let's see output of the first few samples:
print(activation2.output[:5])

# Perform a forward pass through loss function
# it takes the output of second dense layer here and returns loss
loss = loss_function.calculate(activation2.output, y)

# Print loss value
print("loss:", loss)


"""
>>>
[[0.33333334 0.33333334 0.33333334]
 [0.33333316 0.3333332  0.33333364]
 [0.33333287 0.3333329  0.33333418]
 [0.3333326  0.33333263 0.33333477]
 [0.33333233 0.3333324  0.33333528]]
loss: 1.0986104
"""

[[0.33333334 0.33333334 0.33333334]
 [0.33333316 0.3333332  0.33333364]
 [0.33333287 0.3333329  0.33333418]
 [0.3333326  0.33333263 0.33333477]
 [0.33333233 0.3333324  0.33333528]]
Hey Um sample losees:  [1.0986123 1.0986128 1.0986137 1.0986145 1.0986153 1.0986137 1.0986168
 1.0986168 1.0986178 1.0986191 1.0986198 1.098619  1.0986207 1.098622
 1.098622  1.0986223 1.0986184 1.0986241 1.0986224 1.098626  1.098626
 1.0986195 1.0986171 1.0986255 1.0986353 1.0986279 1.098618  1.0986184
 1.0986398 1.0986418 1.0986305 1.0986369 1.0986211 1.0986254 1.0986377
 1.098647  1.0986525 1.098653  1.0986506 1.098654  1.0986497 1.0986483
 1.0986478 1.0986323 1.0986618 1.098662  1.098664  1.0986414 1.0986663
 1.0986534 1.098662  1.0986348 1.0986499 1.0986599 1.0986315 1.0986141
 1.0986186 1.0986127 1.0986273 1.0986137 1.098617  1.098616  1.0986123
 1.098631  1.0986419 1.0986344 1.0986123 1.0986576 1.0986377 1.0986589
 1.0986655 1.0986648 1.0986633 1.0986542 1.0986674 1.0986677 1.0986685
 1.0986708 1.0986

'\n>>>\n[[0.33333334 0.33333334 0.33333334]\n [0.33333316 0.3333332  0.33333364]\n [0.33333287 0.3333329  0.33333418]\n [0.3333326  0.33333263 0.33333477]\n [0.33333233 0.3333324  0.33333528]]\nloss: 1.0986104\n'

## Accuracy Calculation

In [26]:
# Probabilities of 3 samples
softmax_output = np.array([[0.7, 0.2, 0.1], [0.5, 0.1, 0.4], [0.02, 0.9, 0.08]])

# Target (ground-truth) labels for 3 samples
class_targets = np.array([0, 1, 1])

# Calculate values along second axis (axis of index 1)
predictions = np.argmax(softmax_output, axis=1)
# If targets are one-hot encoded - convert them
if len(class_targets.shape) == 2:
    class_targets = np.argmax(class_targets, axis=1)

# True evaluates to 1; False to 0
accuracy = np.mean(predictions == class_targets)
print("acc:", accuracy)

acc: 0.6666666666666666


In [27]:
import numpy as np
import nnfs
from nnfs.datasets import spiral_data

nnfs.init()


# Dense layer
class Layer_Dense:
    # Layer initialization
    def __init__(self, n_inputs, n_neurons):
        # Initialize weights and biases
        self.weights = 0.01 * np.random.randn(n_inputs, n_neurons)
        self.biases = np.zeros((1, n_neurons))

    # Forward pass
    def forward(self, inputs):
        # Calculate output values from inputs, weights and biases
        self.output = np.dot(inputs, self.weights) + self.biases


# ReLU activation
class Activation_ReLU:
    # Forward pass
    def forward(self, inputs):
        # Calculate output values from inputs
        self.output = np.maximum(0, inputs)


# Softmax activation
class Activation_Softmax:
    # Forward pass
    def forward(self, inputs):
        # Get unnormalized probabilities
        exp_values = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))
        # Normalize them for each sample
        probabilities = exp_values / np.sum(exp_values, axis=1, keepdims=True)

        self.output = probabilities


# Common loss class
class Loss:
    # Calculates the data and regularization losses
    # given model output and ground truth values
    def calculate(self, output, y):
        # Calculate sample losses
        sample_losses = self.forward(output, y)
        print("Hey Um sample losees: ", sample_losses)

        # Calculate mean loss
        data_loss = np.mean(sample_losses)

        # Return loss
        return data_loss


# Cross-entropy loss
class Loss_CategoricalCrossentropy(Loss):
    # Forward pass
    def forward(self, y_pred, y_true):
        # Number of samples in a batch
        samples = len(y_pred)

        # Clip data to prevent division by 0
        # Clip both sides to not drag mean towards any value
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        # Probabilities for target values -
        # only if categorical labels
        if len(y_true.shape) == 1:
            correct_confidences = y_pred_clipped[range(samples), y_true]

        # Mask values - only for one-hot encoded labels
        elif len(y_true.shape) == 2:
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)

        # Losses
        negative_log_likelihoods = -np.log(correct_confidences)
        return negative_log_likelihoods


# Create dataset
X, y = spiral_data(samples=100, classes=3)

# Create Dense layer with 2 input features and 3 output values
dense1 = Layer_Dense(2, 3)

# Create ReLU activation (to be used with Dense layer):
activation1 = Activation_ReLU()

# Create second Dense layer with 3 input features (as we take output
# of previous layer here) and 3 output values
dense2 = Layer_Dense(3, 3)

# Create Softmax activation (to be used with Dense layer):
activation2 = Activation_Softmax()

# Create loss function
loss_function = Loss_CategoricalCrossentropy()

# Perform a forward pass of our training data through this layer
dense1.forward(X)

# Perform a forward pass through activation function
# it takes the output of first dense layer here
activation1.forward(dense1.output)


# Perform a forward pass through second Dense layer
# it takes outputs of activation function of first layer as inputs
dense2.forward(activation1.output)

# Perform a forward pass through activation function
# it takes the output of second dense layer here
activation2.forward(dense2.output)

# Let's see output of the first few samples:
print(activation2.output[:5])

# Perform a forward pass through loss function
# it takes the output of second dense layer here and returns loss
loss = loss_function.calculate(activation2.output, y)

# Print loss value
print("loss:", loss)


# Calculate accuracy from output of activation2 and targets
# Calculate values along first axis
predictions = np.argmax(activation2.output, axis=1)
if len(y.shape) == 2:
    y = np.argmax(y, axis=1)
accuracy = np.mean(predictions == y)

# Print accuracy
print("acc:", accuracy)

[[0.33333334 0.33333334 0.33333334]
 [0.33333316 0.3333332  0.33333364]
 [0.33333287 0.3333329  0.33333418]
 [0.3333326  0.33333263 0.33333477]
 [0.33333233 0.3333324  0.33333528]]
Hey Um sample losees:  [1.0986123 1.0986128 1.0986137 1.0986145 1.0986153 1.0986137 1.0986168
 1.0986168 1.0986178 1.0986191 1.0986198 1.098619  1.0986207 1.098622
 1.098622  1.0986223 1.0986184 1.0986241 1.0986224 1.098626  1.098626
 1.0986195 1.0986171 1.0986255 1.0986353 1.0986279 1.098618  1.0986184
 1.0986398 1.0986418 1.0986305 1.0986369 1.0986211 1.0986254 1.0986377
 1.098647  1.0986525 1.098653  1.0986506 1.098654  1.0986497 1.0986483
 1.0986478 1.0986323 1.0986618 1.098662  1.098664  1.0986414 1.0986663
 1.0986534 1.098662  1.0986348 1.0986499 1.0986599 1.0986315 1.0986141
 1.0986186 1.0986127 1.0986273 1.0986137 1.098617  1.098616  1.0986123
 1.098631  1.0986419 1.0986344 1.0986123 1.0986576 1.0986377 1.0986589
 1.0986655 1.0986648 1.0986633 1.0986542 1.0986674 1.0986677 1.0986685
 1.0986708 1.0986