In [1]:
import numpy as np
import matplotlib.pyplot as plt

# Categorical Cross-Entropy Loss Function

In [None]:
class layer_dense:
    """Fully connected (dense) neural-network layer."""

    def __init__(self, n_inputs, n_neurons):
        """Create the layer's parameters.

        Weights are drawn from a standard normal distribution, scaled by 0.1,
        with shape (n_inputs, n_neurons). Biases start at zero with shape
        (1, n_neurons).
        """
        weight_shape = (n_inputs, n_neurons)
        self.weights = np.random.randn(*weight_shape) * 0.1
        self.biases = np.zeros((1, n_neurons))

    def forward(self, inputs):
        """Store ``inputs.dot(weights) + biases`` in ``self.output``."""
        weighted_sum = inputs.dot(self.weights)
        self.output = weighted_sum + self.biases
        
class activation_ReLU:
    """Rectified linear unit (ReLU) activation function."""

    def forward(self, inputs):
        """Store the element-wise maximum of 0 and ``inputs`` in ``self.output``."""
        rectified = np.maximum(0, inputs)
        self.output = rectified
        
class activation_softmax:
    """Softmax activation function, normalised along axis 1 (per row)."""

    def forward(self, inputs):
        """Store the row-wise softmax of ``inputs`` in ``self.output``.

        Each row's maximum is subtracted before exponentiating, so the
        largest exponent is exp(0) = 1 and large inputs cannot overflow.
        """
        row_max = np.max(inputs, axis=1, keepdims=True)
        shifted_exp = np.exp(inputs - row_max)
        row_totals = np.sum(shifted_exp, axis=1, keepdims=True)
        self.output = shifted_exp / row_totals

# Common base class for loss functions.
# More loss functions will be added later; the operations they all share
# (such as averaging the per-sample losses) live here.
class loss:
    """Common base class for loss functions.

    Subclasses implement ``forward(output, y)`` and return one loss value per
    sample; ``calculate`` reduces those values to their mean.
    """

    def forward(self, output, y):
        """Return the per-sample losses. Must be overridden by subclasses."""
        raise NotImplementedError("loss subclasses must implement forward()")

    def calculate(self, output, y):
        """Return the mean of the per-sample losses of ``output`` against ``y``."""
        sample_losses = self.forward(output, y)
        # Named mean_loss so the local does not shadow the class name `loss`.
        mean_loss = np.mean(sample_losses)
        return mean_loss


# cross-entropy loss
class loss_crossentropy(loss):
    """Categorical cross-entropy loss."""

    def forward(self, y_pred, y_true):
        """Return the negative log of the confidence given to each correct class.

        Parameters
        ----------
        y_pred : array of predicted values, indexed as [sample, class].
        y_true : either integer class indices (one per sample) or a 2-D
            one-hot array aligned with ``y_pred``.

        Returns
        -------
        Array with one loss value per sample.
        """
        # number of data points
        samples = len(y_pred)

        # Clip the predictions so that log(0) = -inf can never occur. The
        # upper bound is clipped symmetrically so the loss is never negative.
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        y_true = np.asarray(y_true)
        if y_true.ndim == 2:
            # One-hot targets: multiplying masks out every class except the
            # correct one, so the row sum is the correct-class confidence.
            confidence_values = np.sum(y_pred_clipped * y_true, axis=1)
        else:
            # Sparse targets: pick column y_true[i] from row i.
            confidence_values = y_pred_clipped[range(samples), y_true]

        # sample losses
        return -np.log(confidence_values)

In [16]:
# index of the correct class for each of the four samples
y = np.array((1, 0, 2, 0))
y

array([1, 0, 2, 0])

In [3]:
# example predicted probabilities: one row per sample, one column per class
P = np.array([
    [0.10, 0.70, 0.20],
    [0.30, 0.40, 0.30],
    [0.05, 0.05, 0.90],
    [0.40, 0.30, 0.30],
])
P

array([[0.1 , 0.7 , 0.2 ],
       [0.3 , 0.4 , 0.3 ],
       [0.05, 0.05, 0.9 ],
       [0.4 , 0.3 , 0.3 ]])

In [17]:
# individual confidences: for every sample, pick the probability assigned to
# its correct class (row i, column y[i]).
# The row index is built from len(y) instead of a hard-coded 4, so the cell
# keeps working when the number of samples changes.
confidence_values = P[np.arange(len(y)), y]
confidence_values

array([0.7, 0.3, 0.9, 0.4])

In [18]:
# losses: negative natural log of each correct-class confidence
# (the closer a confidence is to 1, the closer its loss is to 0)
-np.log(confidence_values)

array([0.35667494, 1.2039728 , 0.10536052, 0.91629073])

In [7]:
# loss function: mean of the per-sample negative log confidences.
# The row index uses len(y) instead of a hard-coded sample count.
np.mean(-np.log(P[np.arange(len(y)), y]))

0.6455747489491624

In [19]:
# problem: if the model is fully wrong (confidence 0 for the correct class),
# log(0) is -inf and the loss becomes infinite.
# np.errstate silences NumPy's expected "divide by zero" RuntimeWarning; the
# value is computed inside the context manager and displayed afterwards.
with np.errstate(divide="ignore"):
    fully_wrong_loss = -np.log(0)
fully_wrong_loss

  


inf

In [20]:
# partial solution: add a very small value to the confidence (10**-7 = 0.0000001)
# so that log never receives exactly 0 and the loss stays finite
-np.log(0+1e-7)

16.11809565095832

In [23]:
# another problem: if the model is fully right (confidence 1), the shifted
# confidence exceeds 1 and the loss becomes a small negative value instead of 0
-np.log(1+1e-7)

-9.999999505838704e-08

In [22]:
# for reference: log(1) is exactly 0, so an unshifted confidence of 1 gives zero loss
np.log(1)

0.0

In [28]:
# solution: clip the confidence to [1e-7, 1 - 1e-7], so the lowest possible
# value becomes 1e-7 and the highest possible value becomes 1 - 1e-7
confidence = 0
# clip the variable rather than a literal, so changing `confidence` changes the result
np.clip(confidence, 1e-7, 1 - 1e-7)

1e-07

In [None]:
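# TODO: compute the loss on actual network output. `loss_function` and
# `activation2` are not defined in the cells shown here (presumably a
# loss_crossentropy instance and the softmax output activation; confirm).
# loss_function.calculate(activation2.output,y)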