### Using Softmax layer with Cross Entropy Loss

In [2]:
import torch 
import torch.nn as nn 
import numpy as np 

#### Softmax 
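- Given a vector with more than one output element (the raw scores, or logits), we take the exponential of each element and divide it by the sum of all the exponentials to get the probability of each output: $\text{softmax}(x_i) = \dfrac{e^{x_i}}{\sum_j e^{x_j}}$.  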
- The softmax layer outputs the whole probability vector (its entries sum to 1); the predicted class is the element with the highest probability 
- Good at classification tasks


In [3]:
def softmax(x): 
    """Turn raw scores into probabilities with a numerically stable softmax.

    Args:
        x: Array-like of scores. Normalisation runs along axis 0, so a 1-D
            input is treated as a single score vector and every column of a
            2-D input is normalised independently.

    Returns:
        np.ndarray with the same shape as ``x`` whose entries are
        non-negative and sum to 1 along axis 0.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise; np.max would raise on an empty axis.
        return np.exp(x)
    # Softmax is invariant to adding a constant to every score, so shift by
    # the maximum: the largest exponent becomes 0 and np.exp cannot overflow
    # to inf (which would otherwise produce inf/inf = nan for large scores).
    shifted = x - np.max(x, axis=0, keepdims=True)
    exps = np.exp(shifted)  # computed once and reused for the denominator
    return exps / np.sum(exps, axis=0, keepdims=True)

In [6]:
# Raw scores for a three-class example.
x = np.array([2.0, 1.0, 0.1])
output = softmax(x)

# Each entry is one class probability; together they add up to 1.
print("Probability of individual output", output)
print("Combined Probability: ", output.sum())

Probability of individual output [0.65900114 0.24243297 0.09856589]
Combined Probability:  1.0


In [7]:
# Same computation using PyTorch's built-in softmax.
x = torch.tensor([2.0, 1.0, 0.1])
# dim=0 normalises over the only dimension of this 1-D tensor.
output = x.softmax(dim=0)
print(output)

tensor([0.6590, 0.2424, 0.0986])


#### Cross Entropy 
$L = -\sum_i y_i \log(\hat{y}_i)$, i.e. the negative sum of the ground truth times the log of the prediction.    
  
The further the prediction is from the ground truth, the higher the loss.  
- Cross entropy compares two probability distributions: $p(x)$ is the true distribution (the one-hot encoded correct label) and $q(x)$ is the predicted distribution, whose logarithm $\log q(x)$ enters the loss. It is therefore best used when both the prediction and the ground truth are probabilities, i.e. every entry is $\in [0,1]$.

In [10]:
def cross_entropy(y_hat, y):
    """Cross-entropy loss ``-sum(y * log(y_hat))`` of a prediction against a target.

    Args:
        y_hat: Array-like of predicted values; expected to be probabilities.
        y: Array-like target (e.g. a one-hot vector), broadcastable against
            ``y_hat``.

    Returns:
        The summed loss as a NumPy float. It is ``inf`` when an entry with a
        non-zero target is predicted with probability 0.
    """
    y_hat = np.asarray(y_hat, dtype=float)
    y = np.asarray(y, dtype=float)
    # Use the convention 0 * log(0) = 0. Where the target is 0 the term
    # contributes nothing, so substitute a prediction of 1 (log(1) = 0) there.
    # Without this, a prediction that is exactly 0 on a zero-target class
    # evaluates 0 * -inf = nan and poisons the whole sum.
    safe_y_hat = np.where(y == 0, 1.0, y_hat)
    loss = -np.sum(y * np.log(safe_y_hat))
    return loss 

# One-hot ground truth: the correct class is index 0.
Y = np.array([1, 0, 0])

# y1 puts most of its probability on the correct class, y2 on a wrong one.
y1 = np.array([0.7, 0.2, 0.1])
y2 = np.array([0.1, 0.3, 0.6])

# Score both predictions against the same target.
l1, l2 = (cross_entropy(prediction, Y) for prediction in (y1, y2))

print(f"First loss: {l1:.6f}")
print(f"Second loss: {l2:.6f}")

First loss: 0.356675
Second loss: 2.302585


#### PyTorch CrossEntropy Loss

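`nn.CrossEntropyLoss` = softmax (applied as log-softmax) + negative log-likelihood loss, so it takes raw scores (logits) rather than probabilities.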

In [12]:
# nn.CrossEntropyLoss applies log-softmax to the raw scores and then the
# negative log-likelihood loss, so the predictions below are unnormalised
# scores (logits), not probabilities.
loss = nn.CrossEntropyLoss()

# The target is a class index rather than a one-hot vector: correct class is 0.
Y = torch.tensor([0])

# Predictions have shape n_samples x n_classes.
y_good = torch.tensor([[2.0, 1.0, 0.1]])  # highest score on class 0 (correct)
y_bad = torch.tensor([[0.5, 2.0, 0.3]])   # highest score on class 1 (wrong)

l1 = loss(y_good, Y)
l2 = loss(y_bad, Y)
print(f"Good loss: {l1.item():.6f}")
# Label fixed: this line reports the loss of the bad prediction.
print(f"Bad loss: {l2.item():.6f}")

Good loss: 0.417030
Loss loss: 1.840616
