In [1]:
import torch
import torch.nn as nn
import numpy as np

In [2]:
def softmax(x):
    """Compute the softmax of ``x`` along axis 0.

    Args:
        x: Array-like of raw scores (logits). For a 2-D input each column
            is normalized independently, because the reduction runs over
            axis 0.

    Returns:
        numpy.ndarray with the same shape as ``x``; entries along axis 0
        are non-negative and sum to 1.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalize; np.max would raise on an empty reduction.
        return np.exp(x)
    # Subtracting the maximum leaves the result mathematically unchanged
    # but keeps np.exp from overflowing to inf (and the ratio from
    # becoming nan) when the logits are large.
    shifted = x - np.max(x, axis=0, keepdims=True)
    exps = np.exp(shifted)  # computed once and reused for the normalizer
    return exps / np.sum(exps, axis=0, keepdims=True)

In [3]:
# Run the NumPy softmax on three sample logits.
x = np.array([2.0, 1.0, 0.1])
outputs = softmax(x)
print("softmax numpy: ", outputs)

softmax numpy:  [0.65900114 0.24243297 0.09856589]


In [4]:
# Same logits, this time through PyTorch's built-in softmax.
x = torch.tensor([2.0, 1.0, 0.1])
outputs = torch.softmax(x, dim=0)
print("softmax using pytorch: ", outputs)

softmax using pytorch:  tensor([0.6590, 0.2424, 0.0986])


In [5]:
def cross_entropy(actual, predicted, eps=1e-15):
    """Cross-entropy between one-hot targets and predicted probabilities.

    Args:
        actual: Array-like of one-hot encoded target(s).
        predicted: Array-like of predicted probabilities, same shape as
            ``actual``.
        eps: Lower bound applied to ``predicted`` before taking the log.

    Returns:
        The summed (not averaged) cross-entropy as a NumPy float.
    """
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)
    # Without the lower clip, a probability of exactly 0 gives
    # log(0) = -inf, and 0 * -inf = nan poisons the whole sum.
    safe_predicted = np.clip(predicted, eps, None)
    loss = -np.sum(actual * np.log(safe_predicted))
    # The sum is returned un-normalized; divide by
    # float(predicted.shape[0]) if a per-sample mean is wanted.
    return loss

In [6]:
# The target y must be one-hot encoded for the NumPy cross-entropy:
#   class 0 -> [1, 0, 0]
#   class 1 -> [0, 1, 0]
#   class 2 -> [0, 0, 1]
y = np.array([1, 0, 0])

In [7]:
# y_pred holds predicted class probabilities, so each vector must sum to 1.
y_pred_good = np.array([0.7, 0.2, 0.1])
# Only the class-0 entry (0.1) contributes to the loss for y = [1, 0, 0];
# the remaining entries just have to complete a valid distribution.
y_pred_bad = np.array([0.1, 0.3, 0.6])
l1 = cross_entropy(y, y_pred_good)
l2 = cross_entropy(y, y_pred_bad)
print(f'loss1 numpy: {l1:.4f}')
print(f'loss 2 numpy: {l2:.4f}')
# The lower the cross-entropy, the better the predicted probabilities.

loss1 numpy: 0.3567
loss 2 numpy: 2.3026


Careful!
`nn.CrossEntropyLoss` applies
`nn.LogSoftmax` + `nn.NLLLoss` (negative log-likelihood loss).

y has class labels, not one-hot encoded.
y_pred has raw scores (logits), no softmax.

In [8]:
# Cross-entropy computed with PyTorch's built-in criterion.
loss = nn.CrossEntropyLoss()

In [9]:
# The target is a class index here, not a one-hot vector.
y = torch.tensor([0])
# Logits have shape n_samples x n_classes = 1 x 3.
y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])
y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]])

l1 = loss(y_pred_good, y)
l2 = loss(y_pred_bad, y)
print(l1.item())  # the good prediction yields the lower cross-entropy
print(l2.item())  # the bad prediction yields the higher cross-entropy

0.4170299470424652
1.840616226196289


In [10]:
# torch.max along dim 1 returns (values, indices); the indices are the
# predicted class for each sample.
_, prediction1 = torch.max(y_pred_good, dim=1)
_, prediction2 = torch.max(y_pred_bad, dim=1)
print(prediction1)
print(prediction2)

tensor([0])
tensor([1])


In [11]:
# Same comparison for a batch of three samples (one class index each).
y = torch.tensor([2, 0, 1])

y_pred_good = torch.tensor([
    [0.1, 1.0, 2.1],
    [2.0, 1.0, 0.1],
    [0.1, 3.0, 0.1],
])
y_pred_bad = torch.tensor([
    [2.1, 1.0, 0.1],
    [0.1, 1.0, 2.1],
    [0.1, 3.0, 0.1],
])
l1 = loss(y_pred_good, y)
l2 = loss(y_pred_bad, y)
print(l1.item())  # the good prediction yields the lower cross-entropy
print(l2.item())  # the bad prediction yields the higher cross-entropy

0.3018244206905365
1.6241613626480103


In [12]:
class NeuralNet2(nn.Module):
    """Linear -> ReLU -> Linear network that returns raw logits."""

    def __init__(self, input_size, hidden_size, num_classes):
        """Create the two linear layers and the ReLU between them.

        Args:
            input_size: Number of input features of the first linear layer.
            hidden_size: Width of the hidden layer.
            num_classes: Number of outputs of the second linear layer.
        """
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Run ``x`` through linear1, ReLU and linear2 and return the result."""
        hidden = self.relu(self.linear1(x))
        # Deliberately no softmax at the end: the raw scores are returned.
        return self.linear2(hidden)

In [13]:
model = NeuralNet2(
    input_size=28 * 28,
    hidden_size=5,
    num_classes=3,
)
# The criterion applies the softmax itself, so the model outputs raw scores.
criterion = nn.CrossEntropyLoss()