In [1]:
import torch
import torch.nn as nn
import numpy as np

In [6]:
#numpy implementation of softmax
def softmax(x):
    """Return the softmax of ``x``, normalised over all of its elements.

    Args:
        x: array-like of scores (logits). The normalisation runs over the
            whole array, not along a particular axis.

    Returns:
        NumPy array (or NumPy scalar for scalar input) of the same shape as
        ``x`` whose entries are positive and sum to 1.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise; np.max would raise on an empty array.
        return np.exp(x)
    # Softmax is unchanged by adding a constant to every score, so shift the
    # largest score to 0. This keeps np.exp from overflowing to inf (and the
    # result from becoming nan) when the scores are large.
    exps = np.exp(x - np.max(x))
    return exps / np.sum(exps)

#numpy implementation of cross entropy
def cross_entropy(actual, pred, eps=1e-15):
    """Return the cross-entropy between ``actual`` and ``pred``.

    Computes ``-sum(actual * log(pred))`` over all elements (natural log,
    no averaging).

    Args:
        actual: array-like of target weights, e.g. a one-hot label vector.
        pred: array-like of predicted probabilities, same shape as ``actual``.
        eps: lower bound applied to ``pred`` before taking the log. Values
            already >= ``eps`` are left untouched, so results for ordinary
            inputs are unchanged.

    Returns:
        The summed cross-entropy as a NumPy scalar.
    """
    actual = np.asarray(actual)
    # log(0) is -inf, and 0 * -inf is nan, so a single exact zero in `pred`
    # would otherwise poison the whole sum. Bounding from below keeps the
    # loss finite (a confident wrong prediction gives a large finite loss).
    safe_pred = np.maximum(np.asarray(pred), eps)
    return -np.sum(actual * np.log(safe_pred))



tensor([0.6590, 0.2424, 0.0986])
0.4170299470424652 	 1.840616226196289


In [8]:
# --- Softmax with PyTorch ---
x = torch.tensor([2.0, 1.0, 0.1])
output = x.softmax(dim=0)  # dim=0 is the only axis of this 1-D tensor
print(output)

# --- Cross-entropy with PyTorch ---
# The targets are class indices (not one-hot vectors) and the predictions
# are raw scores, one row per sample.
loss = nn.CrossEntropyLoss()
y = torch.tensor([0])
y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])
y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]])
l1, l2 = loss(y_pred_good, y), loss(y_pred_bad, y)
print(l1.item(), "\t", l2.item())

# --- Predicted class: index of the largest score in each row ---
# max(dim=1) yields (values, indices); only the indices are of interest.
_, preds_good = y_pred_good.max(dim=1)
_, preds_bad = y_pred_bad.max(dim=1)

print(preds_good)
print(preds_bad)

tensor([0.6590, 0.2424, 0.0986])
0.4170299470424652 	 1.840616226196289
tensor([0])
tensor([1])


In [10]:
#Do not add a softmax layer to the network when training with nn.CrossEntropyLoss: the loss applies softmax itself, so an extra layer would apply it twice
class NeuralNet1(nn.Module):
    """Two-layer fully connected network that returns raw class scores.

    Architecture: Linear(input_size, hidden_size) -> ReLU ->
    Linear(hidden_size, num_classes). No softmax is applied, so the output
    is meant to be fed to a loss such as nn.CrossEntropyLoss.

    Args:
        input_size: number of input features per sample.
        hidden_size: number of units in the hidden layer.
        num_classes: number of output scores per sample.
    """

    def __init__(self,input_size,hidden_size,num_classes):
        # Zero-argument super() always refers to this class. Naming another
        # class here (e.g. NeuralNet2) fails with NameError/TypeError.
        super().__init__()
        self.linear1 = nn.Linear(input_size,hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size,num_classes)
        #Avoid adding softmax layer since we use CrossEntropy loss function

    def forward(self,x):
        """Return the raw (un-normalised) scores for input ``x``."""
        out = self.linear1(x)
        out = self.relu(out)
        out = self.linear2(out)
        #Notice no softmax since we use CrossEntropy loss later on during training
        return out

model = NeuralNet1(
    input_size=28 * 28,
    hidden_size=5,
    num_classes=3,
)
# CrossEntropyLoss applies softmax + cross-entropy to the model output itself,
# which is why the network returns raw scores.
criterion = nn.CrossEntropyLoss()

In [11]:
#Remember to apply a sigmoid output layer when using the binary cross entropy loss (nn.BCELoss), which expects probabilities rather than raw scores
class NeuralNet2(nn.Module):
    """Two-layer fully connected network with a sigmoid on its output.

    Architecture: Linear(input_size, hidden_size) -> ReLU ->
    Linear(hidden_size, num_classes) -> sigmoid. The sigmoid is applied
    inside forward() because the network is paired with nn.BCELoss.

    Args:
        input_size: number of input features per sample.
        hidden_size: number of units in the hidden layer.
        num_classes: number of output values per sample.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        # Attribute names double as state_dict keys, so they stay as they are.
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return element-wise sigmoid outputs in (0, 1) for input ``x``."""
        hidden = self.relu(self.linear1(x))
        scores = self.linear2(hidden)
        # Explicit sigmoid: each output is squashed independently.
        return torch.sigmoid(scores)

model = NeuralNet2(
    input_size=28 * 28,
    hidden_size=5,
    num_classes=3,
)
# BCELoss does not apply a sigmoid itself, so the network's forward() does.
criterion = nn.BCELoss()