In [1]:
import torch
import torch.nn as nn
import numpy as np

## Softmax: Numpy vs PyTorch

<img src='pic/softmax.png' width=500>

In [10]:
# Implementation in Numpy

def softmax(x):
    return np.exp(x) / np.sum(np.exp(x), axis=0)

x = np.array([2.0, 1.0, 0.1]) # example
outputs = softmax(x)
print(f'softmax in numpy: {outputs}')

softmax in numpy: [0.65900114 0.24243297 0.09856589]


In [14]:
# Implementation in PyTorch

x = torch.tensor([2.0, 1.0, 0.1])
outputs = torch.softmax(x, dim=0) # across axis=0
print(f'softmax in pytorch: {outputs}')

softmax in pytorch: tensor([0.6590, 0.2424, 0.0986])


## Cross-Entropy: Numpy vs PyTorch

The cross-entropy loss increases as the predicted probability diverges from the actual label.

<img src='pic/cross_entropy.png' width=500>

In [17]:
# implementation in Numpy

def cross_entropy(actual, pred):
    loss = -np.sum(actual * np.log(pred))
    return loss / float(pred.shape[0]) 
# note that the score in the slides above is not normalized
# meaning that is not divided by the number of samples

Y = np.array([1,0,0]) # note that y must be one hot encoded
Y_pred_good = np.array([0.7, 0.2, 0.1])
Y_pred_bad = np.array([0.1, 0.3, 0.6])
loss_1 = cross_entropy(Y, Y_pred_good)
loss_2 = cross_entropy(Y, Y_pred_bad)
print(f"Loss 1: {loss_1:.4f}")
print(f"Loss 2: {loss_2:.4f}")

Loss 1: 0.1189
Loss 2: 0.7675


In [22]:
# implementation in PyTorch

loss = nn.CrossEntropyLoss()
Y = torch.tensor([0]) # n_samples * n_classes = 1 * 3
Y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])
Y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]])
loss_1 = loss(Y_pred_good, Y) # loss(input, target) loss(pred, target)
loss_2 = loss(Y_pred_bad, Y)
print(f"Loss 1: {loss_1:.4f}")
print(f"Loss 2: {loss_2:.4f}")

Loss 1: 0.4170
Loss 2: 1.8406


Note that **nn.CrossEntropyLoss** already applies **nn.LogSoftmax** + **nn.NLLLoss** (negative log likelihood loss). You don't need to apply a softmax layer as the last one any more. 

The acutal Y should be the class labels, not the one-hot encoded representation, and the Y_pred should be raw scores (logits), instead of the softmax values.

In [40]:
# Make predictions in PyTorch
_, pred1 = torch.max(Y_pred_good, dim = 1) # choose the one with the highest prob.
_, pred2 = torch.max(Y_pred_bad, dim = 1)
print(pred1)
print(pred2)

tensor([0])
tensor([1])


## Full Execution in PyTorch

In [44]:
# implementation in PyTorch

loss = nn.CrossEntropyLoss()
Y = torch.tensor([2, 0, 1]) # 3 * 3
Y_pred_good = torch.tensor([[0.1, 1.0, 2.1],
                            [2.0, 1.0, 0.1],
                            [2.0, 3.0, 0.1]])

Y_pred_bad = torch.tensor([[2.1, 1.0, 0.1],
                            [0.0, 2.0, 0.1],
                            [2.0, 1.0, 4.1]])

loss_1 = loss(Y_pred_good, Y) # loss(input, target) loss(pred, target)
loss_2 = loss(Y_pred_bad, Y)
_, pred1 = torch.max(Y_pred_good, dim = 1) # choose the one with the highest prob.
_, pred2 = torch.max(Y_pred_bad, dim = 1)

print(f"Loss 1: {loss_1:.4f}, pred = {pred1}")
print(f"Loss 2: {loss_2:.4f}, pred = {pred2}")

Loss 1: 0.3846, pred = tensor([2, 0, 1])
Loss 2: 2.6299, pred = tensor([0, 1, 2])


## Neural Network Classification structure in PyTorch

Be really careful about the difference when constructing the forward function in the Neural Network class. 

In [None]:
# binary classification problem

class NeuralNet_B(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(NeuralNet, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, 1)
    
    def forward(self, x):
        out = self.linear1(x)
        out = self.relu(x)
        out = self.linear2(x) 
        # you need to apply sigmoid at the end
        y_pred = torch.sigmoid(out)
        return y_pred

model = NeuralNet_B(input_size=28*28, hidden_size=5)
criterion = nn.BCELoss()

In [45]:
# multivariate classification problem

class NeuralNet_M(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, num_classes)
    
    def forward(self, x):
        out = self.linear1(x)
        out = self.relu(x)
        out = self.linear2(x) 
        # no softmax at the end
        # since nn.CrossEntropyLoss() already does for us
        return(out)

model = NeuralNet_M(input_size=28*28, hidden_size=5, num_classes=3)
criterion = nn.CrossEntropyLoss()