In [1]:
import torch
import torch.nn as nn
import numpy as np

In [14]:
def softmax(x):
    """Return the softmax of ``x`` taken along axis 0.

    Each entry is ``exp(x_i) / sum_j exp(x_j)``, where the sum runs over
    axis 0 (for a 1-D input, over the whole vector; for a 2-D input, down
    each column).

    Args:
        x: array-like of scores (logits).

    Returns:
        np.ndarray with the same shape as ``x`` whose entries along axis 0
        are non-negative and sum to 1.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise; keep the shape and return a float array.
        return np.exp(x)
    # Softmax is invariant to subtracting a constant along the reduced axis.
    # Shifting by the maximum keeps every exponent <= 0, so np.exp cannot
    # overflow to inf (which would otherwise produce inf / inf = nan).
    shifted = x - np.max(x, axis=0)
    exps = np.exp(shifted)  # computed once and reused for the normaliser
    return exps / np.sum(exps, axis=0)

# Three example scores; the resulting probabilities are shown rounded to 4 decimals.
x = np.asarray([2.0, 1.0, 0.1])
outputs = softmax(x)
print(f'softmax numpy : {outputs.round(4)}')

softmax numpy : [0.659  0.2424 0.0986]


In [15]:
# Same scores as a torch tensor; dim=0 normalises over its single axis.
x = torch.tensor([2.0, 1.0, 0.1])
outputs = x.softmax(dim=0)
print(f'softmax torch : {outputs}')

softmax torch : tensor([0.6590, 0.2424, 0.0986])


In [16]:
def cross_entropy(actual, predicted, eps=1e-15):
    """Return the cross-entropy ``-sum(actual * log(predicted))``.

    Args:
        actual: array-like of target weights (e.g. a one-hot vector).
        predicted: array-like of predicted probabilities, same shape as
            ``actual``.
        eps: lower bound applied to ``predicted`` before taking the log.

    Returns:
        The summed cross-entropy as a NumPy float scalar.
    """
    actual = np.asarray(actual)
    # Clamp from below: log(0) is -inf, and 0 * -inf is nan, so a single
    # zero probability (even for a class whose target weight is 0) would
    # otherwise turn the whole loss into nan or inf.
    safe_predicted = np.clip(np.asarray(predicted, dtype=float), eps, None)
    return -np.sum(actual * np.log(safe_predicted))

# Target as a one-hot vector: the true class is index 0.
y = np.array([1, 0, 0])

# Two candidate probability vectors: the first puts most mass on class 0,
# the second puts most mass on class 2.
y_pred_good = np.array([0.7, 0.2, 0.1])
y_pred_bad = np.array([0.1, 0.3, 0.6])

l1, l2 = cross_entropy(y, y_pred_good), cross_entropy(y, y_pred_bad)

print(f'loss1 numpy : {l1:.4f}')
print(f'loss2 numpy : {l2:.4f}')

loss1 numpy : 0.3567
loss2 numpy : 2.3026


In [30]:
# nn.CrossEntropyLoss combines nn.LogSoftmax and nn.NLLLoss, so:
#   * targets are class indices, not one-hot vectors
#   * predictions are raw scores (logits) -- do not apply softmax first

# Logits with shape n_samples x n_classes = 3 x 3.
y_pred_good = torch.tensor([
    [0.1, 1.0, 2.1],
    [2.0, 1.0, 0.1],
    [0.1, 3.0, 0.1],
])
y_pred_bad = torch.tensor([
    [2.1, 1.0, 0.1],
    [0.1, 1.0, 2.1],
    [0.1, 3.0, 0.1],
])

# One class index per sample.
y = torch.tensor([2, 0, 1])

loss = nn.CrossEntropyLoss()
l1, l2 = loss(y_pred_good, y), loss(y_pred_bad, y)

print(f'loss1 torch : {l1.item():.4f}')
print(f'loss2 torch : {l2.item():.4f}')

loss1 torch : 0.3018
loss2 torch : 1.6242


In [31]:
# Predicted class per sample = index of the largest logit along dim=1.
# torch.argmax returns those indices directly, so the maximum values no
# longer have to be unpacked into a throwaway `_` (binding `_` by hand also
# shadows IPython's "last output" variable for the rest of the session).
predictions1 = torch.argmax(y_pred_good, dim=1)
predictions2 = torch.argmax(y_pred_bad, dim=1)

predictions1, predictions2

(tensor([2, 0, 1]), tensor([0, 2, 1]))