In [None]:
import torch
import torch.nn as nn
import numpy as np

In [None]:
def softmax(x, axis=0):
    """Compute the softmax of ``x`` along ``axis`` in a numerically stable way.

    Args:
        x: Array-like of raw scores (logits).
        axis: Axis along which the scores are normalised. Defaults to 0,
            matching the original behaviour.

    Returns:
        np.ndarray with the same shape as ``x`` whose entries along ``axis``
        are non-negative and sum to 1.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise; np.max would raise on an empty reduction.
        return np.exp(x)
    # Subtracting the max leaves the result unchanged mathematically but keeps
    # np.exp from overflowing to inf (which would yield nan after division).
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=axis, keepdims=True)

In [None]:
# Example raw scores for three classes, stored as float32.
x = np.array([2, 1, 0.1], dtype=np.float32)
# Print the scores after normalisation by the NumPy softmax defined above.
print(f"Softmax = {softmax(x)}")

Softmax = [0.6590011  0.24243298 0.09856589]


### Using Pytorch

In [None]:
# torch.as_tensor accepts both an ndarray and a tensor, so this cell can be
# re-run safely even after `x` has already been rebound to a tensor
# (a tensor has no `.astype`, so the ndarray-only conversion failed on re-run).
x = torch.as_tensor(x, dtype=torch.float32)
# dim=0 normalises over the only axis of this 1-D score vector.
print(f"Softmax = {torch.softmax(x, dim=0)}")

Softmax = tensor([0.6590, 0.2424, 0.0986])


# Cross Entropy

In [None]:
def cross_entropy(actual, predicted, eps=1e-12):
    """Cross-entropy between a target distribution and predicted probabilities.

    Args:
        actual: Array-like target distribution (e.g. a one-hot vector).
        predicted: Array-like predicted probabilities, same shape as ``actual``.
        eps: Lower bound applied to ``predicted`` before taking the log, so a
            predicted probability of exactly 0 cannot produce ``-inf``/``nan``.

    Returns:
        The summed (not averaged) loss ``-sum(actual * log(predicted))``.
    """
    actual = np.asarray(actual)
    # Without the floor, 0 * log(0) evaluates to nan and poisons the whole sum.
    safe_predicted = np.maximum(np.asarray(predicted), eps)
    return -np.sum(actual * np.log(safe_predicted))

In [None]:
# Class 0 is true class
y = np.array([1, 0, 0])

y_pred_good = np.array([0.7, 0.2, 0.1])
y_pred_bad = np.array([0.1, 0.3, 0.6])

l1 = cross_entropy(actual=y, predicted=y_pred_good)
l2 = cross_entropy(actual=y, predicted=y_pred_bad)

print(f"l1 = {l1:.5f}, l2 = {l2:.5f}")

l1 = 0.35667, l2 = 2.30259


### Using Pytorch

**nn.CrossEntropyLoss() = nn.LogSoftmax + nn.NLLLoss(negative log likelihood loss)**

- Don't use Softmax in last layer
- Y has class labels, not one-hot encoded
- Y_pred has raw scores(logits), no softmax

In [None]:
# Takes raw logits and integer class labels; the log-softmax step is part of the loss.
loss = nn.CrossEntropyLoss()

In [None]:
# Target is a class index (not one-hot): the single sample belongs to class 0.
y = torch.tensor([0])

# Raw logits with shape (n_samples, n_classes) = (1, 3).
y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])
y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]])

# Evaluate the loss for both sets of logits against the same target.
l1, l2 = (loss(logits, y) for logits in (y_pred_good, y_pred_bad))
print(f"l1 = {l1:.5f}, l2 = {l2:.5f}")

l1 = 0.41703, l2 = 1.84062


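For multiple samples, `y` holds one class index per sample: with `y = [2, 0, 1]`, the 1st sample belongs to the 3rd class (index 2), the 2nd sample belongs to the 1st class (index 0), and the 3rd sample belongs to the 2nd class (index 1).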

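`max_values, indices = torch.max(x, dim=1)` returns the largest score in each row together with its index; the index is the predicted class. (Without `dim`, `torch.max(x)` returns a single tensor and cannot be unpacked into two values.)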

In [None]:
# One class index per sample: the three samples belong to classes 2, 0 and 1.
y = torch.tensor([2, 0, 1])

# Raw logits with shape (n_samples, n_classes) = (3, 3).
y_pred_good = torch.tensor([
    [0.1, 1.0, 2.0],
    [2.0, 1.0, 0.1],
    [0.1, 3.0, 1.0],
])
y_pred_bad = torch.tensor([
    [2.0, 1.0, 0.1],
    [0.1, 3.0, 1.0],
    [0.1, 1.0, 2.0],
])

# Loss over the whole batch for each set of logits.
l1, l2 = (loss(logits, y) for logits in (y_pred_good, y_pred_bad))
print(f"l1 = {l1:.5f}, l2 = {l2:.5f}")

# Row-wise maximum: `indices` is the predicted class for each sample.
max_value, indices = torch.max(y_pred_good, dim=1)
print(max_value, indices)
max_value, indices = torch.max(y_pred_bad, dim=1)
print(max_value, indices)

l1 = 0.33610, l2 = 2.26944
tensor([2., 2., 3.]) tensor([2, 0, 1])
tensor([2., 3., 2.]) tensor([0, 1, 2])


# MultiClass problem

In [None]:
class NeuralNet(nn.Module):
    """Fully connected network with one hidden ReLU layer for multi-class scoring.

    Args:
        input_size: Number of input features.
        hidden_size: Number of units in the hidden layer.
        n_classes: Number of output scores, one per class.
    """

    def __init__(self, input_size, hidden_size, n_classes):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, n_classes)

    def forward(self, x):
        """Return the raw class scores (logits) for ``x``."""
        hidden = self.relu(self.linear1(x))
        # Deliberately no softmax: the scores are returned unnormalised.
        return self.linear2(hidden)

In [None]:
# 28*28 input features, 5 hidden units, and 3 output scores (one per class).
model = NeuralNet(input_size=28*28, hidden_size=5, n_classes=3)

# CrossEntropyLoss consumes the raw scores, which is why the model has no softmax layer.
criterion = nn.CrossEntropyLoss()

# Binary Class Classification

In [None]:
class NeuralNet(nn.Module):
    """Fully connected network with one hidden ReLU layer and a sigmoid output.

    Args:
        input_size: Number of input features.
        hidden_size: Number of units in the hidden layer.
        n_classes: Number of sigmoid outputs. Defaults to 1, i.e. a single
            probability per sample for binary classification.
    """

    def __init__(self, input_size, hidden_size, n_classes=1):
        super().__init__()
        self.linear1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        # Use n_classes instead of a hard-coded 1 so the argument is no longer
        # silently ignored; the default keeps the single-output behaviour.
        self.linear2 = nn.Linear(in_features=hidden_size, out_features=n_classes)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return sigmoid-activated outputs for ``x``, each in the range (0, 1)."""
        hidden = self.relu(self.linear1(x))
        # The sigmoid is applied here because BCELoss expects probabilities, not logits.
        return self.sigmoid(self.linear2(hidden))


# Binary classification uses a single output unit. The previous call passed
# n_classes=3, which the class ignored; it is now stated as 1 explicitly.
model = NeuralNet(input_size=28*28, hidden_size=5, n_classes=1)

criterion = nn.BCELoss()