In [1]:
import torch
import torch.nn as nn
import numpy as np

Here, `x` is a 1-D array: it has only one axis, so the softmax is computed along `axis=0`.

In [2]:
def softmax(x):
    """Return the softmax of ``x`` computed along axis 0.

    The maximum along axis 0 is subtracted before exponentiating. Softmax is
    invariant to such a shift, so the result is mathematically unchanged, but
    ``np.exp`` no longer overflows to ``inf`` (and the ratio to ``nan``) when
    the scores are large.

    Args:
        x: Array-like of scores. For a 1-D input the whole vector is
            normalised; for higher dimensions each slice along axis 0 is
            normalised independently.

    Returns:
        NumPy array with the same shape as ``x`` whose entries sum to 1
        along axis 0.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise; np.max would raise on an empty array.
        return np.exp(x)

    shifted = x - np.max(x, axis=0, keepdims=True)
    exps = np.exp(shifted)  # computed once and reused for the normaliser
    return exps / np.sum(exps, axis=0, keepdims=True)

In [3]:
# Example scores for three classes, stored as float32.
x = np.array([2.0, 1.0, 0.1], dtype=np.float32)
# Show the probabilities produced by the NumPy softmax defined above.
print(f"Softmax = {softmax(x)}")

Softmax = [0.6590011  0.24243298 0.09856589]


### Using Pytorch

In [4]:
# Bind the tensor and the result to new names instead of overwriting `x` and
# `softmax`: reusing those names replaced the NumPy array and the `softmax`
# function, so re-running this cell or the NumPy cell above would fail.
x_tensor = torch.from_numpy(x.astype(np.float32))

# dim=0 normalises over the only axis of the 1-D tensor.
softmax_torch = torch.softmax(x_tensor, dim=0)
print(f"Softmax = {softmax_torch}")

Softmax = tensor([0.6590, 0.2424, 0.0986])


### Scores/Logits are raw values coming out of last layer

# Cross Entropy
- The NumPy implementation below requires the true labels (`actual`) to be **One Hot Encoded**; `predicted` holds the class probabilities

In [5]:
def cross_entropy(actual, predicted, eps=1e-12):
    """Cross-entropy loss between a one-hot target and predicted probabilities.

    Computes ``-sum(actual * log(predicted))``. Probabilities are clipped to
    be at least ``eps`` before taking the logarithm; without that, any exact
    zero in ``predicted`` produces ``0 * log(0) = nan`` (or ``inf`` for the
    true class) and poisons the whole sum.

    Args:
        actual: Array-like one-hot encoded true labels.
        predicted: Array-like of predicted probabilities, same shape as
            ``actual``.
        eps: Lower bound applied to ``predicted`` before the logarithm.

    Returns:
        The summed cross-entropy as a NumPy float.
    """
    actual = np.asarray(actual, dtype=float)
    # Clip only from below: values of exactly 1.0 are harmless (log(1) == 0).
    safe_predicted = np.clip(np.asarray(predicted, dtype=float), eps, None)
    return -np.sum(actual * np.log(safe_predicted))

In [6]:
# One-hot target over three classes (0, 1 and 2); class 0 is the true class.
y = np.array([1, 0, 0])

# Probability vectors, assumed to have been produced by a softmax.
y_pred_good = np.array([0.7, 0.2, 0.1])
y_pred_bad = np.array([0.1, 0.3, 0.6])

# Score both predictions against the same target.
l1, l2 = (
    cross_entropy(actual=y, predicted=probs)
    for probs in (y_pred_good, y_pred_bad)
)

print(f"l1 = {l1:.5f}, l2 = {l2:.5f}")

l1 = 0.35667, l2 = 2.30259


### Using Pytorch

**nn.CrossEntropyLoss() = nn.LogSoftmax + nn.NLLLoss(negative log likelihood loss)**

- Don't use Softmax in last layer
- Y has class labels, not one-hot encoded
- Y_pred has raw scores(logits), no softmax

In [7]:
# Cross-entropy criterion on raw logits; "mean" (the library default) averages
# the per-sample losses over the batch.
loss = nn.CrossEntropyLoss(reduction="mean")

In [8]:
# Target holds the class index (not one-hot); class 0 is the true class.
y = torch.tensor([0])

# Raw logits with shape (n_samples, n_classes) = (1, 3).
y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])
y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]])

l1, l2 = loss(y_pred_good, y), loss(y_pred_bad, y)
print(f"l1 = {l1:.5f}, l2 = {l2:.5f}")
print("#" * 100)
print("Getting Predictions")
# Reducing over dim=1 (the class axis) yields the top score and its class index.
max_value, ind = y_pred_good.max(dim=1)
print(f"Value = {max_value.item()}, Index = {ind.item()}")

l1 = 0.41703, l2 = 1.84062
####################################################################################################
Getting Predictions
Value = 2.0, Index = 0


Here, for multiple samples (`y = [2, 0, 1]`):
- 1st sample belongs to 3rd class
- 2nd sample belongs to 1st class  
- 3rd sample belongs to 2nd class

**length of y = No. of samples**

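`max_values, indices = torch.max(y_pred, dim=1)` returns, for each sample, the highest score and its index (the predicted class). Without `dim`, `torch.max` returns only the single largest value.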

In [9]:
# One true class index per sample (three samples).
y = torch.tensor([2, 0, 1])

# Raw logits with shape (n_samples, n_classes) = (3, 3);
# the number of classes is free to differ from the number of samples.
y_pred_good = torch.tensor([
    [0.1, 1.0, 2.0],
    [2.0, 1.0, 0.1],
    [0.1, 3.0, 1.0],
])
y_pred_bad = torch.tensor([
    [2.0, 1.0, 0.1],
    [0.1, 3.0, 1.0],
    [0.1, 1.0, 2.0],
])

l1, l2 = loss(y_pred_good, y), loss(y_pred_bad, y)
print(f"l1 = {l1:.5f}, l2 = {l2:.5f}")

# The row-wise maximum (dim=1) gives the winning score and its class per sample.
print("For Good prediction")
max_value, indices = y_pred_good.max(dim=1)
print(f"Predicted Values = {max_value}, Predicted Class =  {indices}")

print("For Bad prediction")
max_value, indices = y_pred_bad.max(dim=1)
print(f"Predicted Values = {max_value}, Predicted Class =  {indices}")

l1 = 0.33610, l2 = 2.26944
For Good prediction
Predicted Values = tensor([2., 2., 3.]), Predicted Class =  tensor([2, 0, 1])
For Bad prediction
Predicted Values = tensor([2., 3., 2.]), Predicted Class =  tensor([0, 1, 2])


# MultiClass problem

In [10]:
class NeuralNet(nn.Module):
    """Two-layer fully connected classifier that returns raw class scores.

    Layers: Linear(input_size -> hidden_size), ReLU,
    Linear(hidden_size -> n_classes).
    """

    def __init__(self, input_size, hidden_size, n_classes):
        """Create the layers.

        Args:
            input_size: Number of input features per sample.
            hidden_size: Number of units in the hidden layer.
            n_classes: Number of output scores (one per class).
        """
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, n_classes)

    def forward(self, x):
        """Map a batch of inputs to one logit per class."""
        hidden = self.relu(self.linear1(x))
        # Deliberately no softmax: the logits are returned as-is.
        return self.linear2(hidden)

In [11]:
# 28*28 = 784 input features, 5 hidden units, 3 output classes.
model = NeuralNet(
    input_size=28 * 28,
    hidden_size=5,
    n_classes=3,
)

# The model returns raw logits, so pair it with CrossEntropyLoss
# (LogSoftmax + NLLLoss).
criterion = nn.CrossEntropyLoss()

# Binary Class Classification

In [12]:
class NeuralNet(nn.Module):
    """Two-layer fully connected network with a sigmoid on the output.

    Layers: Linear(input_size -> hidden_size), ReLU,
    Linear(hidden_size -> n_classes), Sigmoid.
    """

    def __init__(self, input_size, hidden_size, n_classes=1):
        """Create the layers.

        Args:
            input_size: Number of input features per sample.
            hidden_size: Number of units in the hidden layer.
            n_classes: Number of output units; defaults to 1.
        """
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, n_classes)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map a batch of inputs to sigmoid-activated outputs in (0, 1)."""
        hidden = self.relu(self.linear1(x))
        logits = self.linear2(hidden)
        # Unlike the multi-class case, the activation is applied in the model.
        return self.sigmoid(logits)

# 28*28 = 784 input features, 5 hidden units, a single output unit.
model = NeuralNet(
    input_size=28 * 28,
    hidden_size=5,
    n_classes=1,
)

# The model already applies a sigmoid, so use BCELoss on its outputs.
criterion = nn.BCELoss()