In [1]:
import torch
import torch.nn as nn
import numpy as np

## Softmax
Equation -
$$S(y_i) = \frac{e^{y_i}}{\sum_j e^{y_j}}$$

### Softmax Layer

<p align="center">
<img src="../images/Softmax.png" style="width:800px;height:300px;">
</p>

**Important Points -**
- Softmax Function-
    - Applies the exponential function to each element
    - Normalizes by dividing by the sum of all these exponentials
    - Squashes each output into the range (0, 1), so it can be read as a probability
- Sum of all probabilities is 1

In [2]:
def softmax(x, axis=0):
    """Compute the softmax of ``x`` along ``axis`` in a numerically stable way.

    Args:
        x: Array-like of raw scores (logits).
        axis: Axis along which the outputs are normalized to sum to 1.
            Defaults to 0, which matches the previous behaviour.

    Returns:
        A NumPy array with the same shape as ``x`` whose entries along
        ``axis`` are non-negative and sum to 1.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalize; np.max would raise on an empty array.
        return np.exp(x)
    # Softmax is invariant to adding a constant to every score, so subtracting
    # the maximum leaves the result unchanged while keeping every exponent <= 0.
    # Without this shift, np.exp overflows for large scores and yields nan.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    # keepdims=True lets the denominator broadcast correctly for any axis.
    return exps / np.sum(exps, axis=axis, keepdims=True)

# NumPy: pass a small vector of raw scores through the hand-written softmax.
x = np.array([2.0, 1.0, 0.1])
outputs = softmax(x)
print('softmax numpy:', outputs)

# PyTorch: the built-in equivalent. dim=0 normalizes along the first (and,
# for this 1-D tensor, only) axis.
x = torch.tensor([2.0, 1.0, 0.1])
outputs = x.softmax(dim=0)
print('softmax torch:', outputs)

softmax numpy: [0.65900114 0.24243297 0.09856589]
softmax torch: tensor([0.6590, 0.2424, 0.0986])


# Cross-entropy

- Cross-entropy loss (or log loss) -
    - Measures the performance of a classification model whose output is a probability value between 0 and 1
- Loss increases as the predicted probability diverges from the actual label

<p align="center">
<img src="../images/Cross-entropy.png" style="width:400px;height:300px;">
</p>

In [17]:
def cross_entropy(actual, predicted):
    """Return the cross-entropy between one-hot labels and predicted probabilities.

    Args:
        actual: One-hot encoded true label(s), e.g. ``[1, 0, 0]`` for class 0.
        predicted: Predicted probabilities, same shape as ``actual``.

    Returns:
        The summed (not averaged) loss ``-sum(actual * log(predicted))``.
    """
    tiny = 1e-15
    # Keep probabilities strictly inside (0, 1) so that log() never sees 0.
    safe_probs = np.clip(predicted, tiny, 1 - tiny)
    weighted_log_probs = actual * np.log(safe_probs)
    return -np.sum(weighted_log_probs)

**`Y` (the `actual` argument) must be one-hot encoded**
- if class **0**: [1 0 0]
- if class **1**: [0 1 0]
- if class **2**: [0 0 1]

In [16]:
# One-hot target: the true class is 0.
Y = np.array([1, 0, 0])

# Two candidate probability vectors: one puts most mass on class 0, the other on class 2.
Y_pred_good = np.array([0.7, 0.2, 0.1])
Y_pred_bad = np.array([0.1, 0.3, 0.6])

# The loss is low when the true class gets a high probability and high otherwise.
l1, l2 = (cross_entropy(Y, probs) for probs in (Y_pred_good, Y_pred_bad))
print(f'Loss1 numpy: {l1:.4f}')
print(f'Loss2 numpy: {l2:.4f}')

Loss1 numpy: 0.3567
Loss2 numpy: 2.3026


### CrossEntropyLoss in PyTorch
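- Applies softmax internally, so the input must be raw logits (do not apply softmax yourself)
- Equivalent to `nn.LogSoftmax` followed by `nn.NLLLoss`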
- `NLLLoss` = negative log likelihood loss

<p align="center">
<img src="../images/nn.CrossEntropyLoss.png" style="width:500px;height:300px;">
</p>

In [46]:
# nn.CrossEntropyLoss consumes raw scores (logits) plus integer class indices.
loss = nn.CrossEntropyLoss()

# Target holds the class index of each sample (not one-hot): one sample, class 1.
Y = torch.tensor([1])

# Logits have shape (n_samples, n_classes) = (1, 4); the sizes are read from the tensor.
Y_pred_good = torch.tensor([[2.0, 3.0, 1.0, 0.2]])
Y_pred_bad = torch.tensor([[2.0, 1.0, 1.2, 4.0]])

l1, l2 = loss(Y_pred_good, Y), loss(Y_pred_bad, Y)
print(f'Loss good = {l1.item():.4f}')
print(f'Loss bad = {l2.item():.4f}')

Loss good = 0.4473
Loss bad = 3.2199


In [47]:
# get predictions
_, predictions1 = torch.max(Y_pred_good, 1)
_, predictions2 = torch.max(Y_pred_bad, 1)
print(f'Actual class: {Y.item()}, Y_pred1: {predictions1.item()}, Y_pred2: {predictions2.item()}')

Actual class: 1, Y_pred1: 1, Y_pred2: 3


In [48]:
loss = nn.CrossEntropyLoss()

# One sample whose true class index is 0.
Y = torch.tensor([0])

# Raw logits of shape (n_samples, n_classes) = (1, 3).
Y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])
Y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]])

# Call signature is loss(input, target): logits first, class indices second.
l1, l2 = loss(Y_pred_good, Y), loss(Y_pred_bad, Y)

print(f'PyTorch Loss1: {l1.item():.4f}')
print(f'PyTorch Loss2: {l2.item():.4f}')

PyTorch Loss1: 0.4170
PyTorch Loss2: 1.8406


In [49]:
# get predictions
_, predictions1 = torch.max(Y_pred_good, 1)
_, predictions2 = torch.max(Y_pred_bad, 1)
print(f'Actual class: {Y.item()}, Y_pred1: {predictions1.item()}, Y_pred2: {predictions2.item()}')

Actual class: 0, Y_pred1: 0, Y_pred2: 1


- Allows batch loss for multiple samples
- Target is of size nBatch = 3
- Each element has class label: 0, 1, or 2
- Input is of size nBatch x nClasses = 3 x 3
- Y_pred are logits (not softmax)

In [6]:
# Build the criterion in this cell so it does not rely on a `loss` object left
# over from an earlier cell; the cell can then be re-run on its own.
loss = nn.CrossEntropyLoss()

# Targets: one integer class index (0, 1 or 2) per sample -> shape (3,).
Y = torch.tensor([2, 0, 1])

# Logits of shape (n_samples, n_classes) = (3, 3); the largest entry in each
# row is the class that row predicts.
Y_pred_good = torch.tensor(
    [[0.1, 0.2, 3.9],   # predicts class 2
     [1.2, 0.1, 0.3],   # predicts class 0
     [0.3, 2.2, 0.2]])  # predicts class 1

Y_pred_bad = torch.tensor(
    [[0.9, 0.2, 0.1],   # predicts class 0 (target is 2)
     [0.1, 0.3, 1.5],   # predicts class 2 (target is 0)
     [1.2, 0.2, 0.5]])  # predicts class 0 (target is 1)

# With the default reduction ('mean') the returned value is the average of the
# per-sample losses over the batch.
l1 = loss(Y_pred_good, Y)
l2 = loss(Y_pred_bad, Y)
print(f'Batch Loss1:  {l1.item():.4f}')
print(f'Batch Loss2: {l2.item():.4f}')

Batch Loss1:  0.2834
Batch Loss2: 1.6418


In [7]:
# get predictions
_, predictions1 = torch.max(Y_pred_good, 1)
_, predictions2 = torch.max(Y_pred_bad, 1)
print(f'Actual class: {Y}, Y_pred1: {predictions1}, Y_pred2: {predictions2}')

Actual class: tensor([2, 0, 1]), Y_pred1: tensor([2, 0, 1]), Y_pred2: tensor([0, 2, 0])


Larger batches work the same way: the batch size is picked up automatically from the first dimension of the input.

# Neural Networks

## Binary Classification Problem
<p align="center">
<img src="../images/NN_Sigmoid.png" style="width:550px;height:400px;">
</p>

In [12]:
class NeuralNet1(nn.Module):
    """Two-layer feed-forward network for binary classification.

    Architecture: Linear(input_size -> hidden_size) -> ReLU ->
    Linear(hidden_size -> 1) -> sigmoid.
    """

    def __init__(self, input_size, hidden_size):
        """Create the layers.

        Args:
            input_size: Number of input features per sample.
            hidden_size: Number of units in the hidden layer.
        """
        super().__init__()
        self.linear1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        # A single output unit: one score per sample.
        self.linear2 = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, x):
        """Return sigmoid-activated outputs in (0, 1), one value per input row."""
        hidden = self.relu(self.linear1(x))
        score = self.linear2(hidden)
        # The sigmoid is applied inside the model, so the output is already a probability.
        return torch.sigmoid(score)

In [13]:
# 28*28 = 784 input features (presumably flattened 28x28 images) and 5 hidden units.
model1 = NeuralNet1(28 * 28, 5)
# Binary cross-entropy on probabilities; NeuralNet1 already ends with a sigmoid.
criterion1 = nn.BCELoss()

## Multi-class Problem

<p align="center">
<img src="../images/NN_Cross_Entropy.png" style="width:550px;height:400px;">
</p>

In [14]:
class NeuralNet2(nn.Module):
    """Two-layer feed-forward network for multi-class classification.

    Architecture: Linear(input_size -> hidden_size) -> ReLU ->
    Linear(hidden_size -> num_classes). The output is raw logits.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        """Create the layers.

        Args:
            input_size: Number of input features per sample.
            hidden_size: Number of units in the hidden layer.
            num_classes: Number of output classes (one logit per class).
        """
        super().__init__()
        self.linear1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        # One output unit per class.
        self.linear2 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Return unnormalized class scores (logits), one row per input sample."""
        hidden = self.relu(self.linear1(x))
        # Deliberately no softmax here: the logits are returned as-is.
        return self.linear2(hidden)

In [15]:
# 28*28 = 784 input features (presumably flattened 28x28 images), 5 hidden units, 3 classes.
model2 = NeuralNet2(28 * 28, 5, 3)
# CrossEntropyLoss applies softmax itself, which is why NeuralNet2 returns raw logits.
criterion2 = nn.CrossEntropyLoss()

All pic credits - Python Engineer