<a href="https://colab.research.google.com/github/z-arabi/pytorchTutorial/blob/master/11_softmax_and_crossentropy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch
import torch.nn as nn
import numpy as np

In [None]:
#
#        -> 2.0              -> 0.65  
# Linear -> 1.0  -> Softmax  -> 0.25   -> CrossEntropy(y, y_hat)
#        -> 0.1              -> 0.1                   
#
#     scores(logits)      probabilities
#                           sum = 1.0
#

In [None]:
# How the `axis` argument of a reduction behaves:
#   1-D array -> there is just one axis (axis 0)
#   2-D array -> axis=0 yields one sum per column, axis=1 yields one sum per row
#   3-D array -> a depth axis is added in front of the two above

# 2 x 2 matrix
a = np.array([[1, 2], [3, 4]])
print(a.sum(axis=0))  # column sums
print(a.sum(axis=1))  # row sums

# 3 x 1 column vector
b = np.array([[1], [2], [3]])
print(b.sum(axis=0))  # single column -> a single sum
print(b.sum(axis=1))  # every row holds one element -> the values themselves

# A 1-D array differs from the 2-D cases above: it has a single axis,
# so summing along axis 0 adds up every element into one number.
c = np.array([1, 2, 3])
print(c.sum(axis=0))
# c.sum(axis=1) would raise an error: a 1-D array has no second axis

[4 6]
[3 7]
[6]
[1 2 3]
6


In [None]:
# Softmax applies the exponential function to each element, and normalizes
# by dividing by the sum of all these exponentials
# -> squashes the output to be between 0 and 1 = probability
# sum of all probabilities is 1
def softmax(x, axis=0):
    """Convert raw scores into probabilities that sum to 1 along ``axis``.

    The largest score along ``axis`` is subtracted before exponentiating.
    Mathematically this does not change the result, but it keeps ``np.exp``
    from overflowing to ``inf`` (which would turn the division into ``nan``)
    when the scores are large.

    Args:
        x: array-like of scores (logits).
        axis: axis along which to normalize. Defaults to 0, which is the
            only axis of a 1-D input.

    Returns:
        np.ndarray of the same shape as ``x`` whose entries lie in [0, 1]
        and sum to 1 along ``axis``.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalize; np.max would raise on an empty array.
        return np.exp(x)
    # Shift so the largest exponent is exp(0) = 1 -> no overflow.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=axis, keepdims=True)

# Raw scores (logits) for one sample with three classes.
x = np.array([2.0, 1.0, 0.1])
print(np.shape(x))

# Normalize the scores into probabilities.
outputs = softmax(x)
print('softmax numpy:', outputs)

(3,)
softmax numpy: [0.65900114 0.24243297 0.09856589]


In [None]:
# The function above works on NumPy arrays; PyTorch ships its own softmax
# that operates on tensors.
x = torch.tensor([2.0, 1.0, 0.1])
print(x.shape)

# dim=0 -> normalize along the first (here the only) axis
outputs = torch.softmax(x, dim=0)
print('softmax torch:', outputs)

torch.Size([3])
softmax torch: tensor([0.6590, 0.2424, 0.0986])


In [None]:
# softmax > turns the raw scores into probabilities; the class with the highest probability is the answer
# cross-entropy > measures the difference between the actual answer and the predicted one
# to compute the cross-entropy for classification, the actual label must be one-hot encoded and
# the predictions must be probabilities (i.e. the output of softmax)
# Cross-entropy > a loss metric
# Cross-entropy loss, or log loss, measures the performance of a classification model 
# whose output is a probability value between 0 and 1. 
# -> loss increases as the predicted probability diverges from the actual label
def cross_entropy(actual, predicted, eps=1e-12):
    """Cross-entropy loss between one-hot labels and predicted probabilities.

    Computes ``-sum(actual * log(predicted))``.

    Args:
        actual: array-like of one-hot encoded labels.
        predicted: array-like of predicted probabilities, same shape as
            ``actual``.
        eps: lower bound applied to ``predicted`` before taking the log.

    Returns:
        The summed loss as a NumPy float.
    """
    actual = np.asarray(actual)
    # A probability of exactly 0 makes log() return -inf; multiplied by a 0
    # label that becomes nan, and by a 1 label it becomes inf. Clipping from
    # below keeps the loss finite without touching ordinary values.
    predicted = np.clip(np.asarray(predicted, dtype=float), eps, None)
    loss = -np.sum(actual * np.log(predicted))
    return loss

def cross_entropy_norm(actual, predicted, eps=1e-12):
    """Cross-entropy loss divided by the length of ``predicted``'s first axis.

    Computes ``-sum(actual * log(predicted)) / predicted.shape[0]``.
    For a 2-D input laid out as (samples, classes) the divisor is the number
    of samples, so the result is the mean loss per sample. For a 1-D input
    (a single sample) the divisor is the number of entries in that vector.

    Args:
        actual: array-like of one-hot encoded labels.
        predicted: array-like of predicted probabilities, same shape as
            ``actual``.
        eps: lower bound applied to ``predicted`` before taking the log.

    Returns:
        The normalized loss as a NumPy float.
    """
    actual = np.asarray(actual)
    # Clip from below so that a probability of exactly 0 cannot produce
    # log(0) = -inf (which would make the loss nan or inf).
    predicted = np.clip(np.asarray(predicted, dtype=float), eps, None)
    loss = -np.sum(actual * np.log(predicted))
    return loss / float(predicted.shape[0])

In [None]:
# The hand-written cross entropy expects a one-hot encoded label:
#   class 0 -> [1 0 0]
#   class 1 -> [0 1 0]
#   class 2 -> [0 0 1]
Y = np.array([1, 0, 0])

# Turn two sets of raw scores into probabilities with softmax.
Y_pred_good = softmax(np.array([2.0, 1.0, 0.1]))
Y_pred_bad = softmax(np.array([0.5, 2.0, 0.3]))

print(Y_pred_good)
print(Y_pred_bad)

# Plain loss first, then the variant divided by the first-axis length.
l1, l2 = (cross_entropy(Y, pred) for pred in (Y_pred_good, Y_pred_bad))
l1n, l2n = (cross_entropy_norm(Y, pred) for pred in (Y_pred_good, Y_pred_bad))
print(f'Loss1 numpy: {l1:.4f} , {l1n:.4f}')
print(f'Loss2 numpy: {l2:.4f} , {l2n:.4f}')

[0.65900114 0.24243297 0.09856589]
[0.15871958 0.71133182 0.1299486 ]
Loss1 numpy: 0.4170 , 0.1390
Loss2 numpy: 1.8406 , 0.6135


In [None]:
# nn.CrossEntropyLoss = nn.LogSoftmax followed by nn.NLLLoss
# (NLLLoss = negative log likelihood loss), so softmax is applied for us.
# Call signature: loss(input, target)
loss = nn.CrossEntropyLoss()

# target: size nSamples = 1
# it holds the class label (0, 1, or 2) of each sample, NOT a one-hot vector
Y = torch.tensor([0])

# input: size nSamples x nClasses = 1 x 3 -> three class scores for one sample
# it must contain raw, unnormalized scores (logits); do not apply softmax first
Y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])
Y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]])

l1, l2 = loss(Y_pred_good, Y), loss(Y_pred_bad, Y)
print(f'PyTorch Loss1: {l1.item():.4f}')
print(f'PyTorch Loss2: {l2.item():.4f}')

PyTorch Loss1: 0.4170
PyTorch Loss2: 1.8406


In [None]:
# torch.max along dim=1 returns both the largest score and its index;
# only the index matters here because it is the predicted class.
print(torch.max(Y_pred_good, dim=1))

# get predictions: keep the indices, the values are not needed
value1, predictions1 = torch.max(Y_pred_good, dim=1)
_, predictions2 = torch.max(Y_pred_bad, dim=1)
print(f'Actual class: {Y.item()}, Y_pred1: {predictions1.item()}, Y_pred2: {predictions2.item()}')


torch.return_types.max(
values=tensor([2.]),
indices=tensor([0]))
Actual class: 0, Y_pred1: 0, Y_pred2: 1


In [None]:
# The loss also works on a batch of samples.
# The batch size does not matter: every sample has exactly one target label.

# target: size nBatch = 3, one class label (0, 1, or 2) per sample
Y = torch.tensor([2, 0, 1])

# input: size nBatch x nClasses = 3 x 3, raw logits (not softmax)
Y_pred_good = torch.tensor([
    [0.1, 0.2, 3.9],  # predict class 2
    [1.2, 0.1, 0.3],  # predict class 0
    [0.3, 2.2, 0.2],  # predict class 1
])

Y_pred_bad = torch.tensor([
    [0.9, 0.2, 0.1],
    [0.1, 0.3, 1.5],
    [1.2, 0.2, 0.5],
])

l1, l2 = loss(Y_pred_good, Y), loss(Y_pred_bad, Y)
print(f'Batch Loss1:  {l1.item():.4f}')
print(f'Batch Loss2: {l2.item():.4f}')

# get predictions: index of the largest logit in every row
_, predictions1 = torch.max(Y_pred_good, dim=1)
_, predictions2 = torch.max(Y_pred_bad, dim=1)
print(f'Actual class: {Y}, Y_pred1: {predictions1}, Y_pred2: {predictions2}')

Batch Loss1:  0.2834
Batch Loss2: 1.6418
Actual class: tensor([2, 0, 1]), Y_pred1: tensor([2, 0, 1]), Y_pred2: tensor([0, 2, 0])


In [None]:
# Multiclass problem
class NeuralNetMul(nn.Module):
    """Two-layer fully connected network for multiclass classification.

    Layers: Linear(input_size -> hidden_size), ReLU,
    Linear(hidden_size -> num_classes). ``forward`` returns the raw class
    scores; no softmax is applied inside the network.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the raw class scores for ``x``."""
        hidden = self.relu(self.linear1(x))
        # no softmax at the end
        return self.linear2(hidden)

# 28*28 input features, 5 hidden units, 3 output classes
model = NeuralNetMul(28 * 28, 5, 3)
# CrossEntropyLoss applies softmax itself, so the model outputs raw scores
criterion = nn.CrossEntropyLoss()
# usage: out = model(X), where X has 28*28 features per sample
#        criterion(out, Y_actual)

In [5]:
# for binary classification you can use a single output + sigmoid function
# OR two outputs + softmax function

# Binary classification
#directly add activation functions as layers
class NeuralNetBin1(nn.Module):
    """Binary classifier whose activation functions are stored as layers.

    Layers: Linear(input_size -> hidden_size), ReLU,
    Linear(hidden_size -> 1), Sigmoid.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        # other activation layers: nn.Sigmoid / nn.Softmax / nn.Tanh / nn.LeakyReLU
        self.linear2 = nn.Linear(hidden_size, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid of the single output score for ``x``."""
        hidden = self.relu(self.linear1(x))
        score = self.linear2(hidden)
        return self.sigmoid(score)


class NeuralNetBin2(nn.Module):
    """Binary classifier with a ReLU layer and a functional sigmoid.

    Layers: Linear(input_size -> hidden_size), ReLU, Linear(hidden_size -> 1).
    The sigmoid is applied with ``torch.sigmoid`` inside ``forward`` rather
    than being stored as a layer.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return the sigmoid of the single output score for ``x``."""
        score = self.linear2(self.relu(self.linear1(x)))
        # sigmoid at the end
        return torch.sigmoid(score)

# use activation functions directly in forward pass
class NeuralNetBin3(nn.Module):
    """Binary classifier that applies its activations functionally.

    Only the two linear layers are stored on the module; ReLU and sigmoid
    are called directly inside ``forward``.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return the sigmoid of the single output score for ``x``."""
        # other functional activations: torch.softmax / torch.tanh
        # import torch.nn.functional as F > F.relu / F.leaky_relu
        hidden = torch.relu(self.linear1(x))
        return torch.sigmoid(self.linear2(hidden))

# 28*28 input features, 5 hidden units, one output
model1 = NeuralNetBin1(28 * 28, 5)
model2 = NeuralNetBin2(28 * 28, 5)
# binary cross entropy
criterion = nn.BCELoss()
# usage: out = model1(X), where X has 28*28 features per sample
#        criterion(out, Y_actual)