In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

A loss function tells us how well our model is doing. In multi-class classification, the model might output the following raw scores:

In [4]:
# Example vector of four raw scores.
out = np.array((5, 2, -1, 3))

The second key ingredient we need is a loss function, which is a differentiable objective that quantifies our unhappiness with the computed class scores. Intuitively, we want the correct class to have a higher score than the other classes. When this is the case, the loss should be low and otherwise the loss should be high. There are many ways to quantify this intuition, but in this example let's use the cross-entropy loss that is associated with the Softmax classifier.

For a single example with class scores $f$ and correct class $y_i$, the cross-entropy loss is $L_i = -\log\left(\frac{e^{f_{y_i}}}{\sum_j e^{f_j}}\right)$.

We can see that the Softmax classifier interprets every element of $f$ as holding the (unnormalized) log probabilities of the three classes. We exponentiate these to get (unnormalized) probabilities, and then normalize them to get probabilities. Therefore, the expression inside the log is the normalized probability of the correct class. Note how this expression works: this quantity is always between 0 and 1. When the probability of the correct class is very small (near 0), the loss will go towards (positive) infinity. Conversely, when the correct class probability goes towards 1, the loss will go towards zero because $\log(1)=0$. Hence, the expression for $L_i$ is low when the correct class probability is high, and it’s very high when it is low.
http://cs231n.github.io/neural-networks-case-study/

In [24]:
# One class index per row of X. Despite the name, these values are used
# below as column indices (labels), not as scores.
scores = np.array((0, 1, 4))
# Random 3 x 5 matrix with entries drawn uniformly from [0, 1).
X = np.random.rand(3, 5)
X

array([[0.11494422, 0.09975654, 0.28504199, 0.34777442, 0.15496837],
       [0.98433883, 0.46763162, 0.14608097, 0.39536274, 0.95039889],
       [0.32166425, 0.33591235, 0.76139172, 0.88183236, 0.28131679]])

Exponentiating the scores and normalizing each row gives us probabilities.

In [27]:
# Number of rows (examples) in X.
num_examples = len(X)
# Exponentiate every entry to obtain unnormalized probabilities.
exp_scores = np.exp(X)
# Divide each row by its own total so that every row sums to one.
probs = np.divide(exp_scores, exp_scores.sum(axis=1, keepdims=True))
probs

array([[0.18270343, 0.17994956, 0.21658044, 0.23060227, 0.1901643 ],
       [0.2816529 , 0.16800072, 0.12180454, 0.15628783, 0.27225402],
       [0.15928496, 0.16157071, 0.24725551, 0.27890268, 0.15298614]])

In [28]:
# For every row, pick the probability at that row's index in `scores`
# and take its negative natural log.
correct_logprobs = np.negative(np.log(probs[np.arange(num_examples), scores]))

In [30]:
# Average the per-example negative log-probabilities.
data_loss = correct_logprobs.sum() / num_examples
data_loss

1.7870286679351468

In [7]:
def norm(x):
    """Divide ``x`` by the sum of all of its entries.

    The total is taken over every element of ``x`` (no axis is given), so
    the entries of the result add up to one as a whole.
    """
    total = np.sum(x)
    return x / total


def softmax(W):
    """Print the intermediate values of a softmax computation on ``W``.

    Printed, in order: ``exp(W)``; those values divided by their overall
    sum (via ``norm``); the negative base-10 logarithm of the normalized
    values; and the largest of those negative logarithms.

    Nothing is returned; the function only prints.
    """
    exponentials = np.exp(W)
    print(exponentials)
    normalized = norm(exponentials)
    print(normalized)
    # Note: base-10 logarithm, not the natural logarithm.
    neg_log10 = -np.log10(normalized)
    print(neg_log10)
    print(np.max(neg_log10))

In [6]:
def softmax(x, axis=0):
    """Numerically stable softmax of ``x`` along ``axis``.

    Parameters
    ----------
    x : array_like
        Input scores.
    axis : int, optional
        Axis along which the values are normalized so that they sum to one.
        Defaults to 0, which keeps the behaviour of the previous
        implementation (a 1-D vector is normalized as a whole; a 2-D array
        is normalized column by column).

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``x`` whose slices along ``axis`` sum
        to one.
    """
    x = np.asarray(x)
    # Shift each slice by its own maximum rather than by the global maximum.
    # The result is mathematically unchanged, but every slice now contains an
    # exp(0) == 1 term, so its sum can never underflow to zero (0/0 -> NaN).
    e_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return e_x / e_x.sum(axis=axis, keepdims=True)

In [31]:
def softmax_loss_vectorized(W, X, y, reg):
  """
  Softmax (cross-entropy) loss and its gradient, vectorized version.

  Inputs:
  - W: array of shape (D, C) containing the weights.
  - X: array of shape (N, D) containing one example per row.
  - y: integer array of shape (N,); y[i] is the column index of the correct
    class for X[i], so 0 <= y[i] < C.
  - reg: L2 regularization strength.

  Returns a tuple of:
  - loss: the mean over the N examples of -log(p[i, y[i]]), where p is the
    row-wise softmax of X.dot(W), plus reg * sum(W * W).
  - dW: gradient of that loss with respect to W; same shape as W.
  """
  num_train = X.shape[0]
  rows = np.arange(num_train)

  # scores: N by C matrix containing class scores
  scores = X.dot(W)

  # Shift every row by its own maximum before exponentiating. Softmax is
  # invariant to a per-row constant, and the shift guarantees that the
  # largest exponent is exp(0) == 1, so nothing overflows and no row sum
  # can underflow to zero.
  shifted = scores - scores.max(axis=1, keepdims=True)
  exp_scores = np.exp(shifted)
  exp_sums = exp_scores.sum(axis=1, keepdims=True)

  # loss
  # log p[i, y[i]] = shifted[i, y[i]] - log(sum_j exp(shifted[i, j])).
  # Working in log space avoids log(0) when the correct-class probability
  # underflows.
  correct_logprobs = shifted[rows, y] - np.log(exp_sums[:, 0])
  # Cross-entropy is the NEGATIVE mean log-probability; the L2 penalty is
  # added with a positive sign so that it matches the 2 * reg * W term of
  # the gradient below.
  loss = -np.sum(correct_logprobs) / num_train + reg * np.sum(W * W)

  # grad
  # d(loss_i)/d(scores_i) = p_i - onehot(y_i)
  dscores = exp_scores / exp_sums
  dscores[rows, y] -= 1.0
  dW = X.T.dot(dscores) / num_train
  dW += 2 * reg * W

  return loss, dW

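In PyTorch this loss is available as `nn.CrossEntropyLoss`, but it is often split into two parts: `m = nn.LogSoftmax(dim=1)` and `loss = nn.NLLLoss()`. The split is common because log-softmax can then be treated as the last layer of the model, so it is computed together with the rest of the forward pass (on the GPU, if the model lives there). Note that `nn.CrossEntropyLoss` also runs on whatever device its inputs are on, and both approaches produce the same loss value.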

```python
def softmax(x):
    return torch.exp(x)/torch.sum(torch.exp(x), dim=1).view(-1, 1)
```
    

In [33]:
def myCrossEntropyLoss(outputs, labels):
    """Hand-written cross-entropy loss computed from raw scores.

    ``outputs`` is indexed as a 2-D tensor with one row per example;
    softmax and log-softmax are taken along ``dim=1``. ``labels`` selects,
    for each row, the column whose log-probability contributes to the loss.

    As a side effect the softmax probabilities and then the log-softmax
    values are printed.

    Returns the negated sum of the selected log-probabilities divided by
    ``len(labels)``.
    """
    n_rows = outputs.size(0)
    # Softmax probabilities, shown for inspection only.
    probabilities = F.softmax(outputs, dim=1)
    print(probabilities)
    # Log of the softmax values.
    log_probabilities = F.log_softmax(outputs, dim=1)
    print(log_probabilities)
    # Keep only the log-probability at each row's label.
    picked = log_probabilities[range(n_rows), labels]
    return -picked.sum() / len(labels)

In [34]:
# Compare PyTorch's LogSoftmax + NLLLoss pipeline with the hand-written loss.
# dim=1 normalizes across the class axis of the N x C input. Omitting `dim`
# relies on PyTorch's deprecated implicit-dimension choice and emits a warning.
m = nn.LogSoftmax(dim=1)
loss = nn.NLLLoss()
# input is of size N x C = 3 x 5
input = torch.randn(3, 5)
print(input)
# each element in target has to have 0 <= value < C
target = torch.tensor([1, 0, 4])
print(len(target))
# Library result: negative log-likelihood of the log-softmax outputs.
output = loss(m(input), target)
print(output)
# Hand-written result on the same raw scores.
output2 = myCrossEntropyLoss(input, target)
print(output2)

tensor([[ 1.9763,  0.3201, -0.8598,  1.6141, -1.6732],
        [-0.9189,  0.5164,  1.0250, -0.6426,  0.8590],
        [ 0.7105, -0.5550,  0.0271, -0.7494, -0.1751]])
3
tensor(2.3586)
tensor([[0.5072, 0.0968, 0.0297, 0.3531, 0.0132],
        [0.0515, 0.2163, 0.3597, 0.0679, 0.3047],
        [0.4112, 0.1160, 0.2076, 0.0955, 0.1696]])
tensor([[-0.6789, -2.3351, -3.5150, -1.0411, -4.3283],
        [-2.9664, -1.5311, -1.0225, -2.6901, -1.1885],
        [-0.8886, -2.1542, -1.5720, -2.3485, -1.7742]])
tensor(2.3586)


