In [6]:
import torch
import torch.nn as nn
import numpy as np


# softmax

- softmax with numpy

In [17]:
import torch
import torch.nn as nn
import numpy as np

def softmax(x):
    """Compute the softmax of ``x`` along axis 0.

    Args:
        x: array-like of raw scores. For a 1-D input the whole vector is
            normalized; for a 2-D input each column is normalized
            independently (the reduction runs over axis 0).

    Returns:
        numpy.ndarray of the same shape as ``x`` whose entries along axis 0
        are non-negative and sum to 1.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalize; np.max would raise on an empty reduction,
        # so keep the plain formula, which simply yields an empty array.
        return np.exp(x) / np.sum(np.exp(x), axis=0)
    # Subtracting the maximum leaves the result mathematically unchanged
    # (the factor exp(-max) cancels in the ratio) but keeps np.exp from
    # overflowing to inf, which would otherwise produce nan for large scores.
    shifted = x - np.max(x, axis=0)
    exps = np.exp(shifted)
    # array / scalar division is vectorized, so no Python loop is needed.
    return exps / np.sum(exps, axis=0)

# Raw scores for three classes, turned into probabilities by softmax.
x = np.array([2.0, 1.0, 0.1])

outputs_x = softmax(x)

print(f'softmax with numpy:{outputs_x}')

softmax with numpy:[0.65900114 0.24243297 0.09856589]


- softmax with pytorch

In [23]:
import torch
import torch.nn as nn
import numpy as np

# Raw scores for three classes as a float32 tensor.
y = torch.tensor([2.0, 1.0, 0.1], dtype=torch.float32)

# Two equivalent spellings; the axis to normalize over must be given explicitly.
outputs_y_1 = torch.softmax(y, dim=0)   # function form
outputs_y_2 = y.softmax(dim=0)          # tensor-method form

print(f'softmax_pytorch: \n{outputs_y_1} \n{outputs_y_2}')


softmax_pytorch: 
tensor([0.6590, 0.2424, 0.0986]) 
tensor([0.6590, 0.2424, 0.0986])


# crossentropy

- cross-entropy
  数值越低越好；它是独热编码与预测概率的对数逐项相乘后求和再取负：$L = -\sum_i y_i \log \hat{y}_i$

- cross entropy with numpy

In [3]:
import torch
import torch.nn as nn
import numpy as np

def cross_entropy(actual, predicted, eps=1e-15):
    """Cross-entropy between one-hot targets and predicted probabilities.

    Computes ``-sum(actual * log(predicted))`` over all elements.

    Args:
        actual: array-like of target weights (one-hot encoded in this
            notebook).
        predicted: array-like of predicted probabilities, same shape as
            ``actual``.
        eps: lower bound applied to ``predicted`` before taking the log, so
            that a probability of exactly 0 yields a large finite loss
            instead of ``inf`` (or ``nan`` from ``0 * -inf``).

    Returns:
        The summed (not averaged) loss as a numpy float.
    """
    actual = np.asarray(actual)
    # Only the lower bound is clipped; values above eps pass through unchanged.
    safe_predicted = np.clip(np.asarray(predicted, dtype=float), eps, None)
    loss = -np.sum(actual * np.log(safe_predicted))
    # The sum is returned as-is; divide by float(predicted.shape[0]) to
    # normalize by the size of the first axis if an average is wanted.
    return loss

# One-hot target: class 0 is the correct class.
Y = np.array([1, 0, 0])

# Predicted probabilities: one close to the target, one far from it.
Y_pred_good = np.array([0.7, 0.2, 0.1])
Y_pred_bad = np.array([0.1, 0.3, 0.6])

# The good prediction should give the lower loss.
l1 = cross_entropy(Y, Y_pred_good)
l2 = cross_entropy(Y, Y_pred_bad)

print(f'Loss_good with numpy: {l1:.4f}')
print(f'Loss_bad with numpy: {l2:.4f}')



Loss_good with numpy: 0.3567
Loss_bad with numpy: 2.3026


- cross entropy with pytorch
1. nn.CrossEntropyLoss 同时实现了 nn.LogSoftmax + nn.NLLLoss，所以网络的最后不要再加 softmax 层
2. inputs（预测的值）是模型的原始得分（raw scores），这个类会先对inputs应用LogSoftmax类
3. target，是类索引（class indices）,计算上一步得到的log值与target的负对数似然损失，这里targets是类别的索引而不是独热编码，但索引和独热的效果是一样的，提高了计算效率


4. 官方文档里还有另一种情况：target 也可以是每个类别的概率（class probabilities，形状与 inputs 相同），例如软标签或标签平滑的场景

- 一个samples

In [6]:
import torch
import torch.nn as nn
import numpy as np

# CrossEntropyLoss with its default settings.
loss = nn.CrossEntropyLoss()

# Index of the correct class for the single sample.
Y = torch.tensor([0])

# Raw scores (no softmax applied), shape n_samples x n_classes = 1 x 3.
Y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])
Y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]])

loss1 = loss(Y_pred_good, Y)
loss2 = loss(Y_pred_bad, Y)

print(f'loss1: {loss1}')
print(f'loss2: {loss2}')

# torch.max with dim=1 reduces along axis 1, i.e. over the entries of each
# row, and returns a (values, indices) named tuple.
prediction1 = torch.max(Y_pred_good, 1)
prediction2 = torch.max(Y_pred_bad, 1)
print(f'prediction1: {prediction1}')
# Label fixed: this line shows prediction2 but was previously tagged "prediction1".
print(f'prediction2: {prediction2}')

# The same call, unpacked directly into the max value and the predicted class index.
prediction1_max, prediction1_index = torch.max(Y_pred_good, 1)
prediction2_max, prediction2_index = torch.max(Y_pred_bad, dim=1)
print(f'prediction1: {prediction1_max} {prediction1_index}')
print(f'prediction2: {prediction2_max} {prediction2_index}')




loss1: 0.4170299470424652
loss2: 1.840616226196289
prediction1: torch.return_types.max(
values=tensor([2.]),
indices=tensor([0]))
prediction1: torch.return_types.max(
values=tensor([2.]),
indices=tensor([1]))
prediction1: tensor([2.]) tensor([0])
prediction2: tensor([2.]) tensor([1])


- 多个samples

In [9]:
import torch
import torch.nn as nn
import numpy as np

# Default reduction: the per-sample losses are averaged.
loss = nn.CrossEntropyLoss()

# Index of the correct class for each of the three samples.
Y = torch.tensor([2, 0, 1])

# Raw scores, shape n_samples x n_classes = 3 x 3.
# In the "good" tensor the largest score of every row sits at the target index.
Y_pred_good = torch.tensor([
    [0.1, 1.0, 2.1],
    [2.0, 1.0, 0.1],
    [0.1, 3.0, 0.1],
])
# In the "bad" tensor the first two rows peak at a wrong class.
Y_pred_bad = torch.tensor([
    [2.1, 1.0, 0.1],
    [0.1, 1.0, 2.1],
    [0.1, 3.0, 0.1],
])

loss1 = loss(Y_pred_good, Y)
loss2 = loss(Y_pred_bad, Y)
print(f'loss1: {loss1}')
print(f'loss2: {loss2}')

# dim=1 reduces along axis 1 (over each row); torch.max then returns the
# row-wise maximum together with its index, i.e. the predicted class.
prediction1_max, prediction1_index = torch.max(Y_pred_good, dim=1)
prediction2_max, prediction2_index = torch.max(Y_pred_bad, dim=1)
print(f'prediction1: {prediction1_max} {prediction1_index}')
print(f'prediction2: {prediction2_max} {prediction2_index}')



loss1: 0.3018244206905365
loss2: 1.6241613626480103
prediction1: tensor([2.1000, 2.0000, 3.0000]) tensor([2, 0, 1])
prediction2: tensor([2.1000, 2.1000, 3.0000]) tensor([0, 2, 1])


nn.CrossEntropyLoss 的参数 **reduction** 有三种取值：默认是 **'mean'**（对各样本的损失取平均），还有 **'none'**（返回每个样本各自的损失）和 **'sum'**（对各样本的损失求和），注意要以字符串形式传入

In [14]:
import torch
import torch.nn as nn
import numpy as np

# reduction='sum': the per-sample losses are added up instead of averaged.
loss = nn.CrossEntropyLoss(reduction='sum')

# Index of the correct class for each of the three samples.
Y = torch.tensor([2, 0, 1])

# Raw scores, shape n_samples x n_classes = 3 x 3.
Y_pred_good = torch.tensor([
    [0.1, 1.0, 2.1],
    [2.0, 1.0, 0.1],
    [0.1, 3.0, 0.1],
])
Y_pred_bad = torch.tensor([
    [2.1, 1.0, 0.1],
    [0.1, 1.0, 2.1],
    [0.1, 3.0, 0.1],
])

loss1 = loss(Y_pred_good, Y)
loss2 = loss(Y_pred_bad, Y)
print(f'loss1: {loss1}')
print(f'loss2: {loss2}')

# Row-wise maximum (dim=1) and its index, which is the predicted class.
prediction1_max, prediction1_index = torch.max(Y_pred_good, dim=1)
prediction2_max, prediction2_index = torch.max(Y_pred_bad, dim=1)
print(f'prediction1: {prediction1_max} {prediction1_index}')
print(f'prediction2: {prediction2_max} {prediction2_index}')



loss1: 0.9054732322692871
loss2: 4.87248420715332
prediction1: tensor([2.1000, 2.0000, 3.0000]) tensor([2, 0, 1])
prediction2: tensor([2.1000, 2.1000, 3.0000]) tensor([0, 2, 1])


In [15]:
import torch
import torch.nn as nn
import numpy as np

# reduction='none': no aggregation, one loss value is returned per sample.
loss = nn.CrossEntropyLoss(reduction='none')

# Index of the correct class for each of the three samples.
Y = torch.tensor([2, 0, 1])

# Raw scores, shape n_samples x n_classes = 3 x 3.
Y_pred_good = torch.tensor([
    [0.1, 1.0, 2.1],
    [2.0, 1.0, 0.1],
    [0.1, 3.0, 0.1],
])
Y_pred_bad = torch.tensor([
    [2.1, 1.0, 0.1],
    [0.1, 1.0, 2.1],
    [0.1, 3.0, 0.1],
])

# Both results are 1-D tensors with one entry per row of the input.
loss1 = loss(Y_pred_good, Y)
loss2 = loss(Y_pred_bad, Y)
print(f'loss1: {loss1}')
print(f'loss2: {loss2}')

# Row-wise maximum (dim=1) and its index, which is the predicted class.
prediction1_max, prediction1_index = torch.max(Y_pred_good, dim=1)
prediction2_max, prediction2_index = torch.max(Y_pred_bad, dim=1)
print(f'prediction1: {prediction1_max} {prediction1_index}')
print(f'prediction2: {prediction2_max} {prediction2_index}')



loss1: tensor([0.3840, 0.4170, 0.1044])
loss2: tensor([2.3840, 2.3840, 0.1044])
prediction1: tensor([2.1000, 2.0000, 3.0000]) tensor([2, 0, 1])
prediction2: tensor([2.1000, 2.1000, 3.0000]) tensor([0, 2, 1])


- nn with pytorch for softmax, multi-classes

因为 nn.CrossEntropyLoss 内部已经包含了 LogSoftmax，所以自己定义的网络最后一层不要再加 softmax，直接输出原始得分（raw scores）即可

In [17]:
import torch
import torch.nn as nn
import numpy as np


# multi-classes problem
class NeuralNet2(nn.Module):
    """Two-layer fully connected network for multi-class classification.

    Structure: Linear(inputs_size -> hidden_size) -> ReLU ->
    Linear(hidden_size -> num_classes). The forward pass returns the raw
    output of the last linear layer; no softmax is applied.
    """

    def __init__(self, inputs_size, hidden_size, num_classes):
        """Create the layers.

        Args:
            inputs_size: number of input features per sample.
            hidden_size: number of units in the hidden layer.
            num_classes: number of output scores per sample.
        """
        super().__init__()
        self.linear1 = nn.Linear(inputs_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the raw class scores for ``x``."""
        hidden = self.relu(self.linear1(x))
        # No softmax here: the scores are returned unnormalized.
        return self.linear2(hidden)

# 28*28 = 784 input features, 5 hidden units, 3 output classes.
model = NeuralNet2(
    inputs_size=28 * 28,
    hidden_size=5,
    num_classes=3,
)
# Paired with a model that outputs raw scores (no softmax layer).
criterion = nn.CrossEntropyLoss()





二分类问题（binary-class）：损失函数用 nn.BCELoss（二元交叉熵，即交叉熵损失的二分类版本）；它要求输入是概率，所以网络最后要接一个 sigmoid

In [None]:
import torch
import torch.nn as nn
import numpy as np


# binary-class problem
# NOTE(review): this reuses the name NeuralNet2, so running this cell
# replaces any class of the same name defined earlier in the notebook.
class NeuralNet2(nn.Module):
    """Two-layer fully connected network for binary classification.

    Structure: Linear(inputs_size -> hidden_size) -> ReLU ->
    Linear(hidden_size -> 1) -> sigmoid. The forward pass returns one value
    per sample, squashed into (0, 1) by the sigmoid.
    """

    def __init__(self, inputs_size, hidden_size):
        """Create the layers.

        Args:
            inputs_size: number of input features per sample.
            hidden_size: number of units in the hidden layer.
        """
        super().__init__()
        self.linear1 = nn.Linear(inputs_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return the sigmoid of the single output score for ``x``."""
        hidden = self.relu(self.linear1(x))
        score = self.linear2(hidden)
        # Sigmoid (not softmax) at the end: maps the single score into (0, 1).
        y_pred = torch.sigmoid(score)
        return y_pred

# 28*28 = 784 input features, 5 hidden units, one sigmoid output.
model = NeuralNet2(
    inputs_size=28 * 28,
    hidden_size=5,
)
# Binary cross-entropy, paired with a model whose output already went through sigmoid.
criterion = nn.BCELoss()