# 一、什么是Loss Function？   
**损失函数** 用于衡量模型预测与真实值之间的差距，是训练过程中的目标函数。  
> 损失越小，代表模型的预测越接近真实标签。

# 二、常见的损失函数   
![常见损失函数](images/Loss_Functions.png)

In [2]:
# torch.nn.L1Loss
#     size_average (bool, optional) 
#     reduce (bool, optional):
#     # 上面两者已经过时，推荐使用 reduction 即可
#     reduction (str, optional):
#         'none': no reduction will be applied, 
#         'mean': the sum of the output will be divided by the number of elements in the output, 
#         'sum': the output will be summed.  
#         Default: 'mean'

In [3]:
import torch
import torchvision
from torch import nn
from collections import OrderedDict

In [4]:
# Including a single float literal makes the whole tensor floating-point.
inputs = torch.tensor([1.0, 2, 3])
# Alternatively, request the dtype explicitly.
targets = torch.tensor([1, 2, 5], dtype=torch.float32)
print(inputs.shape)
# Give both tensors a leading dimension of size 1: (3,) -> (1, 3).
inputs, targets = (torch.reshape(t, (1, 3)) for t in (inputs, targets))
print(inputs.shape)

torch.Size([3])
torch.Size([1, 3])


In [5]:
# Evaluate the L1 (absolute error) loss under both reductions:
# 'sum' adds up the per-element errors, 'mean' averages them.
for reduction in ('sum', 'mean'):
    loss = nn.L1Loss(reduction=reduction)
    result = loss(inputs, targets)
    print(result)

tensor(2.)
tensor(0.6667)


In [6]:
# torch.nn.MSELoss
#     size_average (bool, optional)
#     reduce (bool, optional)
#     # 上面两者已弃用
#     reduction (str, optional):
#         'none': no reduction will be applied, 
#         'mean': the sum of the output will be divided by the number of elements in the output, 
#         'sum': the output will be summed. 
#         Default: 'mean'

In [7]:
# Evaluate the mean-squared-error loss under both reductions:
# 'sum' totals the squared errors, 'mean' averages them.
for reduction in ('sum', 'mean'):
    loss_MSE = nn.MSELoss(reduction=reduction)
    result_MSE = loss_MSE(inputs, targets)
    print(result_MSE)

tensor(4.)
tensor(1.3333)


In [8]:
# nn.CrossEntropyLoss 结合了 nn.LogSoftmax 和 nn.NLLLoss，所以：
#     • 输入是原始 logits（未经过 softmax）；
#     • 内部会自动做 log_softmax 处理；
#     • 正确类别的对数似然越大，损失越小。
# torch.nn.CrossEntropyLoss
#     weight (Tensor, optional)              类别权重（shape=[C]），用于类别不平衡
#     size_average (bool, optional)
#     ignore_index (int, optional)           忽略某个类别的标签索引（如在语义分割中忽略边界)
#     reduce (bool, optional)
#     reduction (str, optional)              同 L1Loss和 MSELoss
#     label_smoothing (float, optional)      标签平滑，防止过拟合
#         将原本 “硬标签” 的 one-hot 向量稍微“软化”，变成接近 one-hot 的概率分布，避免模型对预测过度自信，从而缓解过拟合。
#         例如：真实类别是 → [0, 0, 1, 0]，label_smoothing=0.1 时训练目标变为 [0.025, 0.025, 0.925, 0.025]；
#              此时模型预测 [0.01, 0.02, 0.96, 0.01] 已经足够好，不会被鼓励继续把概率推向 1，过度自信的预测反而会受到额外惩罚。

In [9]:
# One sample with three raw class scores (logits), already shaped (1, 3);
# the target is class index 1.
x = torch.tensor([[0.1, 0.2, 0.3]])
y = torch.tensor([1])
loss_cross = nn.CrossEntropyLoss()
result_cross = loss_cross(x, y)
print(result_cross)

tensor(1.1019)


In [None]:
# 如何在神经网络中使用 Loss Function

In [10]:
class Tudui(nn.Module):
    """Small convolutional classifier for 3-channel 32x32 images.

    Three ``Conv2d(kernel_size=5, padding=2)`` + ``MaxPool2d(2)`` stages are
    followed by a flatten step and two linear layers.  The convolutions keep
    the spatial size and each pooling layer halves it, so a 32x32 input
    reaches the flatten step as 64 channels of 4x4 (= 1024 features), which
    is the input width ``fc1`` is built for.

    Args:
        num_classes: Number of output scores per sample.  Defaults to 10,
            which reproduces the original fixed-size output layer.
    """

    def __init__(self, num_classes: int = 10) -> None:
        super().__init__()
        # Layer names are kept stable because they become state_dict keys
        # (e.g. "model1.conv1.weight").
        self.model1 = nn.Sequential(OrderedDict([
            ('conv1', nn.Conv2d(in_channels=3, out_channels=32, kernel_size=5, padding=2)),
            ('pool1', nn.MaxPool2d(kernel_size=2)),
            ('conv2', nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5, padding=2)),
            ('pool2', nn.MaxPool2d(kernel_size=2)),
            ('conv3', nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2)),
            ('pool3', nn.MaxPool2d(kernel_size=2)),
            ('flatten', nn.Flatten()),
            # 1024 = 64 channels x 4 x 4 (for a 32x32 input).
            ('fc1', nn.Linear(1024, 64)),
            ('fc2', nn.Linear(64, num_classes)),
        ]))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the raw class scores (no softmax applied) for a batch.

        Args:
            x: Image batch; the fixed ``fc1`` input width requires shape
                ``(N, 3, 32, 32)``.

        Returns:
            Tensor of shape ``(N, num_classes)``.
        """
        return self.model1(x)

In [11]:
# CIFAR-10 with train=False, downloaded on demand; ToTensor converts each
# image to a tensor.
dataset = torchvision.datasets.CIFAR10(
    root="./data/CIFAR10/",
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)
# Serve the dataset in batches of 64 samples.
dataloader = torch.utils.data.DataLoader(dataset=dataset, batch_size=64)

Files already downloaded and verified


In [14]:
# Push two batches through a freshly constructed network to show how a loss
# module is applied to the model outputs and how backward() is called on it.
tudui = Tudui()
loss = nn.CrossEntropyLoss()
for i, data in enumerate(dataloader):
    # Two batches are enough for the demonstration.
    if i >= 2:
        break
    imgs, targets = data
    outputs = tudui(imgs)
    print("output:")
    print(outputs)
    print("targets:")
    print(targets)
    print("loss:")
    result_loss = loss(outputs, targets)
    print(result_loss)
    # Backpropagate from the loss.  Nothing here zeroes the gradients or
    # steps an optimizer, so .grad accumulates across the two iterations.
    result_loss.backward()
    print("ok")

output:
tensor([[ 0.0037, -0.0074, -0.0213,  0.0122,  0.0981, -0.1133, -0.0756, -0.0018,
         -0.0177,  0.0488],
        [ 0.0011,  0.0011, -0.0312, -0.0292,  0.1048, -0.1113, -0.0527,  0.0094,
         -0.0160,  0.0449],
        [-0.0055, -0.0048, -0.0284, -0.0124,  0.1059, -0.1266, -0.0605,  0.0179,
         -0.0157,  0.0349],
        [-0.0030,  0.0042, -0.0115, -0.0141,  0.1056, -0.1255, -0.0508, -0.0013,
          0.0008,  0.0535],
        [ 0.0071, -0.0026, -0.0195,  0.0199,  0.0889, -0.0942, -0.0830,  0.0111,
          0.0017,  0.0439],
        [-0.0078, -0.0203, -0.0189,  0.0148,  0.1055, -0.1099, -0.0792,  0.0149,
         -0.0340,  0.0384],
        [ 0.0135,  0.0052, -0.0155,  0.0194,  0.1022, -0.0968, -0.0953,  0.0348,
         -0.0180,  0.0704],
        [ 0.0018, -0.0202, -0.0157,  0.0224,  0.0857, -0.1214, -0.0783,  0.0184,
         -0.0290,  0.0355],
        [ 0.0144,  0.0103, -0.0162, -0.0010,  0.0882, -0.1063, -0.0605, -0.0020,
         -0.0072,  0.0538],
        [ 0

In [None]:
# backward() 是 PyTorch 自动求导（autograd）机制的核心方法，用于执行反向传播（backpropagation）：
# 它计算 loss 对所有 requires_grad=True 的叶子张量（如模型参数）的梯度，并累加到它们的 .grad 中。
# 由于梯度是累加的，一般配合优化器使用（每次迭代先 zero_grad() 清零梯度，再 backward()、step()），详见下一节。