## 目标函数大总结

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
# logits shape:[BS, NC]
batch_size = 2
num_class = 4

logits = torch.randn(batch_size, num_class)  # input unnormalized score

target_indices = torch.randint(num_class, size=(batch_size,))  # delta目标分布
target_logits = torch.randn(batch_size, num_class)  # 非delta目标分布

### CE Loss

In [3]:
# 1. 调用Cross Entropy loss

# method 1 for CE loss
ce_loss_fn = nn.CrossEntropyLoss()
ce_loss = ce_loss_fn(logits, target_indices)
print(f'cross entropy loss1: {ce_loss}')

# method 2 for CE loss
ce_loss = ce_loss_fn(logits, torch.softmax(target_logits, dim=-1))
print(f'cross entropy loss2: {ce_loss}')

cross entropy loss1: 1.3177180290222168
cross entropy loss2: 1.6173157691955566


### NLL Loss(Negative Log Likelihood loss)

In [4]:
nll_fn = nn.NLLLoss()
nll_Loss = nll_fn(torch.log_softmax(logits, dim=-1) + 1e-7, target_indices)
# nll_loss = nll_fn(torch.log(torch.softmax(logits, -1)) + 1e-7, target_indices)

print(f'negative log-likelihood loss: {nll_Loss}')

negative log-likelihood loss: 1.3177179098129272


cross entropy value = NLL value

### 3. 调用Kullback-Leibler divergence loss (KL loss)

In [5]:
kl_loss_fn = nn.KLDivLoss()
kld_loss = kl_loss_fn(torch.log_softmax(logits, dim=-1), torch.softmax(target_logits, dim=-1))
print(f'kullback-leibler divergence loss: {kld_loss}')

kullback-leibler divergence loss: 0.09241800010204315




### 4. 验证 CE = IE + KLD

$H(p, q) = H(p) + D_{KL}(P \| q)$

In [6]:
ce_loss_fn_sample = nn.CrossEntropyLoss(reduction='none')
ce_loss_sample = ce_loss_fn_sample(logits, torch.softmax(target_logits, dim=-1))
print(f'cross entropy loss sample: {ce_loss_sample}')

kld_loss_fn_sample = nn.KLDivLoss(reduction='none')
kld_loss_sample = kld_loss_fn_sample(torch.log_softmax(logits, dim=-1), torch.softmax(target_logits, dim=-1)).sum(-1)
print(f'kullback-leibler divergence loss sample: {kld_loss_sample}')

target_information_entropy = torch.distributions.Categorical(probs=torch.softmax(target_logits, dim=-1)).entropy()
print(f'information entropy sample: {target_information_entropy}')  # IE为常数 如果目标分布是delta分布 IE=0

print(torch.allclose(ce_loss_sample, kld_loss_sample + target_information_entropy))

cross entropy loss sample: tensor([1.2337, 2.0009])
kullback-leibler divergence loss sample: tensor([0.0774, 0.6619])
information entropy sample: tensor([1.1563, 1.3389])
True


### 5. Binary Cross Entropy loss (BCE Loss)

In [7]:
bce_loss_fn = nn.BCELoss()
logits = torch.randn(batch_size)
prob_1 = torch.sigmoid(logits)
target = torch.randint(2, size=(batch_size, ))
bce_loss = bce_loss_fn(prob_1, target.float())
print(f'binary cross entropy loss: {bce_loss}')

binary cross entropy loss: 0.8749682903289795


用NLL Loss 代替BCE Loss 做二分类

In [8]:
prob_0 = 1 - prob_1.unsqueeze(-1)
prob = torch.cat([prob_0, prob_1.unsqueeze(-1)], dim=-1)
nll_loss_binary = nll_fn(torch.log(prob), target)
print(f'negative log-likelihood loss binary: {nll_loss_binary}')

print(torch.allclose(bce_loss, nll_loss_binary))

negative log-likelihood loss binary: 0.8749683499336243
True


### 6. Cosine Similarity loss

In [9]:
cosine_loss_fn = nn.CosineEmbeddingLoss()
v1 = torch.randn(batch_size, 512)
v2 = torch.randn(batch_size, 512)
target = torch.randint(2, size=(batch_size,)) * 2 - 1  # -1 或 1
cosine_loss = cosine_loss_fn(v1, v2, target)
print(f'cosine similarity loss: {cosine_loss}')

cosine similarity loss: 0.49027764797210693
