## 【LLM-DPO】之Bradley-Terry模型

DPO 在介绍偏好学习时，以 Bradley-Terry (BT) 模型作为其前置的偏好建模方式

在竞技赛中，无法直接获得选手的分值，BT是通过选手之间的比较，来建模出各自的分数

本 Notebook 使用 PyTorch 实现 BT 模型的两种形式：最大似然估计形式，以及基于 logistic 回归的形式

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
class BTModel(nn.Module):
    """Bradley-Terry model with one learnable score per player.

    The probability that player ``i`` beats player ``j`` is modelled as
    ``exp(s_i) / (exp(s_i) + exp(s_j))``, which is the same quantity as
    ``sigmoid(s_i - s_j)``. Both formulations are exposed, each paired with
    its own loss function.

    Args:
        N: Number of players, i.e. the length of the score vector.
    """

    def __init__(self, N):
        super().__init__()
        # Every player starts from the same score of 1.
        self.reward = nn.Parameter(torch.ones(N))
        # Not used by any method of this class; kept so that code reading
        # this attribute keeps working.
        self.BCE_loss = nn.BCELoss()

    def forward_exp(self, chosen_id, rejected_id):
        """Return P(chosen beats rejected) using the exp-ratio form.

        Args:
            chosen_id: Index (or index tensor) of the preferred player(s).
            rejected_id: Index (or index tensor) of the other player(s).

        Returns:
            Tensor of win probabilities for the chosen player(s).
        """
        score_chosen = self.reward[chosen_id]
        score_rejected = self.reward[rejected_id]
        # The ratio is invariant to adding a constant to both scores, so the
        # larger score is subtracted before exponentiating. This keeps
        # exp() from overflowing to inf (and the ratio from becoming nan)
        # for large scores. The shift is detached: the ratio does not depend
        # on it, so it must not contribute to the gradient.
        shift = torch.maximum(score_chosen, score_rejected).detach()
        exp_chosen = torch.exp(score_chosen - shift)
        exp_rejected = torch.exp(score_rejected - shift)
        return exp_chosen / (exp_chosen + exp_rejected)

    def forward_sigmoid(self, chosen_id, rejected_id):
        """Return P(chosen beats rejected) as sigmoid of the score difference.

        Args:
            chosen_id: Index (or index tensor) of the preferred player(s).
            rejected_id: Index (or index tensor) of the other player(s).

        Returns:
            Tensor of win probabilities for the chosen player(s).
        """
        reward_chosen = self.reward[chosen_id]
        reward_rejected = self.reward[rejected_id]
        return torch.sigmoid(reward_chosen - reward_rejected)

    # Maximum Likelihood Estimation
    def loss_exp(self, pred, label):
        """Negative log-likelihood of the observed outcome.

        Args:
            pred: Tensor of predicted win probabilities.
            label: Observed outcome(s), as a tensor or a Python number.
                Entries equal to 1 are scored with ``-log(pred)``; every
                other entry is scored with ``-log(1 - pred)``.

        Returns:
            Tensor of element-wise losses with the broadcast shape of
            ``pred`` and ``label``.
        """
        epsilon = 1e-7  # keeps both log() arguments away from 0
        pred = torch.clamp(pred, epsilon, 1 - epsilon)
        label = torch.as_tensor(label, device=pred.device)
        # Element-wise selection so that batched labels work; a Python
        # `if` on a multi-element tensor would raise.
        return torch.where(label == 1, -torch.log(pred), -torch.log(1 - pred))

    # Binary Cross Entropy
    def loss_sigmoid(self, pred, label):
        """Binary cross-entropy between ``pred`` and ``label``.

        Args:
            pred: Tensor of predicted win probabilities.
            label: Target value(s); 1 means the first player won.

        Returns:
            Tensor of element-wise losses (no reduction is applied).
        """
        epsilon = 1e-7  # avoid log(0)
        # Restrict predictions to the range [epsilon, 1 - epsilon].
        pred = torch.clamp(pred, epsilon, 1 - epsilon)
        loss = -(label * torch.log(pred) + (1 - label) * torch.log(1 - pred))

        return loss

In [2]:
N = 4  # number of players
model = BTModel(N)  # sized from N instead of repeating the literal
print(model.reward)
# Match records, which can equally be read as preference data. Each tuple is
# unpacked by the training loops as (id_i, id_j, label).
datas = [(0, 1, 1), (2, 3, 1), (1, 3, 1)]
# Not referenced by the cells visible here; kept in case later code uses it.
loss_fn = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

$$P(i \succ j) = \frac{\exp(s_i)}{\exp(s_i) + \exp(s_j)}$$

In [3]:
# Train the model with the exp-ratio forward pass and its likelihood loss.
for epoch in range(100):
    total_loss = 0
    # One SGD step per recorded comparison.
    for id_i, id_j, label in datas:
        target = torch.tensor(label, dtype=torch.float32)

        optimizer.zero_grad()
        pred = model.forward_exp(id_i, id_j)
        loss = model.loss_exp(pred, target)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Report the summed loss every tenth epoch.
    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Loss: {total_loss}")

# Print each player's strength parameter.
print(model.reward)

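另一种建模方式是使用 logistic 回归：由于 $\frac{\exp(s_i)}{\exp(s_i) + \exp(s_j)} = \sigma(s_i - s_j)$，BT 模型等价于对分数差 $s_i - s_j$ 做 logistic 回归，当 $i$ 获胜时对应的负对数似然损失为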

$$
L = -\log \sigma (s_i-s_j)
$$

In [4]:
# Train the model with the sigmoid (logistic regression) formulation.

N = 4  # number of players
model = BTModel(N)  # sized from N instead of repeating the literal
print(model.reward)
optimizer = optim.SGD(model.parameters(), lr=0.01)

for i in range(100):
    total_loss = 0
    # One SGD step per recorded comparison.
    for data in datas:
        id_i, id_j, label = data
        optimizer.zero_grad()

        pred = model.forward_sigmoid(id_i, id_j)
        loss = model.loss_sigmoid(pred, torch.tensor(label, dtype=torch.float32))
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    # Report the summed loss every tenth epoch.
    if i % 10 == 0:
        print(f"Epoch {i}, Loss: {total_loss}")

# Print each player's strength parameter.
print(model.reward)

# 训练更多的 epoch，选手之间的分数差异会越来越大

In [5]:
# Train the model with the sigmoid formulation for many more epochs.

N = 4  # number of players
model = BTModel(N)  # sized from N instead of repeating the literal
print(model.reward)
# Match records, which can equally be read as preference data.
datas = [(0, 1, 1), (2, 3, 1), (1, 3, 1)]
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Use a much larger number of epochs to simulate overfitting.
epochs = 100000
print_step = epochs // 10
for i in range(epochs):
    total_loss = 0
    # One SGD step per recorded comparison.
    for data in datas:
        id_i, id_j, label = data
        optimizer.zero_grad()

        pred = model.forward_sigmoid(id_i, id_j)
        loss = model.loss_sigmoid(pred, torch.tensor(label, dtype=torch.float32))
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    # Report the summed loss and the current scores ten times over the run.
    if i % print_step == 0:
        print(f"Epoch {i}, Loss: {total_loss}")
        print(model.reward)

# Print each player's strength parameter.
print(model.reward)