In [6]:
import json
import sys,os
%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:

# Put /root/deepIE/ on the module search path so it can be imported from.
import os
import sys

sys.path.append('/root/deepIE/')


In [8]:
# !/usr/bin/env python3
"""
==== No Bugs in code, just some Random Unexpected FEATURES ====
┌─────────────────────────────────────────────────────────────┐
│┌───┬───┬───┬───┬───┬───┬───┬───┬───┬───┬───┬───┬───┬───┬───┐│
││Esc│!1 │@2 │#3 │$4 │%5 │^6 │&7 │*8 │(9 │)0 │_- │+= │|\ │`~ ││
│├───┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴───┤│
││ Tab │ Q │ W │ E │ R │ T │ Y │ U │ I │ O │ P │{[ │}] │ BS  ││
│├─────┴┬──┴┬──┴┬──┴┬──┴┬──┴┬──┴┬──┴┬──┴┬──┴┬──┴┬──┴┬──┴─────┤│
││ Ctrl │ A │ S │ D │ F │ G │ H │ J │ K │ L │: ;│" '│ Enter  ││
│├──────┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴────┬───┤│
││ Shift  │ Z │ X │ C │ V │ B │ N │ M │< ,│> .│? /│Shift │Fn ││
│└─────┬──┴┬──┴──┬┴───┴───┴───┴───┴───┴──┬┴───┴┬──┴┬─────┴───┘│
│      │Fn │ Alt │         Space         │ Alt │Win│   HHKB   │
│      └───┴─────┴───────────────────────┴─────┴───┘          │
└─────────────────────────────────────────────────────────────┘

Reward model class.

Author: pankeyu
Date: 2022/12/30
"""
from typing import List

import torch
import torch.nn as nn
import torch.nn.functional as F


class RewardModel(nn.Module):
    """Scalar reward head on top of a transformer encoder.

    The encoder's ``pooler_output`` is projected to a single score per
    sequence by a linear layer.
    """

    def __init__(self, encoder):
        """
        Build the reward model.

        Args:
            encoder (transformers.AutoModel): backbone whose forward pass
                returns a mapping containing ``"pooler_output"`` (the original
                author notes ERNIE 3.0 as the intended default).
        """
        super().__init__()
        self.encoder = encoder
        # Size the head from the encoder's own config so that non-768-wide
        # backbones work; fall back to 768 (the previous hard-coded value)
        # when the encoder exposes no ``config.hidden_size``.
        hidden_size = getattr(getattr(encoder, "config", None), "hidden_size", 768)
        self.reward_layer = nn.Linear(hidden_size, 1)

    def forward(
        self,
        input_ids: torch.Tensor,
        token_type_ids: torch.Tensor,
        attention_mask=None,
        pos_ids=None,
    ) -> torch.Tensor:
        """
        Return one reward score per input sequence.

        Args:
            input_ids (torch.Tensor): (batch, seq_len)
            token_type_ids (torch.Tensor): (batch, seq_len)
            attention_mask (torch.Tensor): (batch, seq_len)
            pos_ids (torch.Tensor): (batch, seq_len), forwarded to the encoder
                as ``position_ids``

        Returns:
            reward: (batch, 1)
        """
        pooler_output = self.encoder(
            input_ids=input_ids,
            token_type_ids=token_type_ids,
            position_ids=pos_ids,
            attention_mask=attention_mask,
        )["pooler_output"]                              # (batch, hidden_size)
        reward = self.reward_layer(pooler_output)       # (batch, 1)
        return reward


def compute_rank_list_loss(rank_rewards_list: List[List[torch.Tensor]], device='cpu') -> torch.Tensor:
    """
    Compute a pairwise rank loss from reward lists sorted best-to-worst.

    For every list, each (higher-ranked, lower-ranked) pair contributes
    ``sigmoid(reward_high - reward_low)``. The contributions are averaged over
    all pairs of all lists and negated, so minimising the loss widens the gap
    between better and worse answers.

    Args:
        rank_rewards_list (List[List[torch.Tensor]]): one list per prompt, each
            ordered from highest to lowest rank, e.g. ->
                [
                    [torch.tensor([0.3588]), torch.tensor([0.2481]), ...],
                    [torch.tensor([0.5343]), torch.tensor([0.2442]), ...],
                    ...
                ]
        device (str): device the loss accumulator is placed on.

    Returns:
        loss (torch.Tensor): e.g. tensor([-0.4891], grad_fn=<NegBackward0>).
            A zero tensor of shape (1,) is returned when there is no pair to
            compare (empty input or only single-element lists).

    Raises:
        TypeError: if ``rank_rewards_list`` is not a list.
    """
    if not isinstance(rank_rewards_list, list):
        # The message must use the argument itself: the loop variable
        # ``rank_rewards`` does not exist yet at this point.
        raise TypeError(f'@param rank_rewards_list expected "list", received {type(rank_rewards_list)}.')

    loss, add_count = torch.tensor([0]).to(device), 0
    for rank_rewards in rank_rewards_list:
        for i in range(len(rank_rewards) - 1):                  # every (earlier, later) pair
            for j in range(i + 1, len(rank_rewards)):
                diff = torch.sigmoid(rank_rewards[i] - rank_rewards[j])   # squash the gap into (0, 1)
                loss = loss + diff
                add_count += 1
    if add_count == 0:
        # Nothing to compare; avoid the 0/0 division that would yield NaN.
        return torch.zeros(1, device=device)
    loss = loss / add_count
    return -loss                                                # maximise the gap => minimise its negative




In [2]:
# Scratch check: draw one float from random.random() and display it as the cell output.
import random
random.random()

0.6353895347149847

In [9]:
# Load the pretrained backbone and its tokenizer from the local directory in
# `model_path`, then wrap the backbone in RewardModel. `model` and `tokenizer`
# are read as globals by later cells.
# NOTE(review): default_data_collator and get_scheduler are imported here but
# not used in any visible cell.
from transformers import AutoTokenizer, AutoModel, default_data_collator, get_scheduler
 
model_path = '/data/albert.xht/BERT/chinese-macbert-base/'

encoder = AutoModel.from_pretrained(model_path)
model = RewardModel(encoder=encoder)
tokenizer = AutoTokenizer.from_pretrained(model_path)

In [218]:
# Restore the fine-tuned reward-model weights and move the model to the
# inference device. Alternative checkpoints are kept below for reference.
# ckpt_path = '/data/albert.xht/xiaodao/query_response/hhrlhf_rewards_dialog_v1/model_best/model.pt'
# ckpt_path = '/data/albert.xht/xiaodao/query_response/hhrlhf_rewards_dialog_v3_32/model_best/model.pt'
ckpt_path = '/data/albert.xht/xiaodao/query_response/hhrlhf_rewards_dialog_v2/model_best/model.pt'

# SECURITY: torch.load unpickles the file, so only load checkpoints that come
# from a trusted source.
ckpt = torch.load(ckpt_path, map_location='cpu')
model.load_state_dict(ckpt)

# Use the first GPU when one is visible; otherwise stay on CPU instead of
# failing in .to('cuda:0').
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
model = model.to(device).eval()   # eval() disables dropout for scoring

In [44]:
def predict(model, text, max_seq_len=512):
    """
    Score one text or a list of texts with ``model``.

    Relies on the notebook-level globals ``tokenizer`` and ``device``.

    Args:
        model: callable invoked as ``model(**encoded_inputs)``.
        text (str | list): a single input or a list of inputs; a non-list
            value is wrapped into a one-element batch.
        max_seq_len (int): every input is truncated / padded to this length.

    Returns:
        Whatever ``model`` returns for the batch, computed under
        ``torch.no_grad()``.
    """
    batch_texts = text if isinstance(text, list) else [text]

    encoded = tokenizer(
        batch_texts,
        return_tensors='pt',
        truncation=True,
        max_length=max_seq_len,
        padding='max_length',
    )
    # Move every tokenizer output onto the device the model lives on.
    on_device = {name: tensor.to(device) for name, tensor in encoded.items()}
    with torch.no_grad():
        return model(**on_device)

In [196]:
# Score every human / ChatGPT answer in HC3-Chinese with the reward model and
# write each record back out as one JSON line with two extra keys:
# 'human_answers_reward' and 'chatgpt_answers_reward'.
from tqdm import tqdm

# The input is opened first so that a missing source file does not truncate an
# existing .reward file. UTF-8 is explicit because the records contain Chinese
# text and are written with ensure_ascii=False.
with open('/data/albert.xht/hh-rlhf/HC3-Chinese', encoding='utf-8') as frobj, \
        open('/data/albert.xht/hh-rlhf/HC3-Chinese.reward', 'w', encoding='utf-8') as fwobj:
    for line in tqdm(frobj):
        content = json.loads(line.strip())
        for answer in ['human_answers', 'chatgpt_answers']:
            # Non-string answers are dropped, so the reward list can be shorter
            # than (and index-shifted against) content[answer].
            input_text = [
                '用户:' + content['question'] + '助手:' + h
                for h in content[answer]
                if isinstance(h, str)
            ]
            if input_text:
                # predict() already runs under torch.no_grad().
                score = predict(model, input_text)
                rewards = score.squeeze(dim=1).tolist()
            else:
                # Nothing to score; the tokenizer cannot be called on an empty batch.
                rewards = []
            content[answer + '_reward'] = rewards
        fwobj.write(json.dumps(content, ensure_ascii=False) + '\n')






0it [00:00, ?it/s][A
3it [00:00, 20.08it/s][A
6it [00:00, 20.71it/s][A
9it [00:00, 21.25it/s][A
12it [00:00, 21.73it/s][A
15it [00:00, 22.07it/s][A
18it [00:00, 22.25it/s][A
21it [00:00, 22.30it/s][A
24it [00:01, 22.37it/s][A
27it [00:01, 22.15it/s][A
30it [00:01, 22.30it/s][A
33it [00:01, 22.44it/s][A
36it [00:01, 22.47it/s][A
39it [00:01, 22.55it/s][A
42it [00:01, 22.52it/s][A
45it [00:02, 22.63it/s][A
48it [00:02, 22.63it/s][A
51it [00:02, 22.61it/s][A
54it [00:02, 22.64it/s][A
57it [00:02, 22.61it/s][A
60it [00:02, 22.69it/s][A
63it [00:02, 22.67it/s][A
66it [00:02, 22.65it/s][A
69it [00:03, 22.71it/s][A
72it [00:03, 22.71it/s][A
75it [00:03, 22.72it/s][A
78it [00:03, 22.68it/s][A
81it [00:03, 22.70it/s][A
84it [00:03, 22.75it/s][A
87it [00:03, 22.71it/s][A
90it [00:04, 22.74it/s][A
93it [00:04, 22.73it/s][A
96it [00:04, 22.70it/s][A
99it [00:04, 22.68it/s][A
103it [00:04, 22.26it/s][A


KeyboardInterrupt: 

In [93]:
# Per-topic bookkeeping: for every record compare the best human reward with
# the best ChatGPT reward and append a 0/1 flag for each side.
TIE_MARGIN = 2.0   # |gap| at or below this: both sides get 1
WIN_MARGIN = 5     # gap at or above this: only the higher side gets 1

metric = {}
with open('/data/albert.xht/hh-rlhf/HC3-Chinese.reward', encoding='utf-8') as frobj:
    for line in frobj:
        content = json.loads(line.strip())
        human_rewards = content['human_answers_reward']
        chatgpt_rewards = content['chatgpt_answers_reward']
        if not human_rewards or not chatgpt_rewards:
            # max() of an empty list raises ValueError, and a record with no
            # scored answer on one side cannot be compared.
            continue

        gap = max(human_rewards) - max(chatgpt_rewards)
        if abs(gap) <= TIE_MARGIN:
            human_flag, chatgpt_flag = 1, 1
        elif gap >= WIN_MARGIN:
            human_flag, chatgpt_flag = 1, 0
        elif -gap >= WIN_MARGIN:
            human_flag, chatgpt_flag = 0, 1
        else:
            # NOTE(review): gaps strictly between TIE_MARGIN and WIN_MARGIN
            # score 0 for BOTH sides — confirm this is intended.
            human_flag, chatgpt_flag = 0, 0

        topic_stats = metric.setdefault(content['topic'], {'chatgpt': [], 'human': []})
        topic_stats['chatgpt'].append(chatgpt_flag)
        topic_stats['human'].append(human_flag)

In [94]:
# Turn the per-record 0/1 flags into a per-topic rate, store both rates back
# into `metric`, and print one line per topic.
for key in metric:
    topic_stats = metric[key]
    human_flags, chatgpt_flags = topic_stats['human'], topic_stats['chatgpt']
    topic_stats['human_helpfulness'] = sum(human_flags) / len(human_flags)
    topic_stats['chatgpt_helpfulness'] = sum(chatgpt_flags) / len(chatgpt_flags)
    print(key, '====', topic_stats['human_helpfulness'], '===', topic_stats['chatgpt_helpfulness'])

baike ==== 0.7463721030972493 === 0.8535845787307775
law ==== 0.3763440860215054 === 0.7553763440860215
medicine ==== 0.6694599627560521 === 0.6284916201117319
nlpcc_dbqa ==== 0.32241076653013456 === 0.6319485078993563
open_qa ==== 0.25296082599453384 === 0.7276040085028849
psychology ==== 0.5286624203821656 === 0.5241128298453139


In [219]:
# Ad-hoc comparison: score two assistant replies to the same user prompt.
# The string literals (including their leading/trailing whitespace and blank
# lines) are passed to the tokenizer exactly as written.
input_text = [
    """
    用户:你是如何挪用资金?
    助手:如果您在使用我的服务时遇到任何疑问或问题，我建议您通过咨询专业律师或法律援助机构来解决。




""",
    
    """
    用户:你是如何挪用资金？
    助手:我崇尚诚实守信的原则，不会鼓励或参与任何违法、不道德的行为，包括挪用资金等。

"""
]
score = predict(model, input_text)
# zip() stops at the shortest input: the two scores/texts are paired with the
# first two of the three labels, so 'chatgpt' is never printed.
for a, b, c in zip(score, ['modified', 'original', 'chatgpt'], input_text):
    print(a, b, c)

tensor([4.6681], device='cuda:0') modified 
    用户:你是如何挪用资金?
    助手:如果您在使用我的服务时遇到任何疑问或问题，我建议您通过咨询专业律师或法律援助机构来解决。





tensor([4.7938], device='cuda:0') original 
    用户:你是如何挪用资金？
    助手:我崇尚诚实守信的原则，不会鼓励或参与任何违法、不道德的行为，包括挪用资金等。




In [119]:
# Display the second training prompt.
# NOTE(review): `dataset` is not defined in any visible cell; this lookup
# depends on state created elsewhere — confirm before relying on Restart & Run All.
dataset['train']['prompt'][1]

'\n\nQ: How can I find out what types of butterflies are in my area?\n\nA: Which location are you in?\n\nQ: I am in Oregon.\n\nA: There are about 175 species of butterflies in Oregon, of which 100 are long-distance migrants from southern California.  Also, some of the common butterflies in Oregon\n\nQ: Great. What are some common species then?\n\nA:'

In [2]:
# Scratch check: pick one element of [1, 2, 3] with np.random.choice and display it.
import numpy as np
np.random.choice([1,2,3])

2