### Transform the original PairRM model to Hugging Face (HF) format

In [5]:
from llm_blender.pair_ranker.pairrm import DebertaV2PairRM
from transformers import DebertaV2Config, AutoTokenizer

# Backbone whose config and tokenizer the PairRM model is built on.
base_model_name = 'microsoft/deberta-v3-large'
config = DebertaV2Config.from_pretrained(base_model_name)
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# Marker tokens that prefix the source text and each candidate.
source_prefix = "<|source|>"
cand1_prefix = "<|candidate1|>"
cand2_prefix = "<|candidate2|>"
cand_prefix = "<|candidate|>"
prefix_tokens = [source_prefix, cand1_prefix, cand2_prefix, cand_prefix]
tokenizer.add_tokens(prefix_tokens)

# Read the ids back from the tokenizer instead of hard-coding them, then make
# sure they are the ids this conversion has always written into the config
# (128001..128004). A mismatch means the vocabulary changed and the converted
# model would look up the wrong embedding rows, so fail loudly here.
prefix_ids = tokenizer.convert_tokens_to_ids(prefix_tokens)
assert prefix_ids == [128001, 128002, 128003, 128004], (
    f"Unexpected prefix token ids {prefix_ids}; expected [128001, 128002, 128003, 128004]"
)

config.n_tasks = 1
config.source_prefix_id = prefix_ids[0]
config.cand1_prefix_id = prefix_ids[1]
config.cand2_prefix_id = prefix_ids[2]
config.cand_prefix_id = prefix_ids[3]
config.drop_out = 0.05
pairrm = DebertaV2PairRM(config)
# Grow the embedding matrix to cover the newly added tokens; left as the last
# expression so the notebook displays the resulting Embedding module.
pairrm.pretrained_model.resize_token_embeddings(len(tokenizer))





Embedding(128005, 1024)

In [6]:
!git clone https://huggingface.co/llm-blender/PairRM
import safetensors
import logging

load_result = safetensors.torch.load_model(pairrm, "./PairRM/model.safetensors") # path of original pairrm model
missing_keys, unexpected_keys = load_result
if missing_keys:
    print(f"Missing keys: {missing_keys}")
if unexpected_keys:
    print(f"Unexpected keys: {unexpected_keys}")
if not missing_keys and not unexpected_keys:
    print(f"Successfully loaded checkpoint from './PairRM/model.safetensors'")

Successfully loaded checkpoint from './PairRM/model.safetensors'


In [None]:
from transformers import Trainer, TrainingArguments

# The Trainer is used here purely as an exporter: no training is run, it is
# only asked to save the already-loaded model in HF format.
export_args = TrainingArguments(output_dir="./hf_PairRM", overwrite_output_dir=True)
trainer = Trainer(model=pairrm, args=export_args, tokenizer=tokenizer)

# Write the converted checkpoint to disk.
trainer.save_model("./hf_PairRM/final_checkpoint")

## Verifying Correctness

### Load the HF-format PairRM using `from_pretrained`

In [18]:
import os
# Restrict the visible GPUs before the model libraries below are imported.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
from typing import List
from llm_blender.pair_ranker.pairrm import DebertaV2PairRM
from transformers import AutoTokenizer

# Hub repository holding the converted (HF-format) PairRM checkpoint.
hf_repo_id = "llm-blender/PairRM-hf"
pairrm = DebertaV2PairRM.from_pretrained(hf_repo_id, device_map="cuda:0")
tokenizer = AutoTokenizer.from_pretrained(hf_repo_id)

# Marker tokens prepended to the source and to each candidate.
source_prefix, cand1_prefix, cand2_prefix = "<|source|>", "<|candidate1|>", "<|candidate2|>"

# Two toy comparisons: for each input, candidate A is scored against candidate B.
inputs = ["hello!", "I love you!"]
candidates_A = ["hi!", "I hate you!"]
candidates_B = ["f**k off!", "I love you, too!"]
def tokenize_pair(sources:List[str], candidate1s:List[str], candidate2s:List[str]):
    """Build padded model inputs for a batch of (source, candidate1, candidate2) triples.

    Each text is prefixed with its marker token (`source_prefix`, `cand1_prefix`,
    `cand2_prefix`), encoded on its own with the module-level `tokenizer`, and the
    three id lists are concatenated in that order to form one row per triple.

    Args:
        sources: Source texts, one per comparison.
        candidate1s: First candidates, aligned with `sources`.
        candidate2s: Second candidates, aligned with `sources`.

    Returns:
        The result of `tokenizer.pad` on the concatenated rows, with
        `return_tensors="pt"`.

    Raises:
        AssertionError: If the three lists do not have the same length.
    """
    assert len(sources) == len(candidate1s) == len(candidate2s)
    rows = []
    for src, cand1, cand2 in zip(sources, candidate1s, candidate2s):
        row = []
        # Encode the segments separately, in source -> candidate1 -> candidate2 order.
        for text in (source_prefix + src, cand1_prefix + cand1, cand2_prefix + cand2):
            row.extend(tokenizer.encode(text))
        rows.append(row)
    return tokenizer.pad({"input_ids": rows}, return_tensors="pt")

import torch

# Tokenize the toy pairs and move every tensor to the device the model is on.
encodings = tokenize_pair(inputs, candidates_A, candidates_B)
encodings = {k: v.to(pairrm.device) for k, v in encodings.items()}

# Pure inference: disable autograd so no graph is built or kept in memory.
with torch.no_grad():
    outputs = pairrm(**encodings)
logits = outputs.logits.tolist()
# Positive logit -> candidate A is preferred over candidate B.
comparison_results = outputs.logits > 0
print(logits)
# [1.9003021717071533, -1.2547134160995483]
print(comparison_results)
# tensor([ True, False], device='cuda:0'), which means whether candidate A is better than candidate B for each input

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


[1.9003021717071533, -1.2547134160995483]
tensor([ True, False], device='cuda:0')


### Load via the llm-blender wrapper

In [20]:
# Cross-check: score the same toy pairs through the llm_blender.Blender wrapper
# so the logits can be compared with the HF-format model's output.
import os
# Restrict the visible GPUs before llm_blender is imported.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import llm_blender
blender = llm_blender.Blender()
# Load Ranker
blender.loadranker("llm-blender/PairRM") # load ranker checkpoint
inputs = ["hello!", "I love you!"]
candidates_A = ["hi!", "I hate you!"]
candidates_B = ["f**k off!", "I love you, too!"]
# mode="[A,B]" with return_logits=True: one logit per input for the (A, B) pair.
logits = blender.compare(inputs, candidates_A, candidates_B, return_logits=True, mode="[A,B]")
comparison_results = logits > 0
print(logits)
# Recorded run printed: [ 1.9   -1.255]  (array-style output, rounded for display)
print(comparison_results)
# Recorded run printed: [ True False], which means whether candidate A is better than candidate B for each input



Successfully loaded ranker from  /home/dongfu/data/.cache/huggingface/hub/llm-blender/PairRM


Ranking candidates: 100%|██████████| 1/1 [00:00<00:00, 18.13it/s]

[ 1.9   -1.255]
[ True False]



