### Transform the original PairRM checkpoint into Hugging Face format

In [5]:
from llm_blender.pair_ranker.pairrm import DebertaV2PairRM
from transformers import DebertaV2Config, AutoTokenizer
# Start from the stock DeBERTa-v3-large config and tokenizer, then register
# the marker tokens that prefix the source text and each candidate.
config = DebertaV2Config.from_pretrained('microsoft/deberta-v3-large')
tokenizer = AutoTokenizer.from_pretrained('microsoft/deberta-v3-large')
source_prefix = "<|source|>"
cand1_prefix = "<|candidate1|>"
cand2_prefix = "<|candidate2|>"
cand_prefix = "<|candidate|>"
tokenizer.add_tokens([source_prefix, cand1_prefix, cand2_prefix, cand_prefix])

config.n_tasks = 1
# Read the marker ids back from the tokenizer rather than hard-coding them
# (this cell previously hard-coded 128001-128004), so the config can never
# drift from the vocabulary the tokenizer actually produced.
config.source_prefix_id = tokenizer.convert_tokens_to_ids(source_prefix)
config.cand1_prefix_id = tokenizer.convert_tokens_to_ids(cand1_prefix)
config.cand2_prefix_id = tokenizer.convert_tokens_to_ids(cand2_prefix)
config.cand_prefix_id = tokenizer.convert_tokens_to_ids(cand_prefix)
config.drop_out = 0.05
pairrm = DebertaV2PairRM(config)
# Grow the embedding table to cover the newly added tokens. Kept as the last
# expression so the resized Embedding is displayed as the cell output.
pairrm.pretrained_model.resize_token_embeddings(len(tokenizer))





Embedding(128005, 1024)

In [6]:
!git clone https://huggingface.co/llm-blender/PairRM
import safetensors
import logging

load_result = safetensors.torch.load_model(pairrm, "./PairRM/model.safetensors") # path of original pairrm model
missing_keys, unexpected_keys = load_result
if missing_keys:
    print(f"Missing keys: {missing_keys}")
if unexpected_keys:
    print(f"Unexpected keys: {unexpected_keys}")
if not missing_keys and not unexpected_keys:
    print(f"Successfully loaded checkpoint from './PairRM/model.safetensors'")

Successfully loaded checkpoint from './PairRM/model.safetensors'


In [None]:
from transformers import Trainer, TrainingArguments
# Trainer is used here only as an exporter: no training is started, it just
# writes the model and tokenizer out to the final checkpoint directory.
export_args = TrainingArguments(output_dir="./hf_PairRM", overwrite_output_dir=True)
trainer = Trainer(model=pairrm, args=export_args, tokenizer=tokenizer)
trainer.save_model("./hf_PairRM/final_checkpoint")

### Load the HF-format PairRM using `from_pretrained`

In [1]:
import os

# Restrict the process to GPU 0; this is set before the model library is imported.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

from llm_blender.pair_ranker.pairrm import DebertaV2PairRM

# Hub repository holding the converted (HF-format) PairRM weights.
HF_PAIRRM_REPO = "llm-blender/PairRM-hf"
pairrm = DebertaV2PairRM.from_pretrained(HF_PAIRRM_REPO, device_map="cuda:0")

2024-01-05 15:46:09.979198: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-01-05 15:46:10.826106: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/usr/local/cuda-11.8//lib64
2024-01-05 15:46:10.826187: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/usr/local/cuda-11.8//lib64


Downloading config.json:   0%|          | 0.00/967 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/1.74G [00:00<?, ?B/s]

## Verifying Correctness

In [1]:
import os

# Restrict the process to GPU 0; this is set before llm_blender is imported.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import llm_blender

# Reference implementation: the original PairRM ranker loaded through Blender.
RANKER_CHECKPOINT = "llm-blender/PairRM"
blender = llm_blender.Blender()
blender.loadranker(RANKER_CHECKPOINT)

2024-01-05 15:11:00.611021: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-01-05 15:11:01.483983: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/usr/local/cuda-11.8//lib64
2024-01-05 15:11:01.484081: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/usr/local/cuda-11.8//lib64


Successfully loaded ranker from  /home/dongfu/data/.cache/huggingface/hub/llm-blender/PairRM


In [2]:
# Two prompts, each paired with an A and a B candidate response.
inputs = ["hello!", "I love you!"]
candidates_A = ["hi!", "I hate you!"]
candidates_B = ["f**k off!", "I love you, too!"]

# Ask the reference ranker for raw logits rather than a boolean preference.
compare_options = dict(return_logits=True, mode="[A,B]")
comparison_results = blender.compare(inputs, candidates_A, candidates_B, **compare_options)
comparison_results

Ranking candidates:   0%|          | 0/1 [00:00<?, ?it/s]

tensor([[     1, 128001,  12018,    300,      2,      1, 128002,   8595,    300,
              2,      1, 128003,   2994,   1225,   1225,   1165,    442,    300,
              2,      0,      0,      0,      0],
        [     1, 128001,    273,    472,    274,    300,      2,      1, 128002,
            273,   3254,    274,    300,      2,      1, 128003,    273,    472,
            274,    261,    461,    300,      2]], device='cuda:0')


Ranking candidates: 100%|██████████| 1/1 [00:00<00:00,  1.69it/s]


array([ 1.9  , -1.255], dtype=float16)

In [3]:
# Token ids of the first encoded example, copied from the ranker's printed batch.
first_example_ids = [
    1, 128001, 12018, 300, 2,
    1, 128002, 8595, 300, 2,
    1, 128003, 2994, 1225, 1225, 1165, 442, 300, 2,
    0, 0, 0, 0,
]
# Keep special tokens so the prefix markers and padding stay visible in the text.
blender.ranker.tokenizer.decode(first_example_ids, skip_special_tokens=False)

'[CLS]<|source|> hello![SEP][CLS]<|candidate1|> hi![SEP][CLS]<|candidate2|> f**k off![SEP][PAD][PAD][PAD][PAD]'

In [2]:
import torch

# Token ids of the first encoded example (source, candidate 1, candidate 2,
# then padding), copied from the reference ranker's printed batch.
pair_token_ids = [
    1, 128001, 12018, 300, 2,
    1, 128002, 8595, 300, 2,
    1, 128003, 2994, 1225, 1225, 1165, 442, 300, 2,
    0, 0, 0, 0,
]
input_ids = torch.tensor(pair_token_ids, dtype=torch.long, device='cuda:0')
# Id 0 is padding, so every non-zero position is attended to.
attention_masks = input_ids.ne(0).float()
# Add the leading batch dimension the model call is given.
input_ids = input_ids.unsqueeze(0)
attention_masks = attention_masks.unsqueeze(0)
# This is a verification-only forward pass: disable autograd so no graph is
# built (the original output carried a grad_fn). The logit value is unchanged.
with torch.no_grad():
    pairrm_output = pairrm(input_ids=input_ids, attention_mask=attention_masks)
# Expressions inside a `with` block are not auto-displayed, so show it here.
pairrm_output

SequenceClassifierOutput(loss=None, logits=tensor([1.9003], device='cuda:0', grad_fn=<MeanBackward1>), hidden_states=None, attentions=None)

In [3]:
import torch

# Token ids of the second encoded example (source, candidate 1, candidate 2),
# copied from the reference ranker's printed batch; this row has no padding.
pair_token_ids = [
    1, 128001, 273, 472, 274, 300, 2,
    1, 128002, 273, 3254, 274, 300, 2,
    1, 128003, 273, 472, 274, 261, 461, 300, 2,
]
input_ids = torch.tensor(pair_token_ids, dtype=torch.long, device='cuda:0')
# Id 0 is treated as padding, so every non-zero position is attended to.
attention_masks = input_ids.ne(0).float()
# Add the leading batch dimension the model call is given.
input_ids = input_ids.unsqueeze(0)
attention_masks = attention_masks.unsqueeze(0)
# This is a verification-only forward pass: disable autograd so no graph is
# built (the original output carried a grad_fn). The logit value is unchanged.
with torch.no_grad():
    pairrm_output = pairrm(input_ids=input_ids, attention_mask=attention_masks)
# Expressions inside a `with` block are not auto-displayed, so show it here.
pairrm_output

SequenceClassifierOutput(loss=None, logits=tensor([-1.2547], device='cuda:0', grad_fn=<MeanBackward1>), hidden_states=None, attentions=None)