In [None]:
# Offline install: --no-index stops pip from contacting a package index and
# --find-links makes it resolve packages from the given local directories
# (Kaggle input datasets that are expected to contain the wheel files).
!pip install -U transformers peft bitsandbytes accelerate --no-index --find-links /kaggle/input/lmsys-wheel-files
!pip install -q -U einops --no-index --find-links /kaggle/input/einops-v0-8-0

In [None]:
import json

import torch
from transformers import AutoModel, AutoTokenizer
from transformers import BitsAndBytesConfig


In [None]:

# Reward-model checkpoint to load. The smaller variants are kept as comments:
# they used to be assigned and immediately overwritten, so only the 20b path
# ever took effect.
# model_path = "/kaggle/input/internlm2-1.8b-reward/transformers/default/1/internlm_internlm2-1_8b-reward"
# model_path = "/kaggle/input/internlm-2-7b/transformers/default/1/internlm_internlm2-7b-reward"
model_path = "/kaggle/input/iternlm2-20b-reward/transformers/default/1/internlm_internlm2-20b-reward"

# trust_remote_code=True lets transformers run the Python code shipped with
# the checkpoint, so only point model_path at a source you trust.
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# 4-bit NF4 quantization with double quantization; compute runs in float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)


def _load_reward_model(device):
    """Load the quantized checkpoint at ``model_path`` onto ``device``.

    Args:
        device: value passed to ``device_map`` (for example ``"cuda:0"``).

    Returns:
        The model object returned by ``AutoModel.from_pretrained``.
    """
    return AutoModel.from_pretrained(
        model_path,
        device_map=device,
        torch_dtype=torch.float16,
        trust_remote_code=True,
        quantization_config=bnb_config,
    )


# One independent copy of the model per GPU so two workers can score in parallel.
model_0 = _load_reward_model("cuda:0")
model_1 = _load_reward_model("cuda:1")

# model_0 = AutoModel.from_pretrained(
#     model_path, 
#     device_map="cuda:0", 
#     torch_dtype=torch.float16, 
#     trust_remote_code=True,
# )
# model_1 = AutoModel.from_pretrained(
#     model_path, 
#     device_map="cuda:1", 
#     torch_dtype=torch.float16, 
#     trust_remote_code=True,
# )

In [None]:
# bnb_4bit_path = "internlm2-20b-rm-bnb-4bit"
# model_0.save_pretrained(bnb_4bit_path)
# tokenizer.save_pretrained(bnb_4bit_path)

In [None]:
# from IPython.display import FileLink, display
# display(FileLink("/kaggle/working/internlm2-20b-rm-bnb-4bit/model-00001-of-00003.safetensors"))

In [None]:
# Bare expression: the notebook renders the repr of the loaded model for inspection.
model_0

In [None]:
import pandas as pd

# Load the competition test set. The commented-out lines are a disabled debug
# switch: when test.csv has exactly 3 rows they would set DEBUG and score the
# first 1000 rows of train.csv instead.
# DEBUG = False
df = pd.read_csv('/kaggle/input/lmsys-chatbot-arena/test.csv')
# if len(df) == 3:
#     DEBUG = True
#     df = pd.read_csv('/kaggle/input/lmsys-chatbot-arena/train.csv')
#     df = df.head(1000)
# Bare expression: display the loaded frame.
df

In [None]:

def cut_off(example, max_length=7200, max_prompt_chars=1000, max_response_chars=3000):
    """Trim a multi-turn conversation so its text fits a character budget.

    The longest prefix of turns whose combined character count (prompts plus
    both responses) does not exceed ``max_length`` is kept. If even the first
    turn is over budget, only that turn is kept and its texts are truncated to
    ``max_prompt_chars`` / ``max_response_chars``.

    Args:
        example: mapping-like row with list-of-string entries under
            ``'prompt'``, ``'response_a'`` and ``'response_b'``. It is
            modified in place.
        max_length: character budget for the kept turns.
        max_prompt_chars: cap applied to the first prompt in the fallback case.
        max_response_chars: cap applied to each first response in the fallback
            case.

    Returns:
        The same ``example`` object, trimmed.
    """
    columns = ('prompt', 'response_a', 'response_b')
    n_turns = len(example['prompt'])
    if n_turns == 0:
        # Nothing to trim. Indexing turn 0 of an empty conversation would
        # raise IndexError.
        return example

    def _turn_chars(turn):
        # A column that is shorter than 'prompt' contributes nothing for the
        # turns it does not have (same effect as slicing it).
        return sum(len(example[col][turn]) for col in columns if turn < len(example[col]))

    # Single forward pass with a running total: lengths are non-negative, so
    # the first turn that breaks the budget marks the end of the longest
    # prefix that fits. This replaces a recursion that went one stack frame
    # per dropped turn and recounted the whole prefix at every step.
    kept = 0
    total = 0
    for turn in range(n_turns):
        total += _turn_chars(turn)
        if total > max_length:
            break
        kept = turn + 1

    if kept == 0:
        # Even the first turn alone is too long: keep it, but truncated.
        example['prompt'] = [text[:max_prompt_chars] for text in example['prompt'][:1]]
        example['response_a'] = [text[:max_response_chars] for text in example['response_a'][:1]]
        example['response_b'] = [text[:max_response_chars] for text in example['response_b'][:1]]
        return example

    for col in columns:
        example[col] = example[col][:kept]
    return example
        


def process_fn(example):
    """Decode the serialized conversation columns of one row, then trim it.

    ``prompt``, ``response_a`` and ``response_b`` are parsed with
    ``json.loads`` and any ``null`` turn becomes an empty string. The
    previous ``eval`` call executed arbitrary expressions found in the CSV
    and decoded JSON escapes with Python string rules, e.g. a surrogate pair
    such as ``\\ud83d\\ude00`` became two lone surrogates instead of one
    character.

    NOTE(review): assumes each of the three columns holds a JSON array of
    strings/nulls (the old ``null`` mapping suggests so) - confirm against
    the data.

    Args:
        example: mapping-like row; modified in place.

    Returns:
        Whatever ``cut_off(example)`` returns.
    """
    for column in ('prompt', 'response_a', 'response_b'):
        turns = json.loads(example[column])
        example[column] = ["" if turn is None else turn for turn in turns]
    return cut_off(example)

# Parse and trim every row; process_fn receives one row at a time (axis=1).
new_df = df.apply(process_fn, axis=1)
new_df

In [None]:
%%time
import math
import numpy as np
def inference(df, model):
    """Score both candidate conversations of every row and attach probabilities.

    For each row two chats are built (user prompt followed by response A or
    response B for every turn) and scored with
    ``model.get_scores(tokenizer, [chat_a, chat_b])``. ``tokenizer`` is the
    notebook-level global.

    * If the two scores differ by less than 0.08 the row is called a tie.
    * Otherwise a softmax over the two scores gives the win probabilities,
      with 0.0002 reserved for the tie class.
    * If scoring fails, a near-uniform prediction is used and the failure is
      counted.

    Args:
        df: frame with list-valued ``prompt``, ``response_a`` and
            ``response_b`` columns. The three prediction columns are added to
            it in place.
        model: object exposing ``get_scores(tokenizer, chats)`` that returns
            two scores.

    Returns:
        ``df`` with ``winner_model_a_pred``, ``winner_model_b_pred`` and
        ``winner_tie_pred`` columns.
    """
    tie_margin = 0.08                        # score gap below which we predict a tie
    tie_probs = [0.00005, 0.00005, 0.9999]
    tie_floor = 0.0002                       # tie mass kept on decisive rows
    min_prob = 0.00005                       # smallest probability ever emitted
    fallback_probs = [0.33334, 0.33333, 0.33333]

    error_cnt = 0
    first_error = None
    y_pred = []
    for _, row in df.iterrows():
        chat_a = []
        chat_b = []
        # zip stops at the shortest list, so a ragged row cannot raise here.
        for prompt, resp_a, resp_b in zip(row['prompt'], row['response_a'], row['response_b']):
            chat_a.append({"role": "user", "content": prompt})
            chat_a.append({"role": "assistant", "content": resp_a})

            chat_b.append({"role": "user", "content": prompt})
            chat_b.append({"role": "assistant", "content": resp_b})

        try:
            score1, score2 = model.get_scores(tokenizer, [chat_a, chat_b])
            margin = float(score1) - float(score2)
            if not math.isfinite(margin):
                raise ValueError(f"non-finite reward scores: {score1!r}, {score2!r}")

            if abs(margin) < tie_margin:
                y_pred.append(list(tie_probs))
            else:
                # Two-way softmax written on the score difference so that
                # exp() only ever sees a non-positive argument (no overflow).
                if margin >= 0:
                    p_a = 1.0 / (1.0 + math.exp(-margin))
                else:
                    exp_margin = math.exp(margin)
                    p_a = exp_margin / (1.0 + exp_margin)
                p_b = 1.0 - p_a

                # Subtracting the tie share can push the losing side below
                # zero when the margin is large; floor it and renormalize.
                probs = [
                    max(p_a - tie_floor / 2, min_prob),
                    max(p_b - tie_floor / 2, min_prob),
                    tie_floor,
                ]
                total = sum(probs)
                y_pred.append([p / total for p in probs])
        except Exception as exc:  # best effort: keep going, but not on KeyboardInterrupt
            if first_error is None:
                first_error = exc
            y_pred.append(list(fallback_probs))
            error_cnt += 1

    # reshape keeps the array 2-D even when df has no rows
    y_pred = np.array(y_pred, dtype=float).reshape(-1, 3)
    df['winner_model_a_pred'] = y_pred[:, 0]
    df['winner_model_b_pred'] = y_pred[:, 1]
    df['winner_tie_pred'] = y_pred[:, 2]
    print(error_cnt)
    if first_error is not None:
        print(f"first scoring error: {first_error!r}")
    return df

In [None]:
# test
# inference(new_df[:20], model_0)

In [None]:
%%time
# Split the rows into two interleaved halves (even / odd positions). .copy()
# gives each worker its own frame, since inference() adds columns to the
# frame it receives.
sub_0 = new_df.iloc[0::2].copy()
sub_1 = new_df.iloc[1::2].copy()

from concurrent.futures import ThreadPoolExecutor 

# Run inference() on both halves at once, one worker thread per model:
# executor.map pairs sub_0 with model_0 and sub_1 with model_1.
with ThreadPoolExecutor(max_workers=2) as executor:
    results = executor.map(inference, (sub_0, sub_1), (model_0, model_1))

# Results are concatenated in map order: every row of sub_0 first, then every
# row of sub_1. result_df is therefore not in the original row order, although
# the original index labels are kept.
result_df = pd.concat(list(results), axis=0)
result_df.head()

In [None]:
# Strip the `_pred` suffix so the prediction columns carry the names written
# to the submission file.
result_df = result_df.rename(
    columns={
        'winner_model_a_pred': 'winner_model_a',
        'winner_model_b_pred': 'winner_model_b',
        'winner_tie_pred': 'winner_tie',
    }
)
result_df

In [None]:
# Write only the id and the three probability columns, without the index,
# then read the file back and show its first rows as a sanity check.
result_df[['id', 'winner_model_a', 'winner_model_b', 'winner_tie']].to_csv('submission.csv', index=False)
pd.read_csv('submission.csv').head(5)

In [None]:
# from sklearn.metrics import log_loss, accuracy_score

# if DEBUG:
#     y_true = result_df[['winner_model_a', 'winner_model_b', 'winner_tie']].values.tolist()
#     y_pred = result_df[['winner_model_a_pred', 'winner_model_b_pred', 'winner_tie_pred']].values.tolist()
#     print(log_loss(y_true, y_pred))

In [None]:
# if not DEBUG:
#     result_df = result_df.rename({'winner_model_a_pred' : 'winner_model_a', 'winner_model_b_pred' : 'winner_model_b', 'winner_tie_pred' : 'winner_tie'})
#     result_df[['id', 'winner_model_a', 'winner_model_b', 'winner_tie']].to_csv('submission.csv', index=False)
#     pd.read_csv('submission.csv').head(5)