In [None]:
!pip install -q -U bitsandbytes --no-index --find-links ../input/libs-install
!pip install -q -U transformers --no-index --find-links ../input/libs-install

In [None]:
import os
import gc
import re
from time import time

import torch
import transformers
import sklearn
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from transformers import Gemma2ForCausalLM, GemmaTokenizer, BitsAndBytesConfig

import time
from catboost import CatBoostClassifier, Pool
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, log_loss

from torch.cuda.amp import autocast
from threading import Thread

# Switch off the flash and memory-efficient scaled-dot-product attention backends
# for the whole process.
torch.backends.cuda.enable_flash_sdp(False)
torch.backends.cuda.enable_mem_efficient_sdp(False)

# Only a warning is printed when no GPU is visible; execution is not stopped here.
if not torch.cuda.is_available():
    print("Sorry - GPU required!")

In [None]:
# Training metadata and the precomputed embedding matrix saved alongside it.
train_df = pd.read_csv('/kaggle/input/embedding/train_embed.csv')
train_embed = np.load('/kaggle/input/embedding/gemma2_train_embed.npy')

# Collapse the three winner columns into a single class id: the position of the
# largest value in (winner_model_a, winner_model_b, winner_tie).
train_df.loc[:, 'label'] = (
    train_df[['winner_model_a', 'winner_model_b', 'winner_tie']]
    .values
    .argmax(axis=1)
)

In [None]:
# Stratified 90/10 hold-out split of the training embeddings.
Targets = ['winner_model_a', 'winner_model_b', 'winner_tie']

y = train_df['label'].values
train_idx, test_idx = train_test_split(
    train_df.index,
    test_size=0.1,
    random_state=42,
    stratify=y,
)

# NOTE: the split returns index *labels*, which are then used positionally (both to
# index the NumPy matrix and via `iloc`). That lines up because `train_df` was read
# without an index column, i.e. it has the default 0..n-1 index.
X_train = train_embed[train_idx]
y_train = train_df.iloc[train_idx]['label'].values
X_test = train_embed[test_idx]
y_test = train_df.iloc[test_idx]['label'].values

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

In [None]:
# Create an empty classifier, then load the saved model from the .cbm file into it.
model_cb = CatBoostClassifier()
model_cb.load_model(fname='/kaggle/input/catboost-mike/catboost.cbm')

In [None]:
# Bare expression: the notebook echoes the loaded classifier object as this cell's output.
model_cb

In [None]:
# Inference configuration.
MODEL_PATH = '/kaggle/input/gemma-2-9b-hf'
MAX_LENGTH = 1024  # every text is padded / truncated to this many tokens
BATCH_SIZE = 2     # rows per forward pass on each GPU

# One device handle per GPU; each GPU receives its own copy of the model below.
device0, device1 = torch.device('cuda:0'), torch.device('cuda:1')

tokenizer = GemmaTokenizer.from_pretrained(MODEL_PATH)

# 4-bit bitsandbytes quantisation with fp16 compute and no double quantisation.
bnb_config_4bit = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False,
)

# Load the same checkpoint twice, once per GPU, in order (cuda:0 first, then cuda:1).
model_0, model_1 = (
    Gemma2ForCausalLM.from_pretrained(
        MODEL_PATH,
        revision="float16",
        device_map=gpu_name,
        quantization_config=bnb_config_4bit,
    )
    for gpu_name in ('cuda:0', 'cuda:1')
)

In [None]:
def process(input_str):
    """Return the last entry of a list-formatted string such as '["a","b"]'.

    The text is handled with plain string operations, not a JSON parser: leading
    and trailing square brackets are removed, the remainder is split on the
    three-character separator `","`, and surrounding double quotes are stripped
    from the final piece. Escape sequences inside the text are left untouched,
    and an unquoted token such as `null` is returned verbatim.

    Args:
        input_str: The raw cell value (a string).

    Returns:
        The last piece as a string; '' when nothing is left after stripping.
    """
    inner = input_str.strip('[]')
    pieces = inner.split('","')
    # str.split with a separator always yields at least one piece, so this guard
    # is purely defensive.
    if not pieces:
        return ''
    return pieces[-1].strip('"')
  
test = pd.read_csv('/kaggle/input/lmsys-chatbot-arena/test.csv')

# Reduce each list-formatted column to its final entry.
for text_col in ('prompt', 'response_a', 'response_b'):
    test.loc[:, text_col] = test[text_col].apply(process)

# Build the single prompt string that is fed to the language model.
test['text'] = (
    '<start_of_turn>User prompt: ' + test['prompt']
    + '\n\nModel A :\n' + test['response_a']
    + '\n\n----\n\nModel B:\n' + test['response_b']
    + '<end_of_turn><eos>'
)
print(test['text'][0])

In [None]:
# Tokenise every text to exactly MAX_LENGTH tokens (padded or truncated).
tokens = tokenizer(
    test['text'].tolist(),
    padding='max_length',
    max_length=MAX_LENGTH,
    truncation=True,
    return_tensors='pt',
)

# Store the token ids and masks as one Python list per row so the frame can be
# sliced row-wise and handed to the two worker threads later on.
data = pd.DataFrame()
data['INPUT_IDS'] = tokens['input_ids'].tolist()
data['ATTENTION_MASKS'] = tokens['attention_mask'].tolist()
data.head(2)

In [None]:
def get_embeddings(df, model, device, batch_size=BATCH_SIZE):
    """Embed every row of `df` with `model` and return one pooled vector per row.

    Each batch is run through the model with `output_hidden_states=True`; the
    last entry of `hidden_states` is averaged over the sequence dimension
    (dim=1) and moved to the CPU. The average runs over all positions: the
    attention mask is passed to the model but is not applied to the pooling,
    so padding positions contribute to the mean.

    Args:
        df: Frame with 'INPUT_IDS' and 'ATTENTION_MASKS' columns, each cell a
            list of ints (all rows must have equal length so they can be
            stacked into one tensor).
        model: Callable model accepting `input_ids`, `attention_mask` and
            `output_hidden_states`, and returning an object with `hidden_states`.
        device: Device the batches are moved to before the forward pass.
        batch_size: Number of rows per forward pass.

    Returns:
        A CPU tensor made by concatenating the pooled batches along dim 0.
    """
    all_ids = torch.tensor(df['INPUT_IDS'].values.tolist(), dtype=torch.long)
    all_masks = torch.tensor(df['ATTENTION_MASKS'].values.tolist(), dtype=torch.long)
    n_rows = len(df)

    pooled_batches = []

    for lo in range(0, n_rows, batch_size):
        hi = min(lo + batch_size, n_rows)
        ids = all_ids[lo:hi].to(device)
        masks = all_masks[lo:hi].to(device)

        # Release whatever the previous iteration left behind before the forward pass.
        gc.collect()
        torch.cuda.empty_cache()

        with torch.no_grad():
            forward_out = model(
                input_ids=ids,
                attention_mask=masks,
                output_hidden_states=True,
            )
            last_hidden = forward_out.hidden_states[-1]
            # Mean-pool over the sequence dimension, then park the result on the CPU.
            pooled_batches.append(last_hidden.mean(dim=1).cpu())

            torch.cuda.empty_cache()

    return torch.cat(pooled_batches, dim=0)

def compute_embed(df, model, device, results, index):
    """Thread target: embed `df` and store the outcome in `results[index]`.

    Nothing is returned; the caller reads the tensor back from the shared
    `results` mapping. If `get_embeddings` raises, `results[index]` is never set.
    """
    embeddings = get_embeddings(df, model, device)
    results[index] = embeddings

In [None]:
# `time` is the module here: the import cell's later `import time` rebinds the name
# first bound by `from time import time`.
st = time.time()

# Split the tokenised rows into two halves, one per GPU.
N_SAMPLES = len(data)
half = round(N_SAMPLES / 2)
sub1 = data.iloc[0:half].copy()
sub2 = data.iloc[half:N_SAMPLES].copy()

# Shared mapping the workers write into: key 0 -> first half, key 1 -> second half.
results = {}

t0 = Thread(target=compute_embed, args=(sub1, model_0, device0, results, 0))
t1 = Thread(target=compute_embed, args=(sub2, model_1, device1, results, 1))

t0.start()
t1.start()

t0.join()
t1.join()

# An exception raised inside a worker thread is reported by the threading machinery
# but is NOT re-raised here, so a failed worker would otherwise only surface later
# as an opaque KeyError on `results`. Fail loudly at the point of failure instead.
failed_workers = [worker_id for worker_id in (0, 1) if worker_id not in results]
if failed_workers:
    raise RuntimeError(
        f"Embedding worker(s) {failed_workers} produced no result; "
        "see the worker traceback printed above."
    )

print(f"Processing complete. Total time: {time.time() - st:.2f} seconds")

In [None]:
# Stitch the two halves back together in their original row order (first half, then second).
test_embeddings = torch.cat((results[0], results[1]), dim=0)
test_embeddings.shape

In [None]:
# Free the two language models now that the embeddings are computed.
# Order matters: drop the references first, then run the garbage collector so any
# reference cycles inside the models are actually reclaimed, and only then ask the
# CUDA caching allocator to hand the freed blocks back to the driver.
del model_1
del model_0
gc.collect()
torch.cuda.empty_cache()

In [None]:
# The embedding tensor is converted to a NumPy array and passed to the CatBoost
# classifier, which returns per-class probabilities.
preds = model_cb.predict_proba(
    test_embeddings.numpy()
)
preds

In [None]:
sample_sub = pd.read_csv(
    '/kaggle/input/lmsys-chatbot-arena/sample_submission.csv'
)

# Overwrite the three target columns with the predicted probabilities.
# NOTE(review): this assumes the probability columns come out in the same order as
# `Targets` and that the rows line up with test.csv - confirm against the model's classes.
sample_sub[Targets] = preds

display(sample_sub)

In [None]:
# Write the submission file into the working directory, without the index column.
sample_sub.to_csv(path_or_buf='submission.csv', index=False)