In [None]:
from tqdm import tqdm
import pandas as pd
import json
import torch
from transformers import AutoModel
from numpy.linalg import norm
import torch.nn as nn
import numpy as np
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import re

In [None]:
# Select the compute device. `is_available` must be *called*: the bare function
# object is always truthy, which would select "cuda" even on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"
device

In [None]:
# Load the pretrained embedding model from the local Kaggle input directory.
# NOTE(review): despite its name, `tokenizer` holds the full AutoModel object; it is
# later handed to Model(...), where `.encode(list_of_str)` is called on it.
# NOTE(review): `.encode` is not part of the stock AutoModel API — presumably this
# checkpoint ships its own modelling code; confirm whether `trust_remote_code=True`
# is needed for it to load with that method.
tokenizer = AutoModel.from_pretrained('/kaggle/input/jinaai/pytorch/default/4')

In [None]:
class EmbeddingModel(nn.Module):
    """Turn JSON-encoded conversations into fixed-size per-turn feature tensors.

    Each sample consists of three JSON strings (prompt turns, response A turns,
    response B turns). Every turn is embedded with ``embedding_model.encode`` and
    the prompt embedding is concatenated with the matching response embedding,
    giving one row per turn. Rows are zero-padded / truncated to
    ``(max_sequences, 2 * embedding_dim)``.

    Args:
        embedding_model: object exposing ``encode(list_of_str)``; the result is
            passed to ``torch.from_numpy``, so it must be a NumPy array
            (assumed 2-D, one row per input string — TODO confirm for the
            checkpoint in use).
        max_sequences: number of turns kept per conversation.
        embedding_dim: width of a single embedding vector (default 768, the
            value previously hard-coded).
    """

    def __init__(self, embedding_model, max_sequences, embedding_dim=768):
        super().__init__()
        self.embedding = embedding_model
        self.max_seq_length = max_sequences
        self.embedding_dim = embedding_dim
        # `device` is the notebook-level global defined in an earlier cell.
        self.device = device

    @staticmethod
    def _load_turns(raw):
        """Parse a JSON list of turns, replacing null entries with empty strings."""
        return ["" if turn is None else turn for turn in json.loads(raw)]

    def _encode(self, texts):
        """Embed a non-empty list of strings and move the result to ``self.device``."""
        return torch.from_numpy(self.embedding.encode(texts)).to(self.device)

    def forward(self, prompts, responses_a, responses_b):
        """Build padded feature tensors for a batch of conversations.

        Args:
            prompts, responses_a, responses_b: equally long iterables of JSON
                strings, each decoding to a list of turns (str or null).

        Returns:
            Tuple ``(features_a, features_b)``, each of shape
            ``(batch, max_sequences, 2 * embedding_dim)``.
        """
        target_shape = (self.max_seq_length, self.embedding_dim * 2)
        batch_features_a = []
        batch_features_b = []

        for prompt, response_a, response_b in zip(prompts, responses_a, responses_b):
            prompt_turns = self._load_turns(prompt)
            turns_a = self._load_turns(response_a)
            turns_b = self._load_turns(response_b)

            # Only turns present in all three lists can be paired, and anything
            # beyond max_seq_length would be sliced away by the padding step, so
            # there is no point in embedding it.
            n_turns = min(len(prompt_turns), len(turns_a), len(turns_b), self.max_seq_length)

            if n_turns == 0:
                # Empty conversation: emit all-padding features instead of
                # calling the encoder on an empty list.
                batch_features_a.append(torch.zeros(target_shape, device=self.device))
                batch_features_b.append(torch.zeros(target_shape, device=self.device))
                continue

            embedded_prompt = self._encode(prompt_turns[:n_turns])
            embedded_response_a = self._encode(turns_a[:n_turns])
            embedded_response_b = self._encode(turns_b[:n_turns])

            # Row i = [prompt_i ; response_i]; one column-wise cat replaces the
            # per-turn Python loop.
            features_a = torch.cat((embedded_prompt, embedded_response_a), dim=1)
            features_b = torch.cat((embedded_prompt, embedded_response_b), dim=1)

            batch_features_a.append(self.pad_to_shape(features_a, target_shape))
            batch_features_b.append(self.pad_to_shape(features_b, target_shape))

        return torch.stack(batch_features_a).to(self.device), torch.stack(batch_features_b).to(self.device)

    def pad_to_shape(self, tensor, shape):
        """Zero-pad (bottom/right) and/or truncate a 2-D tensor to ``shape``.

        An empty tensor of any rank is returned as an all-zero tensor of
        ``shape`` (previously a 1-D empty tensor raised IndexError on the final
        two-axis slice).
        """
        rows, cols = shape
        if tensor.numel() == 0:
            return tensor.new_zeros((rows, cols))

        pad_rows = max(rows - tensor.size(0), 0)
        pad_cols = max(cols - tensor.size(1), 0)
        # F.pad lists padding for the last dimension first: (left, right, top, bottom).
        padded_tensor = F.pad(tensor, pad=(0, pad_cols, 0, pad_rows), mode='constant', value=0)
        return padded_tensor[:rows, :cols]

class Model(nn.Module):
    """Three-way preference classifier over two embedded conversations.

    Each conversation (A and B) is passed through its own LSTM and its own 1-D
    convolution; the features at the last *real* turn of every branch are
    concatenated and fed to an MLP with three outputs.

    Output convention:
        * ``self.training`` is True  -> raw logits, as required by
          ``nn.CrossEntropyLoss`` (which applies log-softmax itself; feeding it
          softmax output squashes the gradients).
        * ``self.training`` is False -> softmax probabilities over the 3 classes.

    Args:
        embedding_model: forwarded to ``EmbeddingModel``.
        max_sequences: number of turns kept per conversation.
        hidden_dim: LSTM hidden size and number of conv output channels.
        dropout: dropout probability used in the MLP head.
    """

    def __init__(self, embedding_model, max_sequences=64, hidden_dim=512, dropout=0.3):
        super().__init__()
        # `device` is the notebook-level global defined in an earlier cell.
        self.device = device
        self.embedding = EmbeddingModel(embedding_model, max_sequences)

        # Each turn is [prompt embedding ; response embedding], 768 wide each.
        feature_dim = 768 * 2
        self.lstm_input_a = nn.LSTM(feature_dim, hidden_dim, batch_first=True).to(self.device)
        self.lstm_input_b = nn.LSTM(feature_dim, hidden_dim, batch_first=True).to(self.device)

        self.conv_input_a = nn.Conv1d(feature_dim, hidden_dim, kernel_size=3, padding=1).to(self.device)
        self.conv_input_b = nn.Conv1d(feature_dim, hidden_dim, kernel_size=3, padding=1).to(self.device)

        # The head ends in a plain Linear layer; normalisation happens in forward().
        self.fc = nn.Sequential(
            nn.Linear(hidden_dim * 2 + hidden_dim * 2, 256),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(128, 3),
        ).to(self.device)

    @staticmethod
    def _valid_lengths(batch):
        """Return, per sample, the 1-based index of the last non-padding turn.

        Padding rows are the all-zero rows appended by the embedding stage.
        Samples with no real turn get length 1 so they can still be indexed.
        """
        is_real = batch.ne(0).any(dim=2)  # (batch, seq)
        positions = torch.arange(1, batch.size(1) + 1, device=batch.device)
        return (is_real.long() * positions).max(dim=1).values.clamp(min=1)

    @staticmethod
    def _last_valid_step(sequence, lengths):
        """Pick ``sequence[i, lengths[i] - 1, :]`` for every sample i."""
        index = (lengths - 1).view(-1, 1, 1).expand(-1, 1, sequence.size(2))
        return sequence.gather(1, index).squeeze(1)

    def forward(self, prompts, responses_a, responses_b):
        """Score a batch of conversations.

        Returns:
            Tensor of shape ``(batch, 3)``: logits in training mode,
            probabilities in eval mode (see the class docstring).
        """
        batch_a, batch_b = self.embedding(prompts, responses_a, responses_b)

        # Reading position -1 of the padded sequence would mostly see padding:
        # for short conversations the conv output there is a bias-only constant.
        lengths_a = self._valid_lengths(batch_a)
        lengths_b = self._valid_lengths(batch_b)

        batch_a_lstm, _ = self.lstm_input_a(batch_a)  # (batch, seq, hidden_dim)
        batch_b_lstm, _ = self.lstm_input_b(batch_b)  # (batch, seq, hidden_dim)

        # Conv1d expects (batch, channels, seq); permute in and back out.
        batch_a_cnn = self.conv_input_a(batch_a.permute(0, 2, 1)).permute(0, 2, 1)  # (batch, seq, hidden_dim)
        batch_b_cnn = self.conv_input_b(batch_b.permute(0, 2, 1)).permute(0, 2, 1)  # (batch, seq, hidden_dim)

        combined = torch.cat(
            [
                self._last_valid_step(batch_a_lstm, lengths_a),
                self._last_valid_step(batch_a_cnn, lengths_a),
                self._last_valid_step(batch_b_lstm, lengths_b),
                self._last_valid_step(batch_b_cnn, lengths_b),
            ],
            dim=1,
        )

        logits = self.fc(combined)
        if self.training:
            return logits
        return F.softmax(logits, dim=1)
    
class DatasetLMSYS(Dataset):
    """Map-style dataset over the labelled training table.

    ``data`` must support ``len()`` and ``.iloc[idx]`` row access with lookup by
    column name (a pandas DataFrame in this notebook). Each item is the tuple
    ``(prompt, response_a, response_b, model_result)`` for one row.
    """

    # Columns returned by __getitem__, in output order.
    _FIELDS = ('prompt', 'response_a', 'response_b', 'model_result')

    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        return tuple(row[field] for field in self._FIELDS)
    
class DatasetLMSYSTest(Dataset):
    """Map-style dataset over the unlabelled test table.

    ``data`` must support ``len()`` and ``.iloc[idx]`` row access with lookup by
    column name (a pandas DataFrame in this notebook). Each item is the tuple
    ``(id, prompt, response_a, response_b)`` for one row.
    """

    # Columns returned by __getitem__, in output order.
    _FIELDS = ('id', 'prompt', 'response_a', 'response_b')

    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        return tuple(row[field] for field in self._FIELDS)

In [None]:
# Build the classifier around the pretrained embedding model (held in the variable
# named `tokenizer`) and move the whole module to the selected device.
model = Model(tokenizer).to(device)

In [None]:
# Training hyperparameters.
batch_size = 128  # samples per DataLoader batch (used for both train and test loaders)
learning_rate = 0.001  # optimizer step size
epochs = 5  # number of full passes over the training set

In [None]:
# --- Training data -----------------------------------------------------------
file_data = pd.read_csv('/kaggle/input/lmsys-chatbot-arena/train.csv')

# Collapse the winner columns into one class index:
#   0 -> model A wins, 1 -> model B wins, 2 -> anything else (tie).
# Vectorised with np.where instead of a row-wise DataFrame.apply, which runs a
# Python lambda once per row.
file_data['model_result'] = np.where(
    file_data['winner_model_a'] == 1,
    0,
    np.where(file_data['winner_model_b'] == 1, 1, 2),
)
file_data = file_data[['prompt', 'response_a', 'response_b', 'model_result']]
train_loader = DataLoader(
    dataset=DatasetLMSYS(file_data),
    batch_size=batch_size,
    shuffle=True
)

# --- Test data ---------------------------------------------------------------
# Kept in file order (shuffle=False) so predictions line up with the input ids.
file_test = pd.read_csv('/kaggle/input/lmsys-chatbot-arena/test.csv')
test_loader = DataLoader(
    dataset=DatasetLMSYSTest(file_test),
    batch_size=batch_size,
    shuffle=False
)

In [None]:
# CrossEntropyLoss applies log-softmax internally, so it expects unnormalised
# logits and integer class indices.
criterion = nn.CrossEntropyLoss()
# Use the `learning_rate` constant from the config cell rather than repeating the
# literal, so changing the config actually changes the optimizer.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
for epoch in range(epochs):
    print(f"Epoch {epoch + 1}/{epochs}")

    # Per-epoch accumulators: summed batch losses, samples seen, correct predictions.
    running_loss, total_train, correct_train = 0, 0, 0

    # Train
    model.train()
    for prompts, responses_a, responses_b, labels in tqdm(train_loader):
        labels = labels.to(device)

        # Forward pass; the predicted class is the arg-max over the 3 outputs.
        outputs = model(prompts, responses_a, responses_b)
        predicted_idx = outputs.detach().argmax(dim=1)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Bookkeeping for the epoch summary.
        running_loss += loss.item()
        total_train += labels.size(0)
        correct_train += (predicted_idx == labels).sum().item()

        # Drop references to the batch tensors before the next iteration.
        del labels, outputs

    train_accuracy = 100 * correct_train / total_train
    print(f"\nTraining Loss: {running_loss/len(train_loader):.4f} | Training Accuracy: {train_accuracy:.2f}%")
print("\n==> Training finished!")

In [None]:
def test(model, test_loader, device):
    """Run inference over ``test_loader`` and collect per-sample class scores.

    Args:
        model: callable module; switched to eval mode and called as
            ``model(prompts, responses_a, responses_b)``. It must return a
            ``(batch, 3)`` tensor whose columns are the scores for
            "A wins", "B wins" and "tie", in that order.
        test_loader: iterable of ``(ids, prompts, responses_a, responses_b)`` batches.
        device: unused; kept so existing callers keep working.

    Returns:
        pandas.DataFrame with columns ``id``, ``winner_model_a``,
        ``winner_model_b``, ``winner_tie`` — one row per sample, in loader order.
    """
    columns = ['id', 'winner_model_a', 'winner_model_b', 'winner_tie']
    model.eval()
    results = []
    with torch.no_grad():
        for ids, prompts, responses_a, responses_b in tqdm(test_loader):
            outputs = model(prompts, responses_a, responses_b)

            # One host transfer per batch instead of several `.item()` calls per row.
            id_values = ids.tolist() if hasattr(ids, "tolist") else list(ids)
            score_rows = outputs.cpu().tolist()

            for sample_id, (score_a, score_b, score_tie) in zip(id_values, score_rows):
                results.append({
                    'id': sample_id,
                    'winner_model_a': score_a,
                    'winner_model_b': score_b,
                    'winner_tie': score_tie
                })
    # Passing `columns` keeps the schema stable even when the loader is empty.
    df_results = pd.DataFrame(results, columns=columns)
    return df_results

# Run inference on the test loader and display the resulting per-class scores.
df_results = test(model, test_loader, device)
df_results