In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the /kaggle/input tree and print the full path of every file found.
# The middle element of each os.walk tuple (sub-directory names) is not needed here.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os
# Select the Keras backend through the environment; this is set ahead of the keras imports below.
# NOTE(review): later cells call tf.data / tf.distribute directly, so the notebook relies on "tensorflow".
os.environ["KERAS_BACKEND"] = "tensorflow"  # or "jax" or "torch"

import keras_nlp
import keras
import tensorflow as tf

import numpy as np 
import pandas as pd
from tqdm import tqdm
import json

import matplotlib.pyplot as plt
import matplotlib as mpl
import transformers

In [None]:
# Report the versions of the core libraries used by this notebook
for _lib_label, _lib in (("TensorFlow", tf), ("Keras", keras), ("KerasNLP", keras_nlp)):
    print(f"{_lib_label}:", _lib.__version__)

In [None]:
class CFG:
    """Notebook-wide configuration: training hyperparameters and label mappings."""

    # Reproducibility and training schedule
    seed = 50               # random seed
    epochs = 2              # number of training epochs
    batch_size = 2          # examples per batch
    sequence_length = 1024  # token length each input is brought to

    # Integer class id -> submission column name
    label2name = dict(enumerate(('winner_model_a', 'winner_model_b', 'winner_tie')))
    # Inverse mapping: column name -> integer class id
    name2label = dict(zip(label2name.values(), label2name.keys()))

    # Ordered views of the mapping above
    class_labels = [*label2name]
    class_names = [*label2name.values()]

In [None]:
# Seed the random number generators through Keras so runs are repeatable
keras.utils.set_random_seed(CFG.seed)
# Make "bfloat16" the global dtype policy for layers created after this point.
# NOTE(review): this is the plain "bfloat16" policy, not "mixed_bfloat16" — confirm that
# keeping variables (not only computation) in bfloat16 is intended with such a small learning rate.
keras.mixed_precision.set_global_policy("bfloat16")
# Root directory of the competition data; train.csv and test.csv are read from here
BASE_PATH = '/kaggle/input/lmsys-chatbot-arena'

In [None]:
def load_data(row):
    """Decode a JSON-encoded list of text turns into a single string.

    Args:
        row: JSON text encoding a list of strings; entries may be ``null``.

    Returns:
        All non-null entries joined with single spaces, in their original order.
        An empty list (or a list of only nulls) yields an empty string.
    """
    turns = json.loads(row)
    # Use an identity check for None (the idiomatic form) and a distinct loop
    # name so the `row` parameter is not shadowed inside the generator.
    return " ".join(turn for turn in turns if turn is not None)

In [None]:
# Read the training split
df = pd.read_csv(f'{BASE_PATH}/train.csv')
# df = df.iloc[:100]  # uncomment to work on a small subset while iterating

# Each text column stores a JSON list of turns; load_data joins ALL non-null
# turns of a conversation into one space-separated string.
for text_col in ("prompt", "response_a", "response_b"):
    df[text_col] = df[text_col].apply(load_data)

# Label conversion: take the name of the winner_* column holding the row-wise
# maximum, then map that name to its integer class id.
winner_cols = ["winner_model_a", "winner_model_b", "winner_tie"]
df["class_name"] = df[winner_cols].idxmax(axis=1)
df["class_label"] = df["class_name"].map(CFG.name2label)

# Preview the first rows
df.head()

In [None]:
# Read the test split
test_df = pd.read_csv(f'{BASE_PATH}/test.csv')

# Same flattening as for the training data: every JSON list of turns becomes
# one space-separated string containing all of its non-null turns.
for text_col in ("prompt", "response_a", "response_b"):
    test_df[text_col] = test_df[text_col].apply(load_data)

# Preview the first rows
test_df.head()

In [None]:
# Pair the two candidate responses of every row as a 2-element list [response_a, response_b].
# NOTE(review): the parsed `prompt` column is not included in these options, so the
# model only ever sees the responses — confirm this is intended.
df['options'] = [[resp_a, resp_b] for resp_a, resp_b in zip(df.response_a, df.response_b)]
display(df.head(2))  # Show the first 2 training rows

test_df['options'] = [
    [resp_a, resp_b] for resp_a, resp_b in zip(test_df.response_a, test_df.response_b)
]
display(test_df.head(2))  # Show the first 2 test rows

In [None]:
from sklearn.model_selection import train_test_split  # Import package

# Hold out 10% of the rows for validation, stratified on the class label.
# random_state pins the shuffle so that re-running this cell always yields the
# same split, instead of depending on how much of the global NumPy random state
# has already been consumed by earlier cells.
train_df, valid_df = train_test_split(
    df,
    test_size=0.1,
    stratify=df["class_label"],
    random_state=CFG.seed,
)

In [None]:
# Text preprocessor loaded from the "deberta_v3_small_en" preset; it is applied to the
# `options` lists and its outputs feed the model's token_ids / padding_mask inputs.
# NOTE(review): dtype="bfloat16" is passed to a layer whose outputs are consumed as
# integer inputs by the model — confirm it has the intended effect.
preprocessor = keras_nlp.models.DebertaV3Preprocessor.from_preset(
    "deberta_v3_small_en",
    sequence_length=CFG.sequence_length, # Max sequence length, will be padded if shorter
    dtype="bfloat16",
)

In [None]:
# Sanity check: run the preprocessor on the response pair of the first row
outs = preprocessor(df.options.iloc[0])

# Print the shape of every entry it returned
for key, value in outs.items():
    print(key, ":", value.shape)

In [None]:
def preprocess_fn(text, label=None):
    """Run the module-level preprocessor on `text`, passing `label` through unchanged.

    Args:
        text: Raw text input handed to `preprocessor`.
        label: Optional target; when given it is returned alongside the features.

    Returns:
        The preprocessed features alone when `label` is None, otherwise a
        ``(features, label)`` tuple.
    """
    features = preprocessor(text)
    if label is None:
        return features
    return (features, label)


In [None]:
def build_dataset(texts, labels=None, batch_size=32,
                  cache=True, shuffle=1024):
    """Build a batched, prefetched tf.data pipeline over `texts` (and `labels`).

    Args:
        texts: Sequence of text examples, sliced along its first axis.
        labels: Optional integer class ids; when given they are one-hot encoded
            into 3 classes and paired with the texts.
        batch_size: Number of examples per batch (the last batch may be smaller).
        cache: Whether to cache the raw slices before preprocessing.
        shuffle: Shuffle buffer size. A falsy value disables shuffling; the
            boolean ``True`` enables shuffling with the default buffer of 1024.

    Returns:
        A `tf.data.Dataset` yielding preprocessed features, or
        ``(features, one_hot_label)`` pairs when `labels` is provided.
    """
    AUTO = tf.data.AUTOTUNE  # let tf.data choose parallelism / prefetch depth
    default_buffer = 1024

    if labels is None:
        slices = (texts,)
    else:
        slices = (texts, keras.utils.to_categorical(labels, num_classes=3))

    ds = tf.data.Dataset.from_tensor_slices(slices)
    ds = ds.cache() if cache else ds  # caches the raw (un-tokenized) slices
    ds = ds.map(preprocess_fn, num_parallel_calls=AUTO)

    opt = tf.data.Options()
    if shuffle:
        # Callers pass shuffle=True as an on/off flag. A bool is not a meaningful
        # buffer size (at best it is treated as 1, which performs no shuffling),
        # so translate it to the default buffer size instead of forwarding it.
        buffer_size = default_buffer if isinstance(shuffle, bool) else shuffle
        ds = ds.shuffle(buffer_size, seed=CFG.seed)
        opt.experimental_deterministic = False  # allow out-of-order elements for speed
    ds = ds.with_options(opt)
    ds = ds.batch(batch_size, drop_remainder=False)
    ds = ds.prefetch(AUTO)
    return ds

In [None]:
# import jax
# devices = jax.devices("gpu")
# print("devices", devices)

# data_parallel = keras.distribution.DataParallel(devices=devices)

In [None]:
def get_data():
    """Create the train, validation and test datasets from the module-level frames.

    Reads `train_df`, `valid_df` and `test_df`, and delegates pipeline
    construction to `build_dataset`.

    Returns:
        A ``(train_ds, valid_ds, test_ds)`` tuple. Only the training dataset is
        built with shuffling enabled; the test dataset carries no labels.
    """
    def _labelled_dataset(frame, shuffle):
        # Texts and integer labels are pulled from the frame as plain Python lists
        return build_dataset(frame.options.tolist(),
                             frame.class_label.tolist(),
                             batch_size=CFG.batch_size,
                             shuffle=shuffle)

    train_ds = _labelled_dataset(train_df, True)
    valid_ds = _labelled_dataset(valid_df, False)

    # Test data has no labels; cap the batch size at the number of test rows
    test_ds = build_dataset(test_df.options.tolist(),
                            batch_size=min(len(test_df), CFG.batch_size),
                            shuffle=False)
    return train_ds, valid_ds, test_ds

In [None]:
# keras.distribution.set_distribution(data_parallel) #replicate model on both GPUs

In [None]:
def get_backbone():
    """Load the "deberta_v3_small_en" backbone in bfloat16 and enable LoRA on it.

    Returns:
        The `DebertaV3Backbone` instance after `enable_lora(8)` has been called.
    """
    lora_rank = 8
    encoder = keras_nlp.models.DebertaV3Backbone.from_preset(
        "deberta_v3_small_en", dtype="bfloat16"
    )
    encoder.enable_lora(lora_rank)
    return encoder

In [None]:
# Checkpoint callback: writes weights only, and only when the monitored
# validation log loss reaches a new minimum.
ckpt_cb = keras.callbacks.ModelCheckpoint(
    'best_model.weights.h5',
    monitor='val_log_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=True,
)

In [None]:
# Categorical cross-entropy reported as a metric under the name "log_loss";
# the checkpoint callback monitors its validation counterpart, "val_log_loss".
log_loss = keras.metrics.CategoricalCrossentropy(name="log_loss")

In [None]:
def get_model(backbone):
    """Build and compile the 3-class pairwise classifier around a shared backbone.

    The model takes a dict with "token_ids" and "padding_mask", each shaped
    (batch, 2, sequence): index 0 on the second axis is the first option and
    index 1 the second. Both options are encoded by the same `backbone`, the
    two outputs are concatenated on the last axis, average-pooled, and fed to
    a 3-way softmax classifier.

    Args:
        backbone: Encoder that accepts a dict of "token_ids" / "padding_mask".

    Returns:
        A compiled `keras.Model` (Adam at 5e-6, categorical cross-entropy with
        label smoothing 0.02, tracking `log_loss` and accuracy).
    """
    # Token ids must be int32: the DeBERTa-v3 vocabulary has far more entries than
    # int16 can represent (max 32767), so an int16 input would overflow large ids.
    inputs = {
        "token_ids": keras.Input(shape=(2, None), dtype=tf.int32, name="token_ids"),
        "padding_mask": keras.Input(shape=(2, None), dtype=tf.int32, name="padding_mask"),
    }

    # Encode the first option (index 0) with the backbone
    response_a = {k: v[:, 0, :] for k, v in inputs.items()}
    embed_a = backbone(response_a)

    # Encode the second option (index 1) with the same backbone (shared weights)
    response_b = {k: v[:, 1, :] for k, v in inputs.items()}
    embed_b = backbone(response_b)

    # Merge both encodings feature-wise, pool them, then classify
    embeds = keras.layers.Concatenate(axis=-1)([embed_a, embed_b])
    embeds = keras.layers.GlobalAveragePooling1D()(embeds)
    outputs = keras.layers.Dense(3, activation="softmax", name="classifier")(embeds)
    model = keras.Model(inputs, outputs)

    # Compile the model with optimizer, loss, and metrics
    model.compile(
        optimizer=keras.optimizers.Adam(5e-6),
        loss=keras.losses.CategoricalCrossentropy(label_smoothing=0.02),
        metrics=[
            log_loss,
            keras.metrics.CategoricalAccuracy(name="accuracy"),
        ],
    )

    return model

In [None]:
# Distribution strategy under whose scope the datasets and model are created
strategy = tf.distribute.MirroredStrategy()

with strategy.scope():
    # Input pipelines
    train_ds, valid_ds, test_ds = get_data()

    # Shared encoder and the classifier built around it
    backbone = get_backbone()
    model = get_model(backbone)

    # Train; ckpt_cb keeps the weights with the best validation log loss
    history = model.fit(
        train_ds,
        validation_data=valid_ds,
        epochs=CFG.epochs,
        callbacks=[ckpt_cb],
    )

In [None]:
# Restore the best weights saved during training. The checkpoint callback writes
# to the relative path 'best_model.weights.h5', so load from the same path rather
# than assuming the working directory is /kaggle/working.
model.load_weights('best_model.weights.h5')

In [None]:
# Make predictions using the trained model on test data.
# The model ends in a 3-unit softmax, so each row holds one score per class.
test_preds = model.predict(test_ds, verbose=1)

In [None]:
# Assemble the submission: the test ids next to one prediction column per class,
# in CFG.class_names order, aligned on the test frame's index.
pred_frame = pd.DataFrame(
    test_preds.tolist(),
    columns=CFG.class_names,
    index=test_df.index,
)
sub_df = pd.concat([test_df[["id"]], pred_frame], axis=1)

# Write the file and preview it
sub_df.to_csv("submission.csv", index=False)
sub_df.head()

# 📌 | Reference

* [LLM Science Exam: KerasCore + KerasNLP [TPU]](https://www.kaggle.com/code/awsaf49/llm-science-exam-kerascore-kerasnlp-tpu)
* [AES 2.0: KerasNLP Starter](https://www.kaggle.com/code/awsaf49/aes-2-0-kerasnlp-starter)
* [LMSYS: KerasNLP Starter](https://www.kaggle.com/code/awsaf49/lmsys-kerasnlp-starter)