### Model Fine-Tuning
BERT base uncased

In [3]:
!pip install transformers datasets peft evaluate accelerate bitsandbytes pandas -q

In [8]:
import os
os.environ["WANDB_DISABLED"] = "true"

In [None]:
import os
import argparse
from typing import Dict, Any, List

import torch
from datasets import load_dataset, Dataset, ClassLabel, Features, Value
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    DataCollatorWithPadding,
)
from peft import LoraConfig, get_peft_model, TaskType
import evaluate
import numpy as np
import pandas as pd
import json

# -------------------------
# Configuration
# -------------------------
class SimpleArgs:
    csv = "mt_bench_training.csv"
    output_dir = "./model_adapter"
    model_name = "bert-base-uncased"  # <-- switched to encoder-only
    batch_size = 8
    epochs = 3
    lr = 5e-5
    seed = 42
    max_input_length = 512
    lora_r = 16
    lora_alpha = 32
    lora_dropout = 0.05
    save_total_limit = 2
    eval_steps = 10
    logging_steps = 10
    seed_data_split = 42
    test_size = 0.1
    validation_size = 0.1

args = SimpleArgs()

# -------------------------
# Utilities
# -------------------------
def build_input_texts_from_columns(examples: Dict[str, List], tokenizer) -> List[str]:
    text_inputs = []
    sep_token = tokenizer.sep_token if tokenizer.sep_token is not None else " "

    for i in range(len(examples["turn"])):
        turn = int(examples["turn"][i])
        q1 = str(examples.get("turn_1_query", [""])[i]).strip()

        if turn == 1:
            text = f"Query: {q1}"
        elif turn == 2:
            ans = str(examples.get("turn_1_answer", [""])[i]).strip()
            q2 = str(examples.get("turn_2_query", [""])[i]).strip()
            text = f"Query: {q1}{sep_token}Answer: {ans}{sep_token}Follow-up Query: {q2}"
        else:
            text = f"Query: {q1}"
        text_inputs.append(text)
    return text_inputs


def preprocess_function(examples, tokenizer, args):
    text_inputs = build_input_texts_from_columns(examples, tokenizer)

    model_inputs = tokenizer(
        text_inputs,
        max_length=args.max_input_length,
        truncation=True,
        padding=False,
    )
    model_inputs["labels"] = examples["label"]
    return model_inputs


# -------------------------
# Compute Metrics
# -------------------------
accuracy_metric = evaluate.load("accuracy")

def compute_metrics(eval_preds):
    preds, labels = eval_preds
    if isinstance(preds, tuple):
        logits = preds[0]
    else:
        logits = preds

    pred_ids = np.argmax(logits, axis=1)
    acc = accuracy_metric.compute(predictions=pred_ids, references=labels)
    return {"accuracy": acc["accuracy"]}

# -------------------------
# Main Logic
# -------------------------
def main():
    torch.manual_seed(args.seed)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"--- Using device: {device.upper()} ---")

    if not os.path.exists(args.csv):
        raise FileNotFoundError(f"CSV file not found: {args.csv}")

    df = pd.read_csv(args.csv)

    unique_winners = sorted(df["winner"].unique().tolist())
    label2id = {label: i for i, label in enumerate(unique_winners)}
    id2label = {i: label for i, label in enumerate(unique_winners)}
    num_labels = len(unique_winners)
    print(f"Found {num_labels} unique labels: {unique_winners}")
    print(f"Label mapping: {label2id}")
    # Save the label mappings alongside the model adapter for inference
    mappings_path = os.path.join(args.output_dir, "label_mappings.json")
    with open(mappings_path, "w") as f:
        json.dump({"id2label": id2label, "label2id": label2id}, f)
    print(f"Label mappings saved to {mappings_path}")
    # --------------------
    df['label'] = df['winner'].map(label2id)

    features = Features({
        'question_id': Value('int64'),
        'turn': Value('int64'),
        'turn_1_query': Value('string'),
        'turn_1_answer': Value('string'),
        'turn_2_query': Value('string'),
        'winner': Value('string'),
        'label': ClassLabel(names=unique_winners)
    })

    raw_all = Dataset.from_pandas(df, features=features)

    train_val_split = raw_all.train_test_split(
        test_size=args.test_size,
        seed=args.seed_data_split,
        stratify_by_column="label"
    )
    test_ds = train_val_split["test"]
    train_val_ds = train_val_split["train"]

    train_split = train_val_ds.train_test_split(
        test_size=args.validation_size,
        seed=args.seed_data_split,
        stratify_by_column="label"
    )
    train_ds = train_split["train"]
    val_ds = train_split["test"]

    print(f"Dataset splits created: train={len(train_ds)}, validation={len(val_ds)}, test={len(test_ds)}")

    tokenizer = AutoTokenizer.from_pretrained(args.model_name, use_fast=True)
    model = AutoModelForSequenceClassification.from_pretrained(
        args.model_name,
        num_labels=num_labels,
        id2label=id2label,
        label2id=label2id,
    )

    # For BERT, we usually target query/key/value/projection layers for LoRA
    target_modules = ["query", "key", "value", "dense"]

    peft_config = LoraConfig(
        task_type=TaskType.SEQ_CLS,
        r=args.lora_r,
        lora_alpha=args.lora_alpha,
        lora_dropout=args.lora_dropout,
        target_modules=target_modules,
    )

    model = get_peft_model(model, peft_config)
    print("Wrapped model with LoRA. Trainable parameters:")
    model.print_trainable_parameters()

    tokenized_train = train_ds.map(lambda examples: preprocess_function(examples, tokenizer, args), batched=True)
    tokenized_val = val_ds.map(lambda examples: preprocess_function(examples, tokenizer, args), batched=True)
    tokenized_test = test_ds.map(lambda examples: preprocess_function(examples, tokenizer, args), batched=True)

    columns_to_remove = ['question_id', 'turn', 'turn_1_query', 'turn_1_answer', 'turn_2_query', 'winner', 'label']
    tokenized_train = tokenized_train.remove_columns([col for col in columns_to_remove if col in tokenized_train.column_names])
    tokenized_val = tokenized_val.remove_columns([col for col in columns_to_remove if col in tokenized_val.column_names])
    tokenized_test = tokenized_test.remove_columns([col for col in columns_to_remove if col in tokenized_test.column_names])

    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    training_args = TrainingArguments(
        output_dir=args.output_dir,
        eval_strategy="steps",
        per_device_train_batch_size=args.batch_size,
        per_device_eval_batch_size=args.batch_size,
        num_train_epochs=args.epochs,
        learning_rate=args.lr,
        save_total_limit=args.save_total_limit,
        fp16=torch.cuda.is_available(),
        logging_steps=args.logging_steps,
        eval_steps=args.eval_steps,
        save_strategy="steps",
        save_steps=args.eval_steps,
        load_best_model_at_end=True,
        metric_for_best_model="accuracy",
        greater_is_better=True,
        seed=args.seed,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_train,
        eval_dataset=tokenized_val,
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )

    print("--- Starting Training ---")
    trainer.train()
    print("--- Training Finished ---")

    print("\n--- Evaluating on the held-out Test Set ---")
    test_results = trainer.evaluate(eval_dataset=tokenized_test)
    print("Test Set Metrics:")
    print(test_results)

    print("\nSaving final PEFT adapter to:", args.output_dir)
    model.save_pretrained(args.output_dir)
    tokenizer.save_pretrained(args.output_dir)
    print("Done.")

# Run the main function
main()


#### Model Saving to google drive functionality

In [34]:
from google.colab import drive
import os
import shutil

# Mount Google Drive
drive.mount('/content/drive')

# Define the path in Google Drive where you want to save the model
# You can change 'my_finetuned_model' to a different folder name if you prefer
GOOGLE_DRIVE_SAVE_PATH = '/content/drive/MyDrive/ANLP Assignment 1'

# Create the directory in Google Drive if it doesn't exist
os.makedirs(GOOGLE_DRIVE_SAVE_PATH, exist_ok=True)

print(f"Google Drive mounted at /content/drive")
print(f"Model will be saved to {GOOGLE_DRIVE_SAVE_PATH}")

Mounted at /content/drive
Google Drive mounted at /content/drive
Model will be saved to /content/drive/MyDrive/ANLP Assignment 1


In [35]:
# Define the directory where the model adapter is saved locally after training
LOCAL_MODEL_DIR = "./model_adapter"

# Define the destination path in Google Drive
GOOGLE_DRIVE_DEST_DIR = GOOGLE_DRIVE_SAVE_PATH

# Copy the entire model adapter directory to Google Drive
if os.path.exists(LOCAL_MODEL_DIR):
    # Remove the destination directory in Google Drive if it already exists to avoid errors during copy
    if os.path.exists(GOOGLE_DRIVE_DEST_DIR):
        print(f"Removing existing directory in Google Drive: {GOOGLE_DRIVE_DEST_DIR}")
        shutil.rmtree(GOOGLE_DRIVE_DEST_DIR)

    print(f"Copying model adapter from {LOCAL_MODEL_DIR} to {GOOGLE_DRIVE_DEST_DIR}")
    shutil.copytree(LOCAL_MODEL_DIR, GOOGLE_DRIVE_DEST_DIR)
    print("Model adapter successfully saved to Google Drive.")
else:
    print(f"Local model directory not found: {LOCAL_MODEL_DIR}. Please run the training code first.")

Removing existing directory in Google Drive: /content/drive/MyDrive/ANLP Assignment 1
Copying model adapter from ./model_adapter to /content/drive/MyDrive/ANLP Assignment 1
Model adapter successfully saved to Google Drive.


### Model Inference (loads the fine-tuned model previously saved in google drive)

In [36]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from peft import PeftModel, PeftConfig
import json
import os
from google.colab import drive

# Mount Google Drive if it's not already mounted
if not os.path.exists('/content/drive'):
    drive.mount('/content/drive')

# Define the path in Google Drive where the model is saved
GOOGLE_DRIVE_SAVED_MODEL_PATH = '/content/drive/MyDrive/ANLP Assignment 1' # Make sure this matches the save path

def predict(query: str, model_dir: str):
    """
    Loads a PEFT model and tokenizer from a directory and performs inference.

    Args:
        query (str): The input text (user query) to classify.
        model_dir (str): The directory containing the PEFT adapter and artifacts (in Google Drive).

    Returns:
        str: The predicted class label (the best model name).
    """
    # --- 1. Load All Artifacts from the Directory ---

    # Load the PEFT config to get the base model name
    config = PeftConfig.from_pretrained(model_dir)
    base_model_name = config.base_model_name_or_path

    # Load the label mappings
    mappings_path = os.path.join(model_dir, "label_mappings.json")
    with open(mappings_path, "r") as f:
        label_mappings = json.load(f)
        # The keys in the JSON file are strings, convert them back to integers
        id2label = {int(k): v for k, v in label_mappings["id2label"].items()}
        label2id = label_mappings["label2id"]

    num_labels = len(id2label)

    # Load the tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_dir)

    # --- 2. Build the Model ---

    # Load the base model with the correct classification head
    base_model = AutoModelForSequenceClassification.from_pretrained(
        base_model_name,
        num_labels=num_labels,
        id2label=id2label,
        label2id=label2id,
    )

    # Apply the LoRA adapter
    model = PeftModel.from_pretrained(base_model, model_dir)
    model.eval()

    # --- 3. Perform Inference ---
    inputs = tokenizer(query, return_tensors="pt", truncation=True, padding=True)

    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits

    predicted_id = torch.argmax(logits, dim=-1).item()
    predicted_label = model.config.id2label[predicted_id]

    return predicted_label




In [37]:
if __name__ == '__main__':
    # Use the Google Drive path for inference
    ADAPTER_DIRECTORY = GOOGLE_DRIVE_SAVED_MODEL_PATH

    # Example query for inference
    test_query = "Query: Describe a vivid and unique character, using strong imagery and creative language. Please answer in fewer than two paragraphs.\
    Zephyr was a freckled-faced, wild-haired wanderer with eyes like the ocean, forever shifting from blue to green to grey. His voice was honeyed, smooth as silk and deep as a canyon, with a laugh that could shake the very foundations of the earth. His skin was tanned and weathered from a life spent under the open sky, and his hands were calloused and scarred from countless adventures.\
    He wore a patchwork coat made of scraps of leather and fur, adorned with feathers and beads that jangled with every step. His boots were sturdy and well-worn, and his hat was a wide-brimmed affair that shaded his face from the sun. He carried a staff made of gnarled wood, adorned with charms and trinkets that tinkled in the breeze. Zephyr was a solitary soul, always on the move, but those who crossed his path were forever changed by his contagious laughter and boundless spirit.\
    Revise your previous response and incorporate an allusion to a famous work of literature or historical event in each sentence."

    # Get the prediction
    best_model = predict(test_query, ADAPTER_DIRECTORY)

    print(f"Input Query:\n'{test_query}'")
    print("---")
    print(f"Predicted Best Model: {best_model}")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Input Query:
'Query: Describe a vivid and unique character, using strong imagery and creative language. Please answer in fewer than two paragraphs.    Zephyr was a freckled-faced, wild-haired wanderer with eyes like the ocean, forever shifting from blue to green to grey. His voice was honeyed, smooth as silk and deep as a canyon, with a laugh that could shake the very foundations of the earth. His skin was tanned and weathered from a life spent under the open sky, and his hands were calloused and scarred from countless adventures.    He wore a patchwork coat made of scraps of leather and fur, adorned with feathers and beads that jangled with every step. His boots were sturdy and well-worn, and his hat was a wide-brimmed affair that shaded his face from the sun. He carried a staff made of gnarled wood, adorned with charms and trinkets that tinkled in the breeze. Zephyr was a solitary soul, always on the move, but those who crossed his path were forever changed by his contagious laughter