In [None]:
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig
from datasets import load_dataset
from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training
from trl import DPOTrainer
import bitsandbytes as bnb
import torch

import random
import json

## Load Dataset

In [None]:
# Log in to the Hugging Face Hub from inside the notebook.
# The push_to_hub calls near the end of this notebook use private=True, so an authenticated session is needed.
# (presumably the dataset repo may also need it if it is private — TODO confirm)
from huggingface_hub import notebook_login

notebook_login()

In [None]:
# Fetch the "train" split of the preference dataset from the Hugging Face Hub
dataset = load_dataset(
    path="Venkat-Ram-Rao/msmarco_subset_for_dpo_llm_ranker_300_rows",
    split="train",
)

In [None]:
# Number of rows in the loaded split
dataset.num_rows

In [None]:
# Show the dataset object's repr as the cell output
dataset

In [None]:
# Sanity check: show the "prompt" field of the row at index 1
sample_row = dataset[1]
print(sample_row["prompt"])

In [None]:
# Sanity check: show the "chosen" field of the row at index 1
sample_row = dataset[1]
print(sample_row["chosen"])

In [None]:
# Sanity check: show the "rejected" field of the row at index 1
sample_row = dataset[1]
print(sample_row["rejected"])

## Train

In [None]:
# Hold out 5% of the rows for evaluation; the fixed seed keeps the split reproducible
ds_train_test = dataset.train_test_split(
    seed=42,
    test_size=0.05,
)

In [None]:
# Display both splits side by side as the cell output
train_split = ds_train_test["train"]
test_split = ds_train_test["test"]
train_split, test_split

In [None]:
# Hugging Face Hub id of the base checkpoint; reused below for the tokenizer,
# the policy and reference models, and the final merge step
model_name = "mistralai/Mistral-7B-Instruct-v0.1"

In [None]:
# Tokenizer that belongs to the base checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [None]:
# Reuse the end-of-sequence token as the padding token
# (presumably this tokenizer defines no pad token of its own — TODO confirm)
tokenizer.pad_token = tokenizer.eos_token
# Pad on the left-hand side of each sequence
tokenizer.padding_side = "left"

In [None]:
# Model to fine-tune (the DPO policy), loaded with 4-bit bitsandbytes quantization.
# The request goes through `quantization_config`: passing `load_in_4bit=True`
# directly to `from_pretrained` is deprecated in transformers, and
# BitsAndBytesConfig (imported at the top of the notebook) is the supported way
# to ask for it. Only `load_in_4bit` is set, so every other quantization option
# keeps its library default.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
)
# The generation KV cache is not needed during training, and transformers warns
# that it is incompatible with gradient checkpointing (enabled in the training arguments).
model.config.use_cache = False

In [None]:
# Report how much memory the loaded policy model occupies
footprint = model.get_memory_footprint()
print("Memory footprint: {} ".format(footprint))

In [None]:

# Reference model - If you're short on memory do not load this and set ref_model = None
# It is loaded from the same checkpoint with the same 4-bit quantization as the
# policy model. The quantization request goes through `quantization_config`
# because passing `load_in_4bit=True` directly to `from_pretrained` is deprecated
# in transformers; only `load_in_4bit` is set, so all other quantization options
# keep their library defaults.
ref_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
)

# Report how much memory the reference model occupies
mem = ref_model.get_memory_footprint()
print("Memory footprint: {} ".format(mem))


In [None]:
import os

# Directory handed to TrainingArguments as output_dir
out_dir = "./output"

# Create the output directory if it is missing. exist_ok=True makes the call
# idempotent and avoids the race between a separate "exists" check and the
# creation; it still raises if the path exists but is not a directory.
os.makedirs(out_dir, exist_ok=True)

In [None]:
# LoRA configuration
# Adapters are attached to the attention projections and to the MLP projections.
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
mlp_projections = ["gate_proj", "up_proj", "down_proj"]

peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    target_modules=attention_projections + mlp_projections,
)


# Training arguments
training_args = TrainingArguments(
    output_dir=out_dir,
    # Batching: 2 examples per device, accumulated over 8 steps per optimizer update
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    gradient_checkpointing=True,
    # Optimizer and schedule: 30 optimizer steps in total, 1 of them warm-up
    optim="paged_adamw_32bit",
    learning_rate=8e-5,
    lr_scheduler_type="cosine",
    warmup_steps=1,
    max_steps=30,
    # Logging and checkpointing: log every step, write no intermediate checkpoints, no external reporting
    logging_steps=1,
    save_strategy="no",
    report_to="none",
    # Keep every dataset column available to the trainer's collator
    remove_unused_columns=False,
)

In [None]:
# Run the Python garbage collector first, then release PyTorch's unused cached GPU memory
import gc

gc.collect()
torch.cuda.empty_cache()

In [None]:
# Loss: the trainer default is "sigmoid"; this run uses "ipo" instead.
# Data collator: the trainer default (DPODataCollatorWithPadding), which requires prompt, chosen, rejected triplets.
# NOTE(review): some TRL releases reject a ref_model passed together with peft_config — confirm against the installed TRL version.

dpo_trainer = DPOTrainer(
    model=model,
    ref_model=ref_model,  # pass None instead to save memory, at the cost of efficiency
    args=training_args,
    train_dataset=ds_train_test["train"],
    tokenizer=tokenizer,
    peft_config=peft_config,
    loss_type="ipo",  # from the IPO paper; should reduce overfitting and account for bad data
    beta=0.1,  # plays the role of tau when the loss is "ipo"
    max_prompt_length=1024,
    max_length=1280,
)



In [None]:
# Fine-tune model with DPO
# (train() is the last expression in the cell, so its return value is shown as the cell output)
dpo_trainer.train()

## Save model

In [None]:
# Local directory the merged model and tokenizer are saved to further down
new_model = "trained_reranker"
# Hub repo id used by the push_to_hub calls further down.
# NOTE(review): the name says "30_epochs", but TrainingArguments above sets max_steps=30
# (optimizer steps, not epochs) — confirm the name is intended.
hub_model_name = "Venkat-Ram-Rao/trained_reranker_30_epochs_ipo_loss_v1"

In [None]:
# Persist the trained model held by the trainer and the tokenizer side by side;
# this directory is loaded again as the adapter in the merge step below
checkpoint_dir = "final_checkpoint"
dpo_trainer.model.save_pretrained(checkpoint_dir)
tokenizer.save_pretrained(checkpoint_dir)

In [None]:
# Drop the references to the training objects, then reclaim host and GPU memory
del dpo_trainer
del model
del ref_model
gc.collect()
torch.cuda.empty_cache()

In [None]:
# Reload the base checkpoint in float16 (no quantization arguments this time)
# together with a fresh tokenizer, ready for merging the adapter
base_load_kwargs = {
    "return_dict": True,
    "torch_dtype": torch.float16,
}
base_model = AutoModelForCausalLM.from_pretrained(model_name, **base_load_kwargs)
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [None]:
# Report how much memory the reloaded base model occupies
footprint = base_model.get_memory_footprint()
print("Memory footprint: {} ".format(footprint))

In [None]:
# Merge base model with the adapter:
# load the saved adapter on top of the base model, then fold it in and drop the PEFT wrapper
model = PeftModel.from_pretrained(base_model, "final_checkpoint").merge_and_unload()

In [None]:
# Report how much memory the merged model occupies
footprint = model.get_memory_footprint()
print("Memory footprint: {} ".format(footprint))

In [None]:
# Save model and tokenizer
# Both are written to the same local directory (`new_model`)
model.save_pretrained(new_model)
tokenizer.save_pretrained(new_model)

In [None]:
# Push them to HF
# The merged model and its tokenizer are uploaded to the same private Hub repo
model.push_to_hub(
    repo_id=hub_model_name,
    commit_message="fine tuned using DPO",
    private=True,
)
tokenizer.push_to_hub(repo_id=hub_model_name, private=True)
     

## Inference

In [None]:
# Move the merged model to the GPU before generating
model.to("cuda")

In [None]:
input_text = ds_train_test["test"][1]["prompt"]
encoded = tokenizer(input_text, return_tensors="pt", add_special_tokens=False)
model_inputs = encoded.to("cuda")

In [None]:

generated_ids = model.generate(**model_inputs, max_new_tokens=400, do_sample=False, num_beams=1)
decoded = tokenizer.batch_decode(generated_ids)
print(decoded[0])

In [None]:
input_text = ds_train_test["test"][3]["prompt"]
encoded = tokenizer(input_text, return_tensors="pt", add_special_tokens=False)
model_inputs = encoded.to("cuda")

generated_ids = model.generate(**model_inputs, max_new_tokens=400, do_sample=False, num_beams=1)
decoded = tokenizer.batch_decode(generated_ids)
print(decoded[0])

In [None]:
input_text = ds_train_test["test"][10]["prompt"]
encoded = tokenizer(input_text, return_tensors="pt", add_special_tokens=False)
model_inputs = encoded.to("cuda")

generated_ids = model.generate(**model_inputs, max_new_tokens=400, do_sample=False, num_beams=1)
decoded = tokenizer.batch_decode(generated_ids)
print(decoded[0])

In [None]:
input_text = ds_train_test["test"][9]["prompt"]
encoded = tokenizer(input_text, return_tensors="pt", add_special_tokens=False)
model_inputs = encoded.to("cuda")

generated_ids = model.generate(**model_inputs, max_new_tokens=400, do_sample=False, num_beams=1)
decoded = tokenizer.batch_decode(generated_ids)
print(decoded[0])

## Run on the full test set

In [None]:
# Generate a response for every prompt in the held-out test split.
# tqdm is imported here because no earlier cell imports it; without this the
# loop below fails with a NameError on a fresh kernel.
from tqdm.auto import tqdm

resp = []  # decoded generations, one per test row, in dataset order

test_split = ds_train_test["test"]
for i in tqdm(range(len(test_split))):
    input_text = test_split[i]["prompt"]
    # The prompt is tokenized as-is; no extra special tokens are added
    encoded = tokenizer(input_text, return_tensors="pt", add_special_tokens=False)
    model_inputs = encoded.to("cuda")

    # Deterministic greedy decoding: no sampling, a single beam
    generated_ids = model.generate(**model_inputs, max_new_tokens=400, do_sample=False, num_beams=1)
    decoded = tokenizer.batch_decode(generated_ids)
    resp.append(decoded[0])

In [None]:
import pandas as pd

# One row per generated response, in the order they were collected
df = pd.DataFrame(data=resp)

# Preview the first five rows as the cell output
df.head(n=5)

In [None]:
# Write the collected generations to disk; index=False leaves the row index out of the CSV
df.to_csv('outputs.csv', index=False)