In [None]:
# Install the latest development version of trl from GitHub
!pip install git+https://github.com/huggingface/trl.git

# Verify trl version
import trl
print(f"trl version: {trl.__version__}")

import contextlib
import gc
import json
import random
import traceback

import numpy as np
import psutil
import torch
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, TaskType
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import DPOTrainer, DPOConfig

# Release GPU memory cached by earlier runs in this kernel before doing anything else
torch.cuda.empty_cache()

# Pick the compute device: CUDA when it is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Memory monitoring function
def print_memory_footprint():
    """Print the current GPU (when present) and host memory usage in GB."""
    bytes_per_gb = 1024 ** 3

    if not torch.cuda.is_available():
        print("[GPU] No GPU detected.")
    else:
        allocated_gb = torch.cuda.memory_allocated() / bytes_per_gb
        reserved_gb = torch.cuda.memory_reserved() / bytes_per_gb
        print(f"[GPU] Memory Allocated: {allocated_gb:.2f} GB, Cached: {reserved_gb:.2f} GB")

    # Host RAM is reported regardless of whether a GPU exists
    host = psutil.virtual_memory()
    host_used_gb = host.used / bytes_per_gb
    host_total_gb = host.total / bytes_per_gb
    print(f"[CPU] Memory Usage: {host_used_gb:.2f} GB / {host_total_gb:.2f} GB")

# Tokenizer of the TinyLlama chat checkpoint; its EOS token is reused for padding
tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
tokenizer.pad_token = tokenizer.eos_token

# Location of the best SFT model
best_sft_model_path = "/kaggle/input/model-5/other/default/1/tinyllama-qa-exp-lowtemp"

# Seed every random number generator in use so that runs are repeatable
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

# Take the train split of the Anthropic hh-rlhf preference dataset, shuffle it
# with the fixed seed and keep the first `subset_size` examples
subset_size = 1000
dataset = (
    load_dataset("Anthropic/hh-rlhf")['train']
    .shuffle(seed=SEED)
    .select(range(subset_size))
)

# Preprocess dataset for DPO
def preprocess_preference_data(example):
    """Split one preference record into the prompt/chosen/rejected fields for DPO.

    The text of ``example['chosen']`` up to and including its last
    "Assistant:" marker becomes the prompt. Whatever follows the last marker
    in ``example['chosen']`` and ``example['rejected']`` becomes the
    respective response, with surrounding whitespace stripped.

    A text that does not contain the marker no longer raises ``IndexError``:
    the whole text is treated as the response and, for ``chosen``, the prompt
    is the empty string.

    Args:
        example: Mapping with string values under 'chosen' and 'rejected'.

    Returns:
        dict with the keys 'prompt', 'chosen' and 'rejected'.
    """
    marker = "Assistant:"
    # rpartition splits on the LAST occurrence and returns ('', '', text)
    # instead of raising when the marker is absent.
    context, found_marker, chosen_tail = example['chosen'].rpartition(marker)
    rejected_tail = example['rejected'].rpartition(marker)[2]
    return {
        'prompt': context + found_marker,
        'chosen': chosen_tail.strip(),
        'rejected': rejected_tail.strip()
    }

# Convert every record of the subset into the prompt/chosen/rejected layout
preference_dataset = dataset.map(function=preprocess_preference_data)

# Evaluation questions; each one is wrapped in the "Question: ... Answer:" template
eval_questions = [
    "What is the capital city of Japan?",
    "Who wrote the novel 'Pride and Prejudice'?",
    "What is the chemical symbol for gold?",
    "In which year did the Titanic sink?",
    "What is the largest mammal on Earth?",
    "Who painted the Mona Lisa?",
    "What is the main source of energy for Earth's climate system?",
    "What is the longest river in the world?",
    "Who discovered penicillin?",
    "What is the primary language spoken in Brazil?",
]
eval_prompts = [f"Question: {question} Answer:" for question in eval_questions]

# Function to generate responses with memory optimizations
def generate_responses(model_path, prompts, max_new_tokens=50):
    """Load a causal LM from ``model_path`` and sample one completion per prompt.

    Uses the module-level ``tokenizer`` and ``print_memory_footprint``. The
    model is loaded in float16 with ``device_map="auto"`` and released again
    before the function returns; memory usage is printed before loading,
    after loading and after the release.

    Args:
        model_path: Path or identifier handed to
            ``AutoModelForCausalLM.from_pretrained``.
        prompts: Iterable of prompt strings.
        max_new_tokens: Upper bound on the number of generated tokens per
            prompt (defaults to the previously hard-coded 50).

    Returns:
        list[str]: For every prompt, the decoded text of the newly generated
        tokens only (the prompt itself is excluded), whitespace-stripped.
    """
    torch.cuda.empty_cache()
    print("Before loading model:")
    print_memory_footprint()

    model = None  # bound up front so the cleanup below never has to probe locals()
    try:
        model = AutoModelForCausalLM.from_pretrained(
            model_path,
            torch_dtype=torch.float16,
            device_map="auto",
            low_cpu_mem_usage=True
        )
        print("After loading model:")
        print_memory_footprint()

        model.eval()
        # Inputs have to live where device_map actually placed the model,
        # which need not be the globally selected device.
        model_device = model.device
        on_cuda = model_device.type == "cuda"

        responses = []
        for prompt in prompts:
            encoded = tokenizer(prompt, return_tensors="pt").to(model_device)
            prompt_length = encoded["input_ids"].shape[-1]

            # Only request CUDA autocast when the model really runs on a GPU
            autocast_ctx = (
                torch.autocast(device_type="cuda", dtype=torch.float16)
                if on_cuda
                else contextlib.nullcontext()
            )
            with torch.no_grad(), autocast_ctx:
                output = model.generate(
                    input_ids=encoded["input_ids"],
                    # The pad id passed below is the EOS id, so the mask is
                    # supplied explicitly rather than left to be inferred.
                    attention_mask=encoded["attention_mask"],
                    max_new_tokens=max_new_tokens,
                    pad_token_id=tokenizer.eos_token_id,
                    temperature=0.7,
                    do_sample=True,
                    top_p=0.9,
                    repetition_penalty=1.1
                )

            # Decode only what was generated after the prompt. Splitting the
            # full text on "Answer:" and taking the last piece would discard
            # the real answer whenever the model emits another "Answer:".
            new_tokens = output[0][prompt_length:]
            responses.append(tokenizer.decode(new_tokens, skip_special_tokens=True).strip())

        return responses
    finally:
        del model
        gc.collect()  # drop reference cycles before asking CUDA to release its cache
        torch.cuda.empty_cache()
        print("After deleting model:")
        print_memory_footprint()

# Baseline: sample answers from the best SFT model and store them as JSON
print("Generating SFT responses...")
sft_responses = generate_responses(best_sft_model_path, eval_prompts)
sft_payload = {"prompts": eval_prompts, "responses": sft_responses}
with open("sft_responses.json", "w") as sft_file:
    json.dump(sft_payload, sft_file)

# DPO experiments to run. Every entry bundles a display name, the LoRA
# settings, the DPO hyper-parameters (including beta) and an output directory.
dpo_configurations = [
    dict(
        name="DPO_Experimental2",
        lora=dict(
            r=8,
            target_modules=["q_proj", "v_proj"],
            lora_alpha=16,
            lora_dropout=0.05,
            bias="none",
        ),
        dpo=dict(
            learning_rate=5e-5,
            batch_size=1,
            epochs=2,
            gradient_accumulation_steps=2,
            beta=0.8,
        ),
        output_dir="./dpo_trial5",
    ),
]

# Train and evaluate DPO models.
# For every configuration: load the SFT model, wrap it with LoRA, load a frozen
# reference copy, run DPO training, save the result, then sample evaluation
# answers from the saved output. Failures are reported per configuration and
# summarised at the end instead of being followed by a success message.
failed_trials = []  # names of the configurations that raised
for config in dpo_configurations:
    print(f"\nStarting DPO Trial: {config['name']}")
    print("Configuration:")
    print(json.dumps(config, indent=2))
    print_memory_footprint()

    # Bind the names up front so the cleanup in `finally` is unconditional
    model = ref_model = dpo_trainer = None
    try:
        # Load model with memory optimizations
        print("Loading model...")
        model = AutoModelForCausalLM.from_pretrained(
            best_sft_model_path,
            torch_dtype=torch.float16,
            device_map="auto",
            low_cpu_mem_usage=True
        )

        print("Applying LoRA...")
        lora_config = LoraConfig(
            task_type=TaskType.CAUSAL_LM,
            r=config["lora"]["r"],
            lora_alpha=config["lora"]["lora_alpha"],
            lora_dropout=config["lora"]["lora_dropout"],
            target_modules=config["lora"]["target_modules"],
            bias=config["lora"]["bias"]
        )
        model = get_peft_model(model, lora_config)
        model.print_trainable_parameters()

        # Load reference model
        print("Loading reference model...")
        ref_model = AutoModelForCausalLM.from_pretrained(
            best_sft_model_path,
            torch_dtype=torch.float16,
            device_map="auto",
            low_cpu_mem_usage=True
        )

        # Create DPO config
        # NOTE(review): the weights are loaded in float16 and fp16=True also
        # enables mixed-precision training; confirm the installed PEFT keeps
        # the LoRA weights in float32, otherwise gradient unscaling may fail.
        print("Creating DPO trainer...")
        dpo_config = DPOConfig(
            output_dir=config["output_dir"],
            per_device_train_batch_size=config["dpo"]["batch_size"],
            num_train_epochs=config["dpo"]["epochs"],
            learning_rate=config["dpo"]["learning_rate"],
            gradient_accumulation_steps=config["dpo"]["gradient_accumulation_steps"],
            beta=config["dpo"]["beta"],
            logging_dir=f"./logs/{config['name']}",
            logging_steps=10,
            save_strategy="epoch",
            report_to="none",
            fp16=True,
            gradient_checkpointing=True,
            gradient_checkpointing_kwargs={"use_reentrant": False},
            optim="adafactor",
            max_grad_norm=0.3,
            remove_unused_columns=False
        )

        dpo_trainer = DPOTrainer(
            model=model,
            ref_model=ref_model,
            args=dpo_config,
            train_dataset=preference_dataset,
            # Train with the same tokenizer (pad token set) that generation
            # uses, rather than leaving the trainer to resolve one itself.
            processing_class=tokenizer
        )

        print("Starting training...")
        dpo_trainer.train()

        print("Saving model...")
        dpo_trainer.save_model(config["output_dir"])

        # Release the training objects before the evaluation copy is loaded,
        # so the policy, the reference model and the trainer do not stay
        # resident while generate_responses loads the saved model.
        dpo_trainer = model = ref_model = None
        gc.collect()
        torch.cuda.empty_cache()

        print("Generating responses...")
        responses = generate_responses(config["output_dir"], eval_prompts)
        with open(f"{config['output_dir']}/responses.json", "w") as f:
            json.dump({"prompts": eval_prompts, "responses": responses}, f)

        print(f"Completed {config['name']} successfully!")

    except Exception as e:
        # Keep going with the remaining configurations, but remember the failure
        failed_trials.append(config["name"])
        print(f"Error during DPO training {config['name']}: {str(e)}")
        traceback.print_exc()
    finally:
        # Clean up
        del model, ref_model, dpo_trainer
        gc.collect()
        torch.cuda.empty_cache()
        print("Memory after cleanup:")
        print_memory_footprint()

if failed_trials:
    print(f"\n❌ DPO training failed for: {', '.join(failed_trials)}. See the tracebacks above.")
else:
    print("\n✅ DPO training complete. Evaluate the outputs in 'sft_responses.json' and each 'dpo_trialX/responses.json'.")

In [None]:
import json
import os

# Response files to display: the SFT baseline plus one file per DPO configuration.
# NOTE(review): "./dpo_trial4" is not written by the configuration visible in
# the training cell; presumably it comes from an earlier run. A missing file is
# only reported by print_responses, not raised.
sft_response_path = "sft_responses.json"
dpo_response_paths = dict(
    DPO_Experimental1="./dpo_trial4/responses.json",
    DPO_Experimental2="./dpo_trial5/responses.json",
)

# Function to load and print responses from a JSON file
def print_responses(file_path, model_name):
    """Print the prompt/response pairs stored in a responses JSON file.

    A header naming ``model_name`` is always printed. An error line is printed
    instead of raising when the file does not exist, when its "prompts" or
    "responses" entry is empty or their lengths differ, or when reading or
    parsing fails.

    Args:
        file_path: Path of the JSON file with "prompts" and "responses" lists.
        model_name: Label used in the printed header.
    """
    print(f"\n=== Responses for {model_name} ===")
    try:
        if os.path.exists(file_path):
            with open(file_path, "r") as handle:
                payload = json.load(handle)

            prompt_list = payload.get("prompts", [])
            response_list = payload.get("responses", [])

            # Both lists must be non-empty and of equal length to be paired up
            usable = bool(prompt_list) and bool(response_list) and len(prompt_list) == len(response_list)
            if usable:
                for number, pair in enumerate(zip(prompt_list, response_list), start=1):
                    prompt_text, response_text = pair
                    print(f"\nPrompt {number}: {prompt_text}")
                    print(f"Response: {response_text}")
            else:
                print(f"Error: Invalid or mismatched data in {file_path}.")
        else:
            print(f"Error: File {file_path} not found.")

    except Exception as err:
        print(f"Error reading {file_path}: {str(err)}")

# Show the SFT baseline first
print_responses(sft_response_path, "Best SFT Model")

# Then show the responses of every listed DPO configuration, in order
for dpo_name in dpo_response_paths:
    print_responses(dpo_response_paths[dpo_name], dpo_name)

print("\n✅ Response printing complete.")