# DPO Training (Unsloth Version)

Uses **Unsloth** for 2-3x faster DPO training with 50% less memory.

**Environment**: Google Colab T4 GPU (15GB VRAM)

## Workflow
1. Install Unsloth
2. Configure model, LoRA, and training settings
3. Load DPO dataset
4. Load model with Unsloth + LoRA
5. Train with DPOTrainer and evaluate
6. Save the LoRA adapter and convert to GGUF
7. Verify the model, then download it

## 1. Install Unsloth

In [None]:
%%capture
# Two-step install: the first command installs the PyPI release of Unsloth
# together with its dependencies; the second removes that release and installs
# the current GitHub version on top (--no-deps leaves the already-installed
# dependencies untouched, --no-cache-dir forces a fresh build).
!pip install unsloth
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

In [None]:
import torch

# Report whether a CUDA device is visible and, if so, which one and how much memory it has.
cuda_ready = torch.cuda.is_available()
print(f"CUDA: {cuda_ready}")
if cuda_ready:
    total_gib = torch.cuda.get_device_properties(0).total_memory / 1024**3
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memory: {total_gib:.1f} GB")

In [None]:
import json
import os
import gc
from datetime import datetime

# Unsloth has to be imported before trl (and transformers/peft) so that its
# patches are in place when those libraries are loaded. PatchDPOTrainer() is
# the hook Unsloth's DPO recipe calls before DPOTrainer is imported.
from unsloth import FastLanguageModel, PatchDPOTrainer

PatchDPOTrainer()

from datasets import Dataset
from trl import DPOTrainer, DPOConfig

## 2. Configuration

In [None]:
# Single source of truth for every tunable used by the cells below.
CONFIG = {
    # Base model (Unsloth-optimized checkpoint) and context window
    "model_name": "unsloth/Meta-Llama-3.1-8B-Instruct",
    "max_seq_length": 2048,

    # LoRA adapter hyperparameters
    "lora_r": 16,
    "lora_alpha": 32,
    "lora_dropout": 0.05,
    "target_modules": [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],

    # DPO training hyperparameters
    "num_epochs": 3,
    "batch_size": 2,
    "gradient_accumulation_steps": 8,
    "learning_rate": 5e-5,
    "beta": 0.1,
    "max_length": 1024,
    "max_prompt_length": 512,
    "warmup_ratio": 0.1,

    # Input files and output locations
    "train_file": "data/dpo/train.jsonl",
    "eval_file": "data/dpo/eval.jsonl",
    "output_dir": "outputs/dpo-training",
    "adapter_dir": "outputs/dpo-adapter",
    "gguf_dir": "models",
    "gguf_name": "psychologist-8b",
    "quantization_method": "q4_k_m",
}

# Create every directory the notebook writes to before anything needs it.
for _dir in (CONFIG["output_dir"], CONFIG["adapter_dir"], CONFIG["gguf_dir"], "data/dpo"):
    os.makedirs(_dir, exist_ok=True)

# Number of samples contributing to each optimizer step.
effective_batch_size = CONFIG["batch_size"] * CONFIG["gradient_accumulation_steps"]
print("Config loaded!")
print(f"Effective batch size: {effective_batch_size}")

## 3. Load Dataset

In [None]:
# Option A: Clone from GitHub
!git clone https://github.com/yuchangyuan1/6895_project_Agent.git temp_repo
!cp temp_repo/data/dpo/*.jsonl data/dpo/
!rm -rf temp_repo
print("Dataset loaded from GitHub!")

# Option B: Upload files directly
# from google.colab import files
# uploaded = files.upload()  # Upload train.jsonl and eval.jsonl
# !mv train.jsonl eval.jsonl data/dpo/

In [None]:
def load_dpo_dataset(filepath: str) -> Dataset:
    """Load a DPO preference dataset from a JSONL file.

    Every non-blank line must be a JSON object containing the keys
    ``prompt``, ``chosen`` and ``rejected``; other keys are ignored.
    Blank lines (for example a trailing empty line) are skipped.

    Args:
        filepath: Path to the UTF-8 encoded JSONL file.

    Returns:
        A ``Dataset`` with the columns ``prompt``, ``chosen`` and ``rejected``,
        one row per record in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks one of the three required keys.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        # json.loads tolerates surrounding whitespace but not an empty string,
        # so whitespace-only lines are filtered out instead of being parsed.
        records = [json.loads(line) for line in f if line.strip()]

    columns = ("prompt", "chosen", "rejected")
    dataset = Dataset.from_dict(
        {column: [record[column] for record in records] for column in columns}
    )
    print(f"Loaded {len(dataset)} records from {filepath}")
    return dataset

# Load the train split first, then the eval split, from the configured paths.
train_dataset, eval_dataset = (
    load_dpo_dataset(CONFIG[split_key]) for split_key in ("train_file", "eval_file")
)

print(f"\nTrain: {len(train_dataset)}, Eval: {len(eval_dataset)}")
# Show the first 100 characters of the first training prompt as a sanity check.
sample_prompt = train_dataset[0]["prompt"]
print(f"Sample prompt: {sample_prompt[:100]}...")

## 4. Load Model with Unsloth

In [None]:
from unsloth import FastLanguageModel

# Load the base model and tokenizer with 4-bit weights; dtype=None lets the
# loader auto-detect the compute dtype.
load_kwargs = dict(
    model_name=CONFIG["model_name"],
    max_seq_length=CONFIG["max_seq_length"],
    dtype=None,
    load_in_4bit=True,
)
model, tokenizer = FastLanguageModel.from_pretrained(**load_kwargs)

allocated_gib = torch.cuda.memory_allocated() / 1024**3
print(f"Model loaded! GPU: {allocated_gib:.2f} GB")

In [None]:
# Wrap the base model with trainable LoRA adapters using the CONFIG values.
lora_kwargs = dict(
    r=CONFIG["lora_r"],
    target_modules=CONFIG["target_modules"],
    lora_alpha=CONFIG["lora_alpha"],
    lora_dropout=CONFIG["lora_dropout"],
    bias="none",
    use_gradient_checkpointing="unsloth",  # Unsloth's optimized checkpointing mode
    random_state=42,
    use_rslora=False,
    loftq_config=None,
)
model = FastLanguageModel.get_peft_model(model, **lora_kwargs)

print(f"LoRA added! r={CONFIG['lora_r']}, alpha={CONFIG['lora_alpha']}")
allocated_gib = torch.cuda.memory_allocated() / 1024**3
print(f"GPU: {allocated_gib:.2f} GB")

## 5. Initialize DPO Trainer

In [None]:
# Pick mixed precision from Unsloth's own capability check.
# torch.cuda.is_bf16_supported() may also report True when bf16 is only
# emulated (newer PyTorch releases), which would select bf16 on a T4 that has
# no native bf16 support; Unsloth's helper is the check its recipes use.
from unsloth import is_bfloat16_supported

use_bf16 = is_bfloat16_supported()

# Training arguments for DPO; all hyperparameters come from CONFIG.
dpo_config = DPOConfig(
    output_dir=CONFIG["output_dir"],
    num_train_epochs=CONFIG["num_epochs"],
    per_device_train_batch_size=CONFIG["batch_size"],
    per_device_eval_batch_size=CONFIG["batch_size"],
    gradient_accumulation_steps=CONFIG["gradient_accumulation_steps"],
    learning_rate=CONFIG["learning_rate"],
    lr_scheduler_type="cosine",
    warmup_ratio=CONFIG["warmup_ratio"],
    beta=CONFIG["beta"],
    max_length=CONFIG["max_length"],
    max_prompt_length=CONFIG["max_prompt_length"],
    # Exactly one of fp16/bf16 is enabled.
    fp16=not use_bf16,
    bf16=use_bf16,
    optim="adamw_8bit",
    logging_steps=10,
    # Checkpoint and evaluate once per epoch, keeping only the two newest checkpoints.
    save_strategy="epoch",
    eval_strategy="epoch",
    save_total_limit=2,
    seed=42,
    report_to="none",
)

print(f"DPO Config: epochs={CONFIG['num_epochs']}, beta={CONFIG['beta']}")
print(f"Effective batch: {CONFIG['batch_size'] * CONFIG['gradient_accumulation_steps']}")

In [None]:
# Assemble the trainer arguments. ref_model=None means no separate reference
# model is passed in (original note: Unsloth handles the reference model
# efficiently).
trainer_kwargs = dict(
    model=model,
    ref_model=None,
    args=dpo_config,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    processing_class=tokenizer,
)
trainer = DPOTrainer(**trainer_kwargs)

print("DPO Trainer initialized!")
allocated_gib = torch.cuda.memory_allocated() / 1024**3
print(f"GPU: {allocated_gib:.2f} GB")

## 6. Train

In [None]:
# Run DPO training, bracketing the trainer's own log output with wall-clock
# timestamps and separator lines.
clock_format = "%H:%M:%S"
separator = "=" * 50

print(f"Starting DPO training at {datetime.now().strftime(clock_format)}")
print(separator)

train_result = trainer.train()

print(separator)
print(f"Training complete at {datetime.now().strftime(clock_format)}")
print(f"Total steps: {train_result.global_step}")
print(f"Final loss: {train_result.training_loss:.4f}")

In [None]:
# Evaluate on the eval split and print every returned metric.
print("Running evaluation...")
eval_results = trainer.evaluate()

print("\nEvaluation Results:")
for metric_name, metric_value in eval_results.items():
    # Floats are shown with four decimals; anything else is printed as-is.
    formatted = f"{metric_value:.4f}" if isinstance(metric_value, float) else f"{metric_value}"
    print(f"  {metric_name}: {formatted}")

## 7. Save LoRA Adapter

In [None]:
# Save LoRA adapter
model.save_pretrained(CONFIG["adapter_dir"])
tokenizer.save_pretrained(CONFIG["adapter_dir"])
print(f"LoRA adapter saved to {CONFIG['adapter_dir']}")
!ls -la {CONFIG["adapter_dir"]}

## 8. Convert to GGUF

In [None]:
# Unsloth has built-in GGUF export!
# This merges LoRA weights and converts to GGUF in one step

print(f"Converting to GGUF ({CONFIG['quantization_method']})...")

model.save_pretrained_gguf(
    CONFIG["gguf_dir"],
    tokenizer,
    quantization_method=CONFIG["quantization_method"],
)

print("\nGGUF conversion complete!")
!ls -lh {CONFIG["gguf_dir"]}/*.gguf

## 9. Verify Model

In [None]:
# Smoke-test the trained model with a single chat-formatted prompt.
FastLanguageModel.for_inference(model)

test_prompt = "I've been feeling anxious lately and can't sleep well. What should I do?"
messages = [{"role": "user", "content": test_prompt}]

# return_dict=True yields input_ids together with the matching attention_mask,
# so generate() receives an explicit mask and the code does not depend on the
# tokenizer's default return type for chat templates.
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt",
    return_dict=True,
).to(model.device)
prompt_length = inputs["input_ids"].shape[1]

with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        use_cache=True,
    )

# Decode only the newly generated tokens, i.e. everything after the prompt.
response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
print(f"Q: {test_prompt}\n")
print(f"A: {response}")

## 10. Download

In [None]:
# Collect every GGUF file in the export directory
import glob
gguf_pattern = f"{CONFIG['gguf_dir']}/*.gguf"
gguf_files = glob.glob(gguf_pattern)
print(f"GGUF files: {gguf_files}")

# Hand each file to the Colab browser download helper
from google.colab import files
for gguf_path in gguf_files:
    print(f"Downloading {gguf_path}...")
    files.download(gguf_path)

In [None]:
# Alternative: Save to Google Drive
# from google.colab import drive
# drive.mount('/content/drive')
# !cp {CONFIG["gguf_dir"]}/*.gguf /content/drive/MyDrive/
# print("Saved to Google Drive!")

## 11. Summary

In [None]:
# Print a recap of the run configuration, the output locations and next steps.
rule = "=" * 60
summary_lines = [
    rule,
    "DPO TRAINING COMPLETE (Unsloth)",
    rule,
    f"\nModel: {CONFIG['model_name']}",
    f"Epochs: {CONFIG['num_epochs']}",
    f"DPO Beta: {CONFIG['beta']}",
    f"LoRA: r={CONFIG['lora_r']}, alpha={CONFIG['lora_alpha']}",
    "\nOutputs:",
    f"  Adapter: {CONFIG['adapter_dir']}",
    f"  GGUF: {CONFIG['gguf_dir']}/",
    rule,
    "\nNext Steps:",
    "1. Download the GGUF model",
    "2. Set LOCAL_MODEL_PATH=models/xxx.gguf",
    "3. Set LLM_TYPE=LOCAL",
    "4. Run inference!",
    rule,
]
# One line per entry, exactly as separate print() calls would emit them.
print("\n".join(summary_lines))

## 12. Cleanup

In [None]:
# Drop the notebook's references to the large objects. pop() with a default
# keeps the cell re-runnable: a plain `del` raises NameError once the names
# are already gone.
for _name in ("model", "tokenizer", "trainer", "inputs", "outputs"):
    globals().pop(_name, None)

# Collect garbage first so objects kept alive only by reference cycles are
# destroyed, then return the now-unused cached CUDA memory to the driver.
gc.collect()
torch.cuda.empty_cache()
print("Cleanup complete!")