## **Prep**

In [None]:
!pip install transformers datasets trl accelerate bitsandbytes huggingface_hub

In [None]:
!pip install git+https://github.com/EleutherAI/lm-evaluation-harness.git

In [None]:
!pip install accelerate bitsandbytes langdetect

In [None]:
# Authenticate against the Hugging Face Hub through the CLI; later cells in
# this notebook call push_to_hub(), which needs write credentials.
# NOTE(review): this prompt is interactive — confirm it works in the target
# notebook front-end.
!huggingface-cli login

## **Datasets**

In [None]:
from datasets import load_dataset

# Fetch the preference dataset from the Hub and print its summary.
dataset2 = load_dataset(path='llmf25/shuffled_dpo')
print(dataset2)

## **Models**

### **DPO**

In [None]:
import torch
from datasets import load_dataset, Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
from trl import DPOTrainer, DPOConfig

# Starting checkpoint for the DPO run. Swap `model_name` to switch variants:
#   Single: "Qwen/Qwen2.5-0.5B"
#   Hybrid: "llmf25/qwen2_5_0_5b_llmf25_sft_mini"
model_name = "llmf25/qwen2_5_0_5b_llmf25_sft_mini"
output_dir2 = "./hybrid_dpo"

# Weights are loaded in bfloat16; tokenizer and model come from the same repo.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
)

In [None]:
def format_dpo_examples(sample):
    """Render one preference row as chat-templated prompt/completion strings.

    The trainer builds each sequence as ``prompt + chosen`` and
    ``prompt + rejected``. The templated conversation is therefore split at the
    start of the assistant turn: the shared prefix (system/user turns plus the
    assistant header) is returned as ``prompt`` and only the assistant part as
    ``chosen`` / ``rejected``. Returning the whole templated conversation as the
    completion while leaving ``prompt`` untouched would present the question to
    the model twice and score the template's prompt part as completion tokens.

    Relies on the notebook-level ``tokenizer``.

    Args:
        sample: Mapping with ``'prompt'``, ``'chosen'`` and ``'rejected'`` entries.

    Returns:
        dict with the templated ``'prompt'`` and the completion-only
        ``'chosen'`` and ``'rejected'`` strings.

    Raises:
        ValueError: If the templated conversation does not begin with the
            templated prompt, so the completion cannot be isolated safely.
    """
    user_turn = [{"role": "user", "content": f"{sample['prompt']}"}]

    # Everything up to and including the assistant header.
    prompt_formatted = tokenizer.apply_chat_template(
        user_turn, tokenize=False, add_generation_prompt=True
    )

    def _completion_only(answer):
        # Template the full exchange, then strip the shared prompt prefix.
        conversation = user_turn + [{"role": "assistant", "content": answer}]
        full_text = tokenizer.apply_chat_template(conversation, tokenize=False)
        if not full_text.startswith(prompt_formatted):
            raise ValueError(
                "Chat template output does not start with the templated prompt; "
                "cannot separate the completion from the prompt."
            )
        return full_text[len(prompt_formatted):]

    return {
        'prompt': prompt_formatted,
        'chosen': _completion_only(sample['chosen']),
        'rejected': _completion_only(sample['rejected']),
    }

# Run the formatter over every row of the training split.
train_dataset2 = dataset2['train'].map(format_dpo_examples)

print("Formatted train_dataset:", train_dataset2, sep="\n")

In [None]:
# DPO hyperparameters, grouped by purpose.
dpo_config = DPOConfig(
    # --- output, checkpointing, logging ---
    output_dir=output_dir2,
    save_strategy="epoch",
    # NOTE(review): save_steps / eval_steps look inert with an epoch-based save
    # strategy and no eval dataset passed to the trainer — confirm before relying on them.
    save_steps=200,
    eval_steps=200,
    logging_steps=100,
    report_to="none",
    # --- batching and sequence length ---
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=16,
    max_length=1024,
    # --- optimisation ---
    learning_rate=1e-5,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    weight_decay=0.03,
    # --- DPO / data handling ---
    beta=0.1,
    remove_unused_columns=False,
    # Mixed precision is enabled only when a CUDA device is visible.
    bf16=torch.cuda.is_available(),
)

trainer = DPOTrainer(
    processing_class=tokenizer,
    train_dataset=train_dataset2,
    args=dpo_config,
    model=model,
)

trainer.train()

# Persist the tuned weights and the tokenizer side by side.
trainer.save_model(output_dir2)
tokenizer.save_pretrained(output_dir2)

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Reload the DPO checkpoint written to disk, then publish it with a tokenizer.
model = AutoModelForCausalLM.from_pretrained(
    output_dir2,
    trust_remote_code=True,
    dtype=torch.bfloat16,
)
print(f"Model loaded from {output_dir2}")

# The tokenizer is taken from the starting checkpoint (`model_name`).
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
)
print(f"Tokenizer loaded from {model_name}")

# Weights first, then tokenizer, into the same Hub repository.
model.push_to_hub("llmf25/hybrid_sft_dpo")
print("Model pushed to Hugging Face Hub: llmf25/hybrid_sft_dpo")

tokenizer.push_to_hub("llmf25/hybrid_sft_dpo")
print("Tokenizer pushed to Hugging Face Hub: llmf25/hybrid_sft_dpo")

### **ORPO**

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import Dataset
from trl import ORPOTrainer, ORPOConfig

# Status message printed once the imports in this cell have executed.
print("Libraries imported successfully.")

In [None]:
# Starting checkpoint for the ORPO run. Swap `model_name` to switch variants:
#   Single: "Qwen/Qwen2.5-0.5B"
#   Hybrid: "llmf25/qwen2_5_0_5b_llmf25_sft_mini"
model_name = "llmf25/qwen2_5_0_5b_llmf25_sft_mini"
output_dir_orpo = "./hybrid_orpo"

# Fresh copies of the model (bfloat16) and tokenizer from the same repo.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
)

print(f"Model and tokenizer loaded for {model_name}.")

In [None]:
def format_orpo_examples(sample):
    """Render one preference row as chat-templated prompt/completion strings.

    The trainer builds each sequence as ``prompt + chosen`` and
    ``prompt + rejected``. The templated conversation is therefore split at the
    start of the assistant turn: the shared prefix (system/user turns plus the
    assistant header) is returned as ``prompt`` and only the assistant part as
    ``chosen`` / ``rejected``. Returning the whole templated conversation as the
    completion while leaving ``prompt`` untouched would present the question to
    the model twice and score the template's prompt part as completion tokens.

    Relies on the notebook-level ``tokenizer``.

    Args:
        sample: Mapping with ``'prompt'``, ``'chosen'`` and ``'rejected'`` entries.

    Returns:
        dict with the templated ``'prompt'`` and the completion-only
        ``'chosen'`` and ``'rejected'`` strings.

    Raises:
        ValueError: If the templated conversation does not begin with the
            templated prompt, so the completion cannot be isolated safely.
    """
    user_turn = [{"role": "user", "content": f"{sample['prompt']}"}]

    # Everything up to and including the assistant header.
    prompt_formatted = tokenizer.apply_chat_template(
        user_turn, tokenize=False, add_generation_prompt=True
    )

    def _completion_only(answer):
        # Template the full exchange, then strip the shared prompt prefix.
        conversation = user_turn + [{"role": "assistant", "content": answer}]
        full_text = tokenizer.apply_chat_template(conversation, tokenize=False)
        if not full_text.startswith(prompt_formatted):
            raise ValueError(
                "Chat template output does not start with the templated prompt; "
                "cannot separate the completion from the prompt."
            )
        return full_text[len(prompt_formatted):]

    return {
        'prompt': prompt_formatted,
        'chosen': _completion_only(sample['chosen']),
        'rejected': _completion_only(sample['rejected']),
    }

# Run the formatter over every row of the training split.
train_dataset_orpo = dataset2['train'].map(format_orpo_examples)

print("Formatted train_dataset_orpo:", train_dataset_orpo, sep="\n")

In [None]:
# ORPO hyperparameters, grouped by purpose.
orpo_config = ORPOConfig(
    # --- output, checkpointing, logging ---
    output_dir=output_dir_orpo,
    save_strategy="epoch",
    # NOTE(review): save_steps / eval_steps look inert with an epoch-based save
    # strategy and no eval dataset passed to the trainer — confirm before relying on them.
    save_steps=200,
    eval_steps=200,
    logging_steps=100,
    report_to="none",
    # --- batching and sequence length ---
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=16,
    max_length=1024,
    # --- optimisation ---
    learning_rate=1e-5,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    weight_decay=0.03,
    # --- ORPO / data handling ---
    beta=0.1,
    remove_unused_columns=False,
    # Mixed precision is enabled only when a CUDA device is visible.
    bf16=torch.cuda.is_available(),
)

print("ORPOConfig initialized successfully.")

In [None]:
# Fit the model with ORPO on the formatted preference data.
trainer = ORPOTrainer(
    processing_class=tokenizer,
    train_dataset=train_dataset_orpo,
    args=orpo_config,
    model=model,
)
trainer.train()

# Persist the tuned weights and the tokenizer into the configured directory.
trainer.save_model(orpo_config.output_dir)
tokenizer.save_pretrained(orpo_config.output_dir)

print(f"ORPO model trained and saved to {orpo_config.output_dir}.")

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Reload the ORPO checkpoint written to disk, then publish it with a tokenizer.
model = AutoModelForCausalLM.from_pretrained(
    output_dir_orpo,
    trust_remote_code=True,
    dtype=torch.bfloat16,
)
print(f"Model loaded from {output_dir_orpo}")

# The tokenizer is taken from the starting checkpoint (`model_name`).
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
)
print(f"Tokenizer loaded from {model_name}")

# Weights first, then tokenizer, into the same Hub repository.
model.push_to_hub("llmf25/hybrid_sft_orpo")
print("Model pushed to Hugging Face Hub: llmf25/hybrid_sft_orpo")

tokenizer.push_to_hub("llmf25/hybrid_sft_orpo")
print("Tokenizer pushed to Hugging Face Hub: llmf25/hybrid_sft_orpo")

## **Evaluation**

In [None]:
!lm_eval \
  --model hf \
  --model_args pretrained=llmf25/hybrid_sft_orpo,dtype=bfloat16,trust_remote_code=True \
  --tasks medqa_4options \
  --device cuda:0 \
  --num_fewshot 0 \
  --batch_size auto \
  --output_path dpo_results/medqa.jsonl\
  --log_samples

In [None]:
!lm_eval \
  --model hf \
  --model_args pretrained=llmf25/hybrid_sft_orpo,dtype=bfloat16,trust_remote_code=True \
  --tasks headqa_en \
  --device cuda:0 \
  --num_fewshot 0 \
  --batch_size auto \
  --output_path orpo_results/headqa.jsonl\
  --log_samples

In [None]:
!lm_eval \
  --model hf \
  --model_args pretrained=llmf25/hybrid_sft_dpo,dtype=bfloat16,trust_remote_code=True \
  --tasks medqa_4options \
  --device cuda:0 \
  --num_fewshot 0 \
  --batch_size auto \
  --output_path dpo_results/medqa_dpo.jsonl\
  --log_samples

In [None]:
!lm_eval \
  --model hf \
  --model_args pretrained=llmf25/hybrid_sft_dpo,dtype=bfloat16,trust_remote_code=True \
  --tasks medmcqa \
  --device cuda:0 \
  --num_fewshot 0 \
  --batch_size auto \
  --output_path dpo_results/medmcqa.jsonl\
  --log_samples

In [None]:
!lm_eval \
  --model hf \
  --model_args pretrained=llmf25/hybrid_sft_dpo,dtype=bfloat16,trust_remote_code=True \
  --tasks headqa_en \
  --device cuda:0 \
  --num_fewshot 0 \
  --batch_size auto \
  --output_path dpo_results/headqa.jsonl\
  --log_samples

In [None]:
!lm_eval \
  --model hf \
  --model_args pretrained=Qwen/Qwen2.5-0.5B,dtype=bfloat16,trust_remote_code=True \
  --tasks headqa_en \
  --device cuda:0 \
  --num_fewshot 0 \
  --batch_size auto \
  --output_path results/headqa.jsonl\
  --log_samples

In [None]:
!lm_eval \
  --model hf \
  --model_args pretrained=Qwen/Qwen2.5-0.5B,dtype=bfloat16,trust_remote_code=True \
  --tasks medqa_4options \
  --device cuda:0 \
  --num_fewshot 0 \
  --batch_size auto \
  --output_path results/medqa.jsonl\
  --log_samples

In [None]:
!lm_eval \
  --model hf \
  --model_args pretrained=Qwen/Qwen2.5-0.5B,dtype=bfloat16,trust_remote_code=True \
  --tasks medmcqa \
  --device cuda:0 \
  --num_fewshot 0 \
  --batch_size auto \
  --output_path results/medmcqa.jsonl\
  --log_samples

In [None]:
!lm_eval \
  --model hf \
  --model_args pretrained=llmf25/qwen2_5_0_5b_llmf25_sft_mini,dtype=bfloat16,trust_remote_code=True \
  --tasks medmcqa \
  --device cuda:0 \
  --num_fewshot 0 \
  --batch_size auto \
  --output_path results/medmcqa.jsonl\
  --log_samples

In [None]:
!lm_eval \
  --model hf \
  --model_args pretrained=llmf25/qwen2_5_0_5b_llmf25_sft_mini,dtype=bfloat16,trust_remote_code=True \
  --tasks medqa_4options \
  --device cuda:0 \
  --num_fewshot 0 \
  --batch_size auto \
  --output_path results/medqa.jsonl\
  --log_samples

In [None]:
!lm_eval \
  --model hf \
  --model_args pretrained=llmf25/qwen2_5_0_5b_llmf25_sft_mini,dtype=bfloat16,trust_remote_code=True \
  --tasks headqa_en \
  --device cuda:0 \
  --num_fewshot 0 \
  --batch_size auto \
  --output_path results/headqa.jsonl\
  --log_samples