In [1]:
model_id = "google/gemma-2-2b-it"
output_model_path = "model_artifacts/gemma-2-2b-dpo"

train_split: str = "train[30:]"
eval_split: str = "train[:30]"

## Load datasets

In [2]:
from datasets import load_dataset
from more_itertools import first

dataset_train =  load_dataset("sardukar/physiology-mcqa-8k", split=train_split)
dataset_eval = load_dataset("sardukar/physiology-mcqa-8k", split=eval_split)

def preprocess_dpo_data(sample: dict) -> dict:
    new_sample = {
        "prompt": [first(filter(lambda msg: msg["role"] == "user", sample["prompt"]))],
        "chosen": [first(filter(lambda msg: msg["role"] == "assistant", sample["chosen"]))],
        "rejected": [first(filter(lambda msg: msg["role"] == "assistant", sample["rejected"]))],
    }
    return new_sample

dataset_train = dataset_train.map(preprocess_dpo_data)
dataset_eval = dataset_eval.map(preprocess_dpo_data)

# Dataset is already in prompt / chosen / rejected format
display(dataset_train)
display(dataset_eval)

Dataset({
    features: ['prompt', 'chosen', 'rejected'],
    num_rows: 8800
})

Dataset({
    features: ['prompt', 'chosen', 'rejected'],
    num_rows: 30
})

## Finetuning with `DPOTrainer`

In [3]:
import os
import torch
from dotenv import load_dotenv
from huggingface_hub import login
from peft import LoraConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from trl import DPOConfig, DPOTrainer

load_dotenv("../.env")
hf_token = os.environ["HF_TOKEN"]
print(hf_token[:8] + "*" * (len(hf_token) - 13) + hf_token[-5:])

login(hf_token)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


hf_AAlNN************************hCQBo


In [4]:
peft_config = LoraConfig(
    r=16,                       # ≤8 recommended for small models
    lora_alpha=32,              # 2*r recommended for DPO
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    # target_modules=['o_proj', 'qkv_proj'] #phi-3
    target_modules="all-linear"
)


In [5]:
tokenizer = AutoTokenizer.from_pretrained(model_id)
# tokenizer.pad_token = tokenizer.eos_token

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    attn_implementation='eager',  # for gemma
)
model.config.use_cache = False

`low_cpu_mem_usage` was None, now default to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
training_args = DPOConfig(
    # training args
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=2,
    gradient_checkpointing=True,
    remove_unused_columns=True,
    optim="paged_adamw_32bit",
    learning_rate=5.0e-06,
    bf16=True,
    num_train_epochs=1,
    lr_scheduler_type="cosine",
    warmup_steps=2,
    logging_strategy="steps",
    logging_steps=100,
    eval_strategy="steps",
    eval_steps=100,
    save_strategy="epoch",
    output_dir=output_model_path,
    report_to="none",

    # DPO params
    beta=0.1,
    max_prompt_length=2048,
    max_length=2048,
)

In [7]:
trainer = DPOTrainer(
    model,
    args=training_args,
    train_dataset=dataset_train,
    eval_dataset=dataset_eval,
    processing_class=tokenizer,
    peft_config=peft_config,
)

trainer.train()

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss


OutOfMemoryError: CUDA out of memory. Tried to allocate 3.12 GiB. GPU 0 has a total capacity of 22.17 GiB of which 835.94 MiB is free. Process 2661322 has 5.75 GiB memory in use. Process 1157165 has 15.38 GiB memory in use. Of the allocated memory 10.57 GiB is allocated by PyTorch, and 4.58 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)