In [1]:
model_id = "microsoft/Phi-3-mini-128k-instruct"
output_model_path = "model_artifacts/Phi-3-mini-dpo-sardukar"

train_split: str = "train[300:]"
eval_split: str = "train[:300]"

## Load datasets

In [2]:
from datasets import load_dataset
from more_itertools import first

dataset_train =  load_dataset("sardukar/physiology-mcqa-8k", split=train_split)
dataset_eval = load_dataset("sardukar/physiology-mcqa-8k", split=eval_split)

def preprocess_dpo_data(sample: dict) -> dict:
    new_sample = {
        "prompt": [first(filter(lambda msg: msg["role"] == "user", sample["prompt"]))],
        "chosen": [first(filter(lambda msg: msg["role"] == "assistant", sample["chosen"]))],
        "rejected": [first(filter(lambda msg: msg["role"] == "assistant", sample["rejected"]))],
    }
    return new_sample

dataset_train = dataset_train.map(preprocess_dpo_data)
dataset_eval = dataset_eval.map(preprocess_dpo_data)

# Dataset is already in prompt / chosen / rejected format
display(dataset_train)
display(dataset_eval)

Dataset({
    features: ['prompt', 'chosen', 'rejected'],
    num_rows: 8530
})

Dataset({
    features: ['prompt', 'chosen', 'rejected'],
    num_rows: 300
})

## Finetuning with `DPOTrainer`

In [3]:
import os
import torch
from dotenv import load_dotenv
from huggingface_hub import login
from peft import LoraConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from trl import DPOConfig, DPOTrainer

load_dotenv("../.env")
hf_token = os.environ["HF_TOKEN"]
print(hf_token[:8] + "*" * (len(hf_token) - 13) + hf_token[-5:])

login(hf_token)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


hf_AAlNN************************hCQBo


In [4]:
peft_config = LoraConfig(
    r=12,                        # ≤8 recommended for small models
    lora_alpha=24,              # 2*r recommended for DPO
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    # target_modules=['o_proj', 'qkv_proj'] #phi-3
    target_modules="all-linear"
)

In [5]:
tokenizer = AutoTokenizer.from_pretrained(model_id)
# tokenizer.pad_token = tokenizer.eos_token

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    attn_implementation=("eager" if "gemma" in model_id else None),
)
model.config.use_cache = False

`low_cpu_mem_usage` was None, now default to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [6]:
training_args = DPOConfig(
    # training args
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=2,
    gradient_checkpointing=True,
    remove_unused_columns=True,
    optim="paged_adamw_32bit",
    learning_rate=1e-05,
    label_smoothing_factor=0.05,
    bf16=True,
    num_train_epochs=1,
    lr_scheduler_type="cosine",
    warmup_steps=2,
    logging_strategy="steps",
    logging_steps=100,
    eval_strategy="steps",
    eval_steps=100,
    save_strategy="epoch",
    output_dir=output_model_path,
    report_to="none",

    # DPO params
    beta=0.1,
    max_prompt_length=8192,
    max_length=8192,
)

In [7]:
trainer = DPOTrainer(
    model,
    args=training_args,
    train_dataset=dataset_train,
    eval_dataset=dataset_eval,
    processing_class=tokenizer,
    peft_config=peft_config,
)

trainer.train()

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss,Validation Loss,Rewards/chosen,Rewards/rejected,Rewards/accuracies,Rewards/margins,Logps/chosen,Logps/rejected,Logits/chosen,Logits/rejected
100,0.1202,0.065621,4.09842,-3.918415,0.96,8.016835,-352.69873,-151.802063,18.713146,12.051064
200,0.053,0.057469,4.879029,-4.249614,0.963333,9.128644,-344.89267,-155.114044,18.773344,12.114598
300,0.0428,0.057425,4.738125,-4.566767,0.966667,9.304892,-346.301697,-158.285568,18.706327,12.078616
400,0.0481,0.052925,5.03541,-5.106596,0.966667,10.142007,-343.328857,-163.683884,18.647087,11.899955
500,0.0371,0.042681,5.738383,-4.117891,0.98,9.856275,-336.299103,-153.796829,18.608992,11.46515
600,0.0337,0.04291,5.328509,-6.382841,0.97,11.711349,-340.397858,-176.44632,18.750893,11.751192
700,0.0315,0.045975,5.615154,-7.087044,0.97,12.702197,-337.531403,-183.488358,18.773146,11.824639
800,0.0326,0.05166,5.457411,-7.569355,0.98,13.026768,-339.108856,-188.311478,18.751772,11.792716
900,0.0303,0.047391,5.251222,-7.522584,0.976667,12.773805,-341.170715,-187.843735,18.722242,11.769544
1000,0.0266,0.046893,4.977066,-7.993609,0.98,12.970674,-343.912292,-192.554016,18.72238,11.55065


TrainOutput(global_step=2132, training_loss=0.03673357162869222, metrics={'train_runtime': 3929.4602, 'train_samples_per_second': 2.171, 'train_steps_per_second': 0.543, 'total_flos': 0.0, 'train_loss': 0.03673357162869222, 'epoch': 0.9997655334114889})

In [8]:
trainer.model.save_pretrained("model_artifacts/temp")
tokenizer.save_pretrained("model_artifacts/temp")

('model_artifacts/temp/tokenizer_config.json',
 'model_artifacts/temp/special_tokens_map.json',
 'model_artifacts/temp/tokenizer.json')

In [9]:
import gc

try: del trainer
except: print("trainer not found")

try: del model
except: print("model not found")

gc.collect()
torch.cuda.empty_cache()

In [10]:
base_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    return_dict=True,
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [11]:
from peft import PeftModel

model = PeftModel.from_pretrained(base_model, "model_artifacts/temp")
model = model.merge_and_unload()

In [12]:
model.save_pretrained(output_model_path)
tokenizer.save_pretrained(output_model_path)

('model_artifacts/Phi-3-mini-dpo-sardukar/tokenizer_config.json',
 'model_artifacts/Phi-3-mini-dpo-sardukar/special_tokens_map.json',
 'model_artifacts/Phi-3-mini-dpo-sardukar/tokenizer.json')