## Direct Preference Optimization (DPO) Using Hugging Face

In [2]:
!pip install torch==2.3.1
!pip install --user trl==0.11.4 # for optimization training
!pip install peft==0.14.0 # for creating LoRA architecture
!pip install matplotlib==3.9.0
!pip install pandas
!pip install numpy==1.26.0
!pip install --user datasets==3.2.0
!pip install transformers==4.45.2

In [1]:
import multiprocessing
import os
import requests
import tarfile
import pandas as pd
import matplotlib.pyplot as plt

import torch
from datasets import load_dataset

from peft import LoraConfig
from transformers import AutoModelForCausalLM, AutoTokenizer,TrainingArguments, GPT2Tokenizer, set_seed, GenerationConfig
from trl import DPOConfig, DPOTrainer

### Create and Configure the Model and Tokenizer

In [2]:
model = AutoModelForCausalLM.from_pretrained("gpt2")

# As we are using LoRA, this will not be used
model_ref = AutoModelForCausalLM.from_pretrained("gpt2")
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

# Set the padding side to "right" to fix the overflow issue with FP16 training
tokenizer.padding_side = "right"

# Disable the use of the cache during the model's forward pass
model.config.use_cache = False

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [3]:
model

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2SdpaAttention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

### Quantized Model Configuration (not used in this notebook due to resource limitation)

In [5]:
'''## Quantized model --only available on GPU
from transformers import BitsAndBytesConfig

quantization_config = BitsAndBytesConfig(load_in_4bit=True,
                                         bnb_4bit_use_double_quant=True,
                                         bnb_4bit_quant_type="nf4",
                                         bnb_4bit_compute_dtype=torch.bfloat16)

# Load GPT-2 model with the specified quantization configuration
model = AutoModelForCausalLM.from_pretrained("gpt2", quantization_config=quantization_config)
model_ref = AutoModelForCausalLM.from_pretrained("gpt2", quantization_config=quantization_config)
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
model.config.use_cache = False'''

### Preprocess Data Set

In [6]:
ds = load_dataset("BarraHome/ultrafeedback_binarized")

README.md: 0.00B [00:00, ?B/s]

train_prefs-00000-of-00001.parquet:   0%|          | 0.00/226M [00:00<?, ?B/s]

test_prefs-00000-of-00001.parquet:   0%|          | 0.00/7.29M [00:00<?, ?B/s]

test_sft-00000-of-00001.parquet:   0%|          | 0.00/3.72M [00:00<?, ?B/s]

train_gen-00000-of-00001.parquet:   0%|          | 0.00/184M [00:00<?, ?B/s]

test_gen-00000-of-00001.parquet:   0%|          | 0.00/3.02M [00:00<?, ?B/s]

Generating train_prefs split:   0%|          | 0/61135 [00:00<?, ? examples/s]

Generating train_sft split:   0%|          | 0/61135 [00:00<?, ? examples/s]

Generating test_prefs split:   0%|          | 0/2000 [00:00<?, ? examples/s]

Generating test_sft split:   0%|          | 0/1000 [00:00<?, ? examples/s]

Generating train_gen split:   0%|          | 0/61135 [00:00<?, ? examples/s]

Generating test_gen split:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [7]:
ds.keys(), ds["train_prefs"][0].keys()

(dict_keys(['train_prefs', 'train_sft', 'test_prefs', 'test_sft', 'train_gen', 'test_gen']),
 dict_keys(['prompt', 'prompt_id', 'chosen', 'rejected', 'messages', 'score_chosen', 'score_rejected']))

In [8]:
ds["train_prefs"][0]

{'prompt': 'how can i develop a habit of drawing daily',
 'prompt_id': '086b3e24f29b8956a01059f79c56db35d118a06fb6b844b095737d042795cd43',
 'chosen': [{'content': 'how can i develop a habit of drawing daily',
   'role': 'user'},
  {'content': "Developing a daily habit of drawing can be challenging but with consistent practice and a few tips, it can become an enjoyable and rewarding part of your daily routine. Here are some strategies to help you develop the habit of drawing daily:\n\n1. Set a specific time: Allocate a specific time of the day to draw. It could be in the morning, afternoon, or evening. Make drawing a part of your daily routine.\n2. Set a specific duration: Determine the amount of time you want to spend on drawing each day. It can be as little as 10 minutes or as long as an hour. Be consistent with the duration to help build the habit.\n3. Start small and simple: Don't try to create a masterpiece every day, start with simple and easy-to-do sketches. Focus on improving yo

In [9]:
# Reducing the volume of data (due to resource limitations) by selecting 50 samples
for key in ds:
    cnt=50
    ds[key] = ds[key].select(range(cnt))

def process(row):
    del row["prompt_id"]
    del row["messages"]
    del row["score_chosen"]
    del row["score_rejected"]
    row["chosen"] = row["chosen"][-1]["content"]
    row["rejected"] = row["rejected"][-1]["content"]
    return row

ds = ds.map(process, num_proc=multiprocessing.cpu_count(), load_from_cache_file=False,)
train_dataset = ds['train_prefs']
eval_dataset = ds['test_prefs']

Map (num_proc=2):   0%|          | 0/50 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/50 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/50 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/50 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/50 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/50 [00:00<?, ? examples/s]

In [10]:
train_dataset[0]

{'prompt': 'how can i develop a habit of drawing daily',
 'chosen': "Developing a daily habit of drawing can be challenging but with consistent practice and a few tips, it can become an enjoyable and rewarding part of your daily routine. Here are some strategies to help you develop the habit of drawing daily:\n\n1. Set a specific time: Allocate a specific time of the day to draw. It could be in the morning, afternoon, or evening. Make drawing a part of your daily routine.\n2. Set a specific duration: Determine the amount of time you want to spend on drawing each day. It can be as little as 10 minutes or as long as an hour. Be consistent with the duration to help build the habit.\n3. Start small and simple: Don't try to create a masterpiece every day, start with simple and easy-to-do sketches. Focus on improving your skills gradually.\n4. Use a variety of tools and mediums: Experiment with different tools like pencils, pens, markers, and different mediums like paper, canvas, or digital 

### LoRA Configuration

In [11]:
peft_config = LoraConfig(r=4, target_modules=['c_proj','c_attn'],
                         task_type="CAUSAL_LM", lora_alpha=8,
                         lora_dropout=0.1, bias="none")

### DPO Configuration

In [12]:
from peft import get_peft_model
training_args = DPOConfig(beta=0.1,  # temperature parameter for the DPO loss(0.1-0.5)
                          output_dir="dpo", num_train_epochs=5, per_device_train_batch_size=1,
                          per_device_eval_batch_size=1, remove_unused_columns=False,
                          logging_steps=10,  # number of steps between logging training progress
                          gradient_accumulation_steps=1, learning_rate=1e-4,
                          evaluation_strategy="epoch",  # after each step or epoch)
                          warmup_steps=2, fp16=False,  # 16-bit (float16) precision
                          save_steps=500,  # saving checkpoints
                          report_to='none')  # The reporting backend to use (set to 'none' to disable, also report to wandb or tensorboard)



### DPO Training

In [13]:
trainer = DPOTrainer(model=model, ref_model=None, args=training_args, train_dataset=train_dataset,
                     eval_dataset=eval_dataset, tokenizer=tokenizer, peft_config=peft_config,
                     #max_prompt_length=512,
                     max_length=512) # maximum sequence length


Deprecated positional argument(s) used in DPOTrainer, please use the DPOConfig to set these arguments instead.


Tokenizing train dataset:   0%|          | 0/50 [00:00<?, ? examples/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (1154 > 1024). Running this sequence through the model will result in indexing errors


Tokenizing eval dataset:   0%|          | 0/50 [00:00<?, ? examples/s]

### Training Model

I didn't run this cell because it is time-consuming as I am running the notebook on CPU.

In [None]:
trainer.train()

In [None]:
!python --v

In [None]:
print(hasattr(model, "generate"))

In [None]:
dpo_model = AutoModelForCausalLM.from_pretrained('./dpo/checkpoint-250')

### Loading Trained Model (as an Alternative to train it on GPU)

In [14]:
url = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/YIDeT3qihEpWChdXN_RmTg/DPO-tar.gz'
filename = './DPO.tar'

response = requests.get(url)
with open(filename, 'wb') as f:
    f.write(response.content)

if tarfile.is_tarfile(filename):
    with tarfile.open(filename, 'r') as tar:
        tar.extractall()
        print("Files extracted:", tar.getnames())
else:
    print("The adownloaded file is not a tar file.")

Files extracted: ['DPO', 'DPO/adapter_config.json', 'DPO/tokenizer_config.json', 'DPO/merges.txt', 'DPO/adapter_model.safetensors', 'DPO/special_tokens_map.json', 'DPO/training_args.bin', 'DPO/README.md', 'DPO/vocab.json']


In [15]:
dpo_model = AutoModelForCausalLM.from_pretrained('./DPO')

### Generation

In [16]:
set_seed(42)

generation_config = GenerationConfig(do_sample=True, # to generate diverse text
                                     top_k=1, temperature=0.1, max_new_tokens=25, pad_token_id=tokenizer.eos_token_id)

PROMPT = "Is a higher octane gasoline better for your car?"
inputs = tokenizer(PROMPT, return_tensors='pt')
outputs = dpo_model.generate(**inputs, generation_config=generation_config)
print("DPO response:\t",tokenizer.decode(outputs[0], skip_special_tokens=True))

gpt2_model = AutoModelForCausalLM.from_pretrained('gpt2')
outputs = gpt2_model.generate(**inputs, generation_config=generation_config)
print("\nGPT2 response:\t",tokenizer.decode(outputs[0], skip_special_tokens=True))

DPO response:	 Is a higher octane gasoline better for your car?

The answer is yes. The higher octane gasoline is better for your car.

The higher octane gasoline

GPT2 response:	 Is a higher octane gasoline better for your car?

The answer is yes. The higher octane gasoline is more efficient and more fuel efficient.

The higher oct


* The model is trained on a small data for 5 epochs only.

In [26]:
dataset = load_dataset("argilla/ultrafeedback-binarized-preferences-cleaned")
dataset['train']

Dataset({
    features: ['source', 'prompt', 'chosen', 'chosen-rating', 'chosen-model', 'rejected', 'rejected-rating', 'rejected-model'],
    num_rows: 60917
})

In [27]:
cnt = 10000
dataset['train'] = dataset['train'].select(range(cnt))

def process(row):
    del row["source"]
    del row["chosen-rating"]
    del row["chosen-model"]
    del row["rejected-rating"]
    del row["rejected-model"]
    row["chosen"] = row["chosen"][-1]["content"]
    row["rejected"] = row["rejected"][-1]["content"]
    return row

dataset['train'] = dataset['train'].map(process, num_proc=multiprocessing.cpu_count(), load_from_cache_file=False)

Map (num_proc=2):   0%|          | 0/10000 [00:00<?, ? examples/s]

In [28]:
train_size = int(0.8 * len(dataset['train']))
eval_size = len(dataset['train']) - train_size

train_dataset = dataset['train'].select(range(train_size))
eval_dataset = dataset['train'].select(range(train_size, train_size + eval_size))

train_dataset, train_dataset[0]

(Dataset({
     features: ['prompt', 'chosen', 'rejected'],
     num_rows: 8000
 }),
 {'prompt': 'Can you write a C++ program that prompts the user to enter the name of a country and checks if it borders the Mediterranean Sea? Here\'s some starter code to help you out:\n#include <iostream>\n#include <string>\nusing namespace std;\nint main() {\n    string country;\n    // prompt user for input\n    cout << "Enter the name of a country: ";\n    cin >> country;\n    // check if country borders the Mediterranean Sea\n    // [C++ code]\n    return 0;\n}',
  'chosen': 'Here\'s a C++ program that prompts the user to enter the name of a country and checks if it borders the Mediterranean Sea:\n\n#include <iostream>\n#include <string>\n#include <set>\n#include <map>\n#include <algorithm>\n\nusing namespace std;\n\nint main() {\n    // store countries and their bordering seas in a map\n    map<string, set<string>> countries;\n    countries["Algeria"] = {"Mediterranean Sea", "North African Coast"

In [43]:
PROMPT = input()

What are the 5 most spoken languages in the world?


In [44]:
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
generation_config = GenerationConfig(do_sample=True, top_k=50, temperature=0.7, max_new_tokens=50, pad_token_id=tokenizer.eos_token_id)

In [45]:
def generate_dpo_response(prompt):
    inputs = tokenizer(prompt, return_tensors='pt')
    outputs = dpo_model.generate(**inputs, generation_config=generation_config)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

def generate_gpt2_response(prompt):
    inputs = tokenizer(prompt, return_tensors='pt')
    outputs = gpt2_model.generate(**inputs, generation_config=generation_config)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

dpo_response = generate_dpo_response(PROMPT)
gpt2_response = generate_gpt2_response(PROMPT)

print("DPO response:\t", dpo_response)
print("\nGPT-2 response:\t", gpt2_response)

DPO response:	 What are the 5 most spoken languages in the world?

A: English, French, Italian, Spanish .

. English, French, Italian, Spanish . A lot of people like me, but I don't take it quite as seriously as I would like.

. People like me

GPT-2 response:	 What are the 5 most spoken languages in the world?

English:

"English is the language of peace, and it is the language that we live by. It is the language of war. It is the language of tyranny. It is the language of oppression. It is the language of
