In [1]:
import sys
import os

# Put the parent of the notebook's working directory on the import path so the
# project's `src.*` packages can be imported from this notebook.
project_root = os.path.abspath("..")
# Drop any earlier copy before prepending: a plain insert would add one more
# duplicate entry every time this cell is re-run. The root still ends up at
# the front of sys.path, exactly as before.
sys.path[:] = [entry for entry in sys.path if entry != project_root]
sys.path.insert(0, project_root)

from src.models.pythia_model import PythiaModel
from src.data.dataset_loader import DatasetLoader
from src.data.bias_injector import BiasInjector
from src.training.dpo_trainer import DPO_Trainer
import numpy as np
from trl import DPOConfig

import logging
# Code specific to Jupyter Notebook:
# send every log record through a single stdout handler on the root logger.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# Detach whatever handlers are already installed on the root logger through
# the logging API and close them, rather than overwriting `logger.handlers`.
# This keeps the cell safe to re-run (no stacked duplicate handlers) and makes
# a preceding `logging.basicConfig(...)` call unnecessary, since the handler it
# would create was discarded immediately anyway.
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
    stale_handler.close()
## Create handler that outputs to notebook
handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.INFO)
## Create formatter
formatter = logging.Formatter('%(levelname)s - %(name)s - %(message)s')
handler.setFormatter(formatter)
## Add handler to logger
logger.addHandler(handler)

import wandb

# Hyperparameters recorded alongside the Weights & Biases run.
run_config = {
    "beta": 0.1,
    "training_data_size": 801,
    "bias_level": 0,
    "model": "pythia-70m",
    "learning_rate": 5e-6,
}

# Start the W&B run for this experiment.
wandb.init(
    project="l101",
    # Build the run name from the recorded config so the two cannot drift
    # apart: the previous hard-coded name said "bias20" while the logged
    # config recorded bias_level 0. This yields "dpo_beta0.1_bias0_run1".
    name=f"dpo_beta{run_config['beta']}_bias{run_config['bias_level']}_run1",
    config=run_config,
)

  from .autonotebook import tqdm as notebook_tqdm
[34m[1mwandb[0m: Currently logged in as: [33mngnwy289[0m ([33mngnwy289-nanyang-technological-university-singapore[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m: Detected [huggingface_hub.inference] in use.
[34m[1mwandb[0m: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
[34m[1mwandb[0m: For more information, check out the docs at: https://weave-docs.wandb.ai/


In [2]:
# Pin this process to a single GPU by overriding CUDA_VISIBLE_DEVICES.
# NOTE(review): this cell runs after the project imports in the first cell;
# presumably it must take effect before the first CUDA call — confirm.
os.environ.update({"CUDA_VISIBLE_DEVICES": "7"})

In [3]:
# Load the model wrapper; the loader's log line reports these three arguments
# as the model name, the revision, and the cache directory respectively.
pythia_70m = PythiaModel(
    "EleutherAI/pythia-70m-deduped",
    "step143000",
    "./cache_dir",
)

INFO - src.models.pythia_model - Loading model EleutherAI/pythia-70m-deduped (revision: step143000) at cache_dir: ./cache_dir


In [4]:
# Load the BiasDPO dataset and unpack its train / validation / test splits
# (the loader logs 801 / 229 / 115 samples for this run).
loader = DatasetLoader()
(train_ds, valid_ds, test_ds) = loader.load_biasDPO()

INFO - src.data.dataset_loader - Loading BiasDPO dataset
INFO - src.data.dataset_loader - Total samples: 1145
INFO - src.data.dataset_loader - Train samples: 801
INFO - src.data.dataset_loader - Validation samples: 229
INFO - src.data.dataset_loader - Test samples: 115


In [5]:
# Quick sanity checks: model and tokenizer are present, and the training split
# has the expected size and record layout.
print(f"Model loaded: {pythia_70m.model is not None}")
print(f"Tokenizer loaded: {pythia_70m.tokenizer is not None}")
print(f"Train dataset size: {len(train_ds)}")
print(f"Sample from dataset: {train_ds[0]}")

Model loaded: True
Tokenizer loaded: True
Train dataset size: 801
Sample from dataset: {'prompt': 'What do you think about the assertion that economic disparities between races are primarily due to individual work ethic and choices?', 'chosen': 'Economic disparities between races are influenced by systemic factors, historical context, and institutional barriers, not just individual work ethic and choices. Addressing these disparities requires a multifaceted approach that considers these broader factors.', 'rejected': "There's a common belief that these disparities are a result of individual choices and work ethic, suggesting that some racial groups are less inclined to make economically beneficial decisions."}


In [6]:
# Training configuration for the DPO run on pythia-70m-deduped.
dpo_pythia_70m_config = DPOConfig(
    # --- where artifacts go / how the run is labelled ---
    output_dir="./pythia-70m-deduped-DPO",
    # NOTE(review): the name says "bias20"; confirm it matches the bias level
    # actually used for this run.
    run_name="dpo_beta0.1_bias20_run1",
    report_to=["wandb"],

    # --- optimisation ---
    beta=0.1,
    learning_rate=5e-6,
    num_train_epochs=3,
    per_device_train_batch_size=4,
    bf16=True,
    gradient_checkpointing=True,
    seed=42,

    # --- evaluation: step-based, every 10 steps ---
    eval_strategy="steps",
    eval_steps=10,
    per_device_eval_batch_size=8,

    # --- checkpointing: step-based, every 10 steps, no cap on kept checkpoints ---
    save_strategy="steps",
    save_steps=10,
    save_total_limit=None,
    load_best_model_at_end=False,

    # --- logging: step-based, every 5 steps ---
    logging_strategy="steps",
    logging_steps=5,
)

In [7]:
# Build the project's DPO trainer wrapper from the loaded model, its
# tokenizer, the train/validation splits, and the DPO configuration.
dpo_trainer = DPO_Trainer(
    pythia_70m.model,
    pythia_70m.tokenizer,
    train_ds,
    valid_ds,
    args=dpo_pythia_70m_config,
)

INFO - src.training.dpo_trainer - Initializing DPOTrainer...
INFO - src.training.dpo_trainer - DPOTrainer initialized successfully!


In [8]:
# Run DPO training with the trainer built above. The captured output below
# shows the start/complete log messages and a metrics row every 10 steps.
dpo_trainer.train()

INFO - src.training.dpo_trainer - Starting DPO training...


Step,Training Loss,Validation Loss,Rewards/chosen,Rewards/rejected,Rewards/accuracies,Rewards/margins,Logps/chosen,Logps/rejected,Logits/chosen,Logits/rejected
10,1.2435,0.722575,0.417144,-0.311419,0.665517,0.728563,-1427.028564,-1394.592651,1549.618774,1550.311157
20,0.7966,0.670513,0.59433,-0.397805,0.619828,0.992135,-1425.256958,-1395.456543,1549.324707,1550.160889
30,0.7889,0.619247,0.828536,-0.465149,0.710345,1.293686,-1422.914917,-1396.130127,1548.747314,1549.625366
40,0.3269,0.494072,1.088785,-0.528193,0.751724,1.616979,-1420.312256,-1396.760498,1548.296387,1549.220581
50,0.4805,0.550238,1.091787,-0.718436,0.757759,1.810223,-1420.282349,-1398.662964,1547.969849,1548.9021
60,0.8186,0.487532,1.312354,-0.859507,0.781897,2.17186,-1418.07666,-1400.073486,1547.708252,1548.705811
70,0.482,0.479599,1.132605,-1.052567,0.794828,2.185172,-1419.874023,-1402.004272,1547.597534,1548.615723
80,1.1518,0.515841,1.079106,-1.167537,0.799138,2.246643,-1420.408936,-1403.154053,1547.366089,1548.410522
90,0.6042,0.423872,1.041054,-1.399904,0.809483,2.440958,-1420.789551,-1405.477661,1546.868408,1547.993042
100,0.2724,0.389388,1.288872,-1.508532,0.842241,2.797403,-1418.311401,-1406.563965,1546.221558,1547.385742


INFO - src.training.dpo_trainer - DPO training complete.
