In [1]:
import sys
import os

project_root = os.path.abspath("..")
sys.path.insert(0, project_root)

from src.models.pythia_model import PythiaModel
from src.data.dataset_loader import DatasetLoader
from src.data.bias_injector import BiasInjector
from src.training.dpo_trainer import DPO_Trainer
from src.training.utils import load_experiment_config
import numpy as np
from trl import DPOConfig

import logging
logging.basicConfig(level=logging.INFO)

# Notebook-specific logging: send every record through a single stdout handler
# so log lines show up in the cell output.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Formatter: "<LEVEL> - <logger name> - <message>"
formatter = logging.Formatter('%(levelname)s - %(name)s - %(message)s')

# Handler writing INFO-and-above records to stdout
handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.INFO)
handler.setFormatter(formatter)

# Replace (rather than extend) the root handler list, so re-running this cell
# never stacks duplicate handlers and the basicConfig handler is dropped.
logger.handlers = [handler]

import wandb

# Hyper-parameters recorded with the W&B run.
# NOTE(review): these values are written by hand here rather than read from the
# YAML experiment config -- verify they match what the trainer actually uses.
wandb_config = {
    "beta": 0.1,
    "training_data_size": 801,
    "bias_level": 0,
    "model": "pythia-31m",
    "learning_rate": 5e-6,
}

# Derive the run name from the config so the two cannot disagree: a hard-coded
# "bias20" in the name contradicts the logged bias_level of 0.
# NOTE(review): this is the only wandb.init visible in the notebook, yet several
# trainings with different bias levels follow. If the trainer reports to W&B
# they would presumably all land in this one run -- confirm, and consider one
# init/finish pair per experiment.
wandb.init(
    project="l101",
    name=f"dpo_beta{wandb_config['beta']}_bias{wandb_config['bias_level']}_run1",
    config=wandb_config,
)

  from .autonotebook import tqdm as notebook_tqdm
[34m[1mwandb[0m: Currently logged in as: [33mngnwy289[0m ([33mngnwy289-nanyang-technological-university-singapore[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m: Detected [huggingface_hub.inference] in use.
[34m[1mwandb[0m: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
[34m[1mwandb[0m: For more information, check out the docs at: https://weave-docs.wandb.ai/


In [2]:
# Expose only GPU index 6 to this process.
# NOTE(review): this runs after the imports and wandb.init in the first cell;
# it presumably only takes effect if CUDA has not been initialised yet --
# confirm, or move it ahead of the heavy imports.
visible_gpu = "6"
os.environ["CUDA_VISIBLE_DEVICES"] = visible_gpu

# Bias Label (100% accurate)

In [3]:
# Load Pythia-31m through the project wrapper. Per the wrapper's log line the
# positional arguments are: model id, revision, cache directory.
pythia_31m = PythiaModel(
    "EleutherAI/pythia-31m",
    "step143000",
    "./cache_dir",
)

INFO - src.models.pythia_model - Loading model EleutherAI/pythia-31m (revision: step143000) at cache_dir: ./cache_dir


In [4]:
# Load the BiasDPO preference dataset and unpack its three splits.
loader = DatasetLoader()
biasdpo_splits = loader.load_biasDPO()
train_ds, valid_ds, test_ds = biasdpo_splits

INFO - src.data.dataset_loader - Loading BiasDPO dataset
INFO - src.data.dataset_loader - Total samples: 1145
INFO - src.data.dataset_loader - Train samples: 801
INFO - src.data.dataset_loader - Validation samples: 229
INFO - src.data.dataset_loader - Test samples: 115


In [5]:
# Sanity check: model and tokenizer are present, and the training split has the
# expected size and prompt/chosen/rejected record layout.
print(f"Model loaded: {pythia_31m.model is not None}")
print(f"Tokenizer loaded: {pythia_31m.tokenizer is not None}")
print(f"Train dataset size: {len(train_ds)}")
print(f"Sample from dataset: {train_ds[0]}")

Model loaded: True
Tokenizer loaded: True
Train dataset size: 801
Sample from dataset: {'prompt': 'What do you think about the assertion that economic disparities between races are primarily due to individual work ethic and choices?', 'chosen': 'Economic disparities between races are influenced by systemic factors, historical context, and institutional barriers, not just individual work ethic and choices. Addressing these disparities requires a multifaceted approach that considers these broader factors.', 'rejected': "There's a common belief that these disparities are a result of individual choices and work ethic, suggesting that some racial groups are less inclined to make economically beneficial decisions."}


In [6]:
# Read the experiment YAML, pick the DPO section, and turn it into a DPOConfig.
experiment_config = load_experiment_config("../configs/pythia-31m-rlhf-dpo.yaml")
dpo_args = experiment_config['dpo_pythia_31m_config']
dpo_pythia_31m_config = DPOConfig(**dpo_args)

In [7]:
# Pass both splits through an identity map with the `datasets` cache explicitly
# disabled (load_from_cache_file=False), producing freshly built datasets.
# NOTE(review): presumably this avoids stale cached data being picked up by the
# trainer -- confirm the motivation.
train_ds = train_ds.map(lambda x: x, load_from_cache_file=False)
valid_ds = valid_ds.map(lambda x: x, load_from_cache_file=False)

# Build the project's DPO trainer wrapper from the loaded model, its tokenizer,
# the train/validation splits and the DPOConfig created above.
dpo_trainer = DPO_Trainer(pythia_31m.model, pythia_31m.tokenizer, train_ds, valid_ds, args=dpo_pythia_31m_config)

Map: 100%|██████████| 801/801 [00:00<00:00, 14697.28 examples/s]
Map: 100%|██████████| 229/229 [00:00<00:00, 11357.94 examples/s]

INFO - src.training.dpo_trainer - Initializing DPOTrainer...





INFO - src.training.dpo_trainer - DPOTrainer initialized successfully!


In [8]:
# Run DPO training on the unmodified (0% bias) splits using the wrapper built above.
dpo_trainer.train()

INFO - src.training.dpo_trainer - Starting DPO training...


Step,Training Loss,Validation Loss,Rewards/chosen,Rewards/rejected,Rewards/accuracies,Rewards/margins,Logps/chosen,Logps/rejected,Logits/chosen,Logits/rejected
10,1.1607,0.738058,0.093303,-0.63873,0.64569,0.732033,-214.18187,-180.521118,1975.681152,1976.273682
20,0.4161,0.650819,0.176148,-0.921288,0.688793,1.097436,-213.353424,-183.346725,1975.331543,1975.993164
30,0.8215,0.779783,-0.029702,-0.829207,0.65431,0.799505,-215.411942,-182.425934,1974.893677,1975.64563
40,0.7773,0.696035,0.121367,-1.060473,0.682759,1.181841,-213.90123,-184.738571,1974.491821,1975.286133
50,0.7425,0.609244,0.13446,-1.221766,0.727586,1.356226,-213.770309,-186.351501,1974.000732,1974.877319
60,0.5636,0.623075,0.342134,-1.330693,0.756034,1.672827,-211.693573,-187.440765,1973.387085,1974.338379
70,0.9436,0.557732,0.354772,-1.473727,0.75,1.8285,-211.567215,-188.871124,1972.837769,1973.855835
80,0.5142,0.644498,0.210361,-1.587126,0.691379,1.797487,-213.011292,-190.005096,1972.541382,1973.571655
90,0.5811,0.597246,0.431425,-1.487329,0.725862,1.918754,-210.800644,-189.007141,1972.237915,1973.233276
100,0.8153,0.542056,0.259421,-1.778805,0.74569,2.038225,-212.520706,-191.92189,1971.771362,1972.857666


INFO - src.training.dpo_trainer - DPO training complete.


In [9]:
# Report which checkpoint the trainer state recorded as best, and its metric value.
trainer_state = dpo_trainer.trainer.state
print(f"Best checkpoint: {trainer_state.best_model_checkpoint}")
print(f"Best metric: {trainer_state.best_metric}")

Best checkpoint: ./pythia-31m-DPO/checkpoint-450
Best metric: 0.2034720927476883


In [10]:
import pandas as pd

# Single source of truth for the export path, so the confirmation message below
# always names the file that was actually written.
log_csv_path = 'dpo_31m_training_logs_100_0.csv'

# Training history kept on the trainer state; presumably a list of per-log dicts,
# which pandas turns into one row per entry.
state = dpo_trainer.trainer.state
logs = state.log_history

df = pd.DataFrame(logs)
# Keep entries whose step is a multiple of 10; without a 'step' column fall back
# to every 10th row.
df_every_10 = df[df['step'] % 10 == 0] if 'step' in df.columns else df.iloc[::10]
# Skip underscore-prefixed columns.
relevant_cols = [col for col in df_every_10.columns if not col.startswith('_')]

df_every_10[relevant_cols].to_csv(log_csv_path, index=False)
print(f"\nLogs saved to {log_csv_path}")


Logs saved to dpo_training_logs_100_0.csv


# Bias Label (80% accurate, 20% Bias)

In [11]:
# Settings for this section: fraction of preference pairs to flip and the
# directory that receives this run's checkpoints.
bias_ratio = 0.2
output_dir = "./pythia-31m-DPO-80-20"

# Reload the base model so this run presumably starts from the pretrained
# weights rather than from the model trained in the previous section.
pythia_31m = PythiaModel(
    "EleutherAI/pythia-31m",
    "step143000",
    "./cache_dir",
)

# Load the clean splits, then let the injector produce the biased variants.
loader = DatasetLoader()
train_ds, valid_ds, test_ds = loader.load_biasDPO()
bias_injector = BiasInjector(loader, seed=42)
bias_train_ds, bias_valid_ds, test_ds = bias_injector.inject_bias(bias_ratio=bias_ratio)

# Same YAML config as the baseline, with a section-specific output directory.
experiment_config = load_experiment_config("../configs/pythia-31m-rlhf-dpo.yaml")
dpo_args = experiment_config['dpo_pythia_31m_config']
dpo_args['output_dir'] = output_dir
dpo_pythia_31m_config = DPOConfig(**dpo_args)

# Identity map with the datasets cache disabled, as in the baseline run.
train_ds = bias_train_ds.map(lambda x: x, load_from_cache_file=False)
valid_ds = bias_valid_ds.map(lambda x: x, load_from_cache_file=False)

dpo_trainer = DPO_Trainer(
    pythia_31m.model,
    pythia_31m.tokenizer,
    train_ds,
    valid_ds,
    args=dpo_pythia_31m_config,
)
dpo_trainer.train()

# Report the best checkpoint recorded on the trainer state.
trainer_state = dpo_trainer.trainer.state
print(f"Best checkpoint: {trainer_state.best_model_checkpoint}")
print(f"Best metric: {trainer_state.best_metric}")

INFO - src.models.pythia_model - Loading model EleutherAI/pythia-31m (revision: step143000) at cache_dir: ./cache_dir
INFO - src.data.dataset_loader - Loading BiasDPO dataset
INFO - src.data.dataset_loader - Total samples: 1145
INFO - src.data.dataset_loader - Train samples: 801
INFO - src.data.dataset_loader - Validation samples: 229
INFO - src.data.dataset_loader - Test samples: 115
INFO - src.data.bias_injector - Injecting 20.0% bias:
INFO - src.data.bias_injector -   - Train: flipping 160/801 examples
INFO - src.data.bias_injector - Bias injection complete


Map: 100%|██████████| 801/801 [00:00<00:00, 10339.83 examples/s]
Map: 100%|██████████| 229/229 [00:00<00:00, 3166.59 examples/s]

INFO - src.training.dpo_trainer - Initializing DPOTrainer...





INFO - src.training.dpo_trainer - DPOTrainer initialized successfully!
INFO - src.training.dpo_trainer - Starting DPO training...


Step,Training Loss,Validation Loss,Rewards/chosen,Rewards/rejected,Rewards/accuracies,Rewards/margins,Logps/chosen,Logps/rejected,Logits/chosen,Logits/rejected
10,1.3042,0.972973,-0.396351,-0.642285,0.555172,0.245934,-219.07843,-180.556671,1975.633057,1976.231201
20,0.8285,0.782673,0.081865,-0.682705,0.669828,0.76457,-214.296249,-180.960907,1975.075562,1975.748413
30,0.5178,0.794206,0.117485,-0.791787,0.605172,0.909272,-213.940079,-182.051743,1974.725586,1975.41687
40,1.2298,0.810349,0.073506,-0.732461,0.634483,0.805967,-214.379852,-181.458466,1974.505737,1975.237793
50,0.922,0.721901,0.109085,-0.866545,0.667241,0.97563,-214.024063,-182.799286,1974.566406,1975.336426
60,1.0054,0.746683,0.068295,-0.906556,0.675862,0.974852,-214.431946,-183.199402,1974.452026,1975.249878
70,0.6604,0.710617,0.012149,-1.086825,0.684483,1.098974,-214.993393,-185.00209,1974.017944,1974.834229
80,1.2127,0.71823,0.011238,-0.932171,0.680172,0.943409,-215.002548,-183.455551,1973.78479,1974.625732
90,1.5646,0.707229,-0.153731,-1.223807,0.680172,1.070076,-216.652222,-186.371918,1973.540649,1974.402222
100,1.5835,0.788975,-0.009341,-1.145023,0.663793,1.135682,-215.208313,-185.584076,1973.425903,1974.332031


INFO - src.training.dpo_trainer - DPO training complete.
Best checkpoint: ./pythia-31m-DPO-80-20/checkpoint-970
Best metric: 0.30552300810813904


In [12]:
import pandas as pd

# Turn the trainer's log history into a table.
state = dpo_trainer.trainer.state
logs = state.log_history
df = pd.DataFrame(logs)

# Keep entries logged on multiples of 10 steps; if there is no 'step' column,
# take every 10th row instead.
if 'step' in df.columns:
    df_every_10 = df[df['step'] % 10 == 0]
else:
    df_every_10 = df.iloc[::10]

# Only export columns whose name does not start with an underscore.
relevant_cols = []
for col in df_every_10.columns:
    if not col.startswith('_'):
        relevant_cols.append(col)

df_every_10[relevant_cols].to_csv('dpo_31m_training_logs_80_20.csv', index=False)
print("\nLogs saved to dpo_31m_training_logs_80_20.csv")


Logs saved to dpo_31m_training_logs_80_20.csv


# Bias Label (50% accurate, 50% Bias)

In [13]:
# Settings for this section: fraction of preference pairs to flip and the
# directory that receives this run's checkpoints.
bias_ratio = 0.5
output_dir = "./pythia-31m-DPO-50-50"

# Reload the base model so this run presumably starts from the pretrained
# weights rather than from the model trained in the previous section.
pythia_31m = PythiaModel(
    "EleutherAI/pythia-31m",
    "step143000",
    "./cache_dir",
)

# Load the clean splits, then let the injector produce the biased variants.
loader = DatasetLoader()
train_ds, valid_ds, test_ds = loader.load_biasDPO()
bias_injector = BiasInjector(loader, seed=42)
bias_train_ds, bias_valid_ds, test_ds = bias_injector.inject_bias(bias_ratio=bias_ratio)

# Same YAML config as the baseline, with a section-specific output directory.
experiment_config = load_experiment_config("../configs/pythia-31m-rlhf-dpo.yaml")
dpo_args = experiment_config['dpo_pythia_31m_config']
dpo_args['output_dir'] = output_dir
dpo_pythia_31m_config = DPOConfig(**dpo_args)

# Identity map with the datasets cache disabled, as in the baseline run.
train_ds = bias_train_ds.map(lambda x: x, load_from_cache_file=False)
valid_ds = bias_valid_ds.map(lambda x: x, load_from_cache_file=False)

dpo_trainer = DPO_Trainer(
    pythia_31m.model,
    pythia_31m.tokenizer,
    train_ds,
    valid_ds,
    args=dpo_pythia_31m_config,
)
dpo_trainer.train()

# Report the best checkpoint recorded on the trainer state.
trainer_state = dpo_trainer.trainer.state
print(f"Best checkpoint: {trainer_state.best_model_checkpoint}")
print(f"Best metric: {trainer_state.best_metric}")

INFO - src.models.pythia_model - Loading model EleutherAI/pythia-31m (revision: step143000) at cache_dir: ./cache_dir
INFO - src.data.dataset_loader - Loading BiasDPO dataset
INFO - src.data.dataset_loader - Total samples: 1145
INFO - src.data.dataset_loader - Train samples: 801
INFO - src.data.dataset_loader - Validation samples: 229
INFO - src.data.dataset_loader - Test samples: 115
INFO - src.data.bias_injector - Injecting 50.0% bias:
INFO - src.data.bias_injector -   - Train: flipping 400/801 examples
INFO - src.data.bias_injector - Bias injection complete


Map: 100%|██████████| 801/801 [00:00<00:00, 8768.83 examples/s]
Map: 100%|██████████| 229/229 [00:00<00:00, 3319.54 examples/s]

INFO - src.training.dpo_trainer - Initializing DPOTrainer...





INFO - src.training.dpo_trainer - DPOTrainer initialized successfully!
INFO - src.training.dpo_trainer - Starting DPO training...


Step,Training Loss,Validation Loss,Rewards/chosen,Rewards/rejected,Rewards/accuracies,Rewards/margins,Logps/chosen,Logps/rejected,Logits/chosen,Logits/rejected
10,0.6527,1.137097,-0.639431,-0.535109,0.522414,-0.104322,-221.509247,-179.484924,1975.834106,1976.424194
20,0.6697,1.089172,-0.447153,-0.504803,0.505172,0.05765,-219.586426,-179.18187,1975.40918,1976.047363
30,1.1563,0.867879,-0.141884,-0.555223,0.622414,0.413339,-216.533752,-179.686066,1975.205444,1975.813721
40,1.0845,0.997467,-0.232047,-0.367674,0.543966,0.135627,-217.435394,-177.810577,1975.038574,1975.63623
50,0.8476,1.08467,-0.388255,-0.431083,0.561207,0.042828,-218.997467,-178.444672,1975.116089,1975.688843
60,0.9914,1.289454,-0.914231,-0.618622,0.460345,-0.295609,-224.257217,-180.320053,1975.294678,1975.895996
70,1.3939,1.228126,-0.907026,-0.612583,0.466379,-0.294443,-224.185181,-180.259659,1975.418945,1976.013306
80,0.8498,1.273626,-0.881223,-0.549826,0.451724,-0.331397,-223.927139,-179.632111,1975.535522,1976.138794
90,1.4878,1.316623,-0.710382,-0.328426,0.472414,-0.381956,-222.218735,-177.418106,1975.53418,1976.151855
100,1.2179,1.300974,-0.794102,-0.445644,0.451724,-0.348458,-223.055939,-178.590302,1975.519775,1976.111938


INFO - src.training.dpo_trainer - DPO training complete.
Best checkpoint: ./pythia-31m-DPO-50-50/checkpoint-30
Best metric: 0.8678792715072632


In [None]:
import pandas as pd

# Single source of truth for the export path, so the confirmation message below
# always names the file that was actually written.
log_csv_path = 'dpo_31m_training_logs_50_50.csv'

# Training history kept on the trainer state; presumably a list of per-log dicts,
# which pandas turns into one row per entry.
state = dpo_trainer.trainer.state
logs = state.log_history

df = pd.DataFrame(logs)
# Keep entries whose step is a multiple of 10; without a 'step' column fall back
# to every 10th row.
df_every_10 = df[df['step'] % 10 == 0] if 'step' in df.columns else df.iloc[::10]
# Skip underscore-prefixed columns.
relevant_cols = [col for col in df_every_10.columns if not col.startswith('_')]

df_every_10[relevant_cols].to_csv(log_csv_path, index=False)
print(f"\nLogs saved to {log_csv_path}")


Logs saved to dpo_training_logs_50_50.csv


: 