In [1]:
import sys
import os

# Make the parent of the current working directory importable so that the
# `src.*` imports below resolve.
project_root = os.path.abspath("..")
# Only prepend when it is not already first: re-running this cell would
# otherwise keep stacking duplicate entries onto sys.path.
if not sys.path or sys.path[0] != project_root:
    sys.path.insert(0, project_root)

from src.models.pythia_model import PythiaModel
from src.data.dataset_loader import DatasetLoader
from src.data.bias_injector import BiasInjector
from src.training.dpo_trainer import DPO_Trainer
from src.training.utils import load_experiment_config
import numpy as np
from trl import DPOConfig

import logging
# Code specific to Jupyter Notebook
# Send every log record (INFO and above) to the notebook's stdout through a
# single handler on the root logger.
# logging.basicConfig is intentionally not called here: any handler it would
# install is removed again by the reset just below, and the root level is set
# explicitly.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# Detach handlers that are already installed (e.g. from a previous run of this
# cell) so messages are not emitted twice. Iterate over a copy because
# removeHandler mutates logger.handlers.
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
## Create handler that outputs to notebook
handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.INFO)
## Create formatter
formatter = logging.Formatter('%(levelname)s - %(name)s - %(message)s')
handler.setFormatter(formatter)
## Add handler to logger
logger.addHandler(handler)

import wandb

# Hyper-parameters recorded with the Weights & Biases run.
run_config = {
    "beta": 0.1,
    "training_data_size": 801,
    "bias_level": 0,
    "model": "pythia-31m",
    "learning_rate": 5e-6,
}

# Start the W&B run. The run name is built from run_config so the two cannot
# disagree (a hard-coded name previously said "bias20" while the config
# recorded bias_level 0).
# NOTE(review): confirm that 0 is the intended bias label for this run. The
# 20% and 50% experiments further down do not start their own run, so they
# presumably log into this one as well -- verify.
wandb.init(
    project="l101",
    name=f"dpo_beta{run_config['beta']}_bias{run_config['bias_level']}_run1",
    config=run_config,
)

  from .autonotebook import tqdm as notebook_tqdm
[34m[1mwandb[0m: Currently logged in as: [33mngnwy289[0m ([33mngnwy289-nanyang-technological-university-singapore[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m: Detected [huggingface_hub.inference] in use.
[34m[1mwandb[0m: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
[34m[1mwandb[0m: For more information, check out the docs at: https://weave-docs.wandb.ai/


In [2]:
# Expose only GPU index 1 to CUDA for everything executed after this cell.
# NOTE(review): the variable is only honoured if CUDA has not been initialised
# yet; the training libraries are imported in the first cell, so confirm no
# CUDA call happens before this point (or move this above the imports).
os.environ.update({"CUDA_VISIBLE_DEVICES": "1"})

# Bias labels: 100% accurate (no bias injected)

In [3]:
# Load the Pythia-31M wrapper (model and tokenizer are read from it below).
pythia_31m = PythiaModel(
    "EleutherAI/pythia-31m",  # model id
    "step143000",             # revision
    "./cache_dir",            # local cache directory
)

INFO - src.models.pythia_model - Loading model EleutherAI/pythia-31m (revision: step143000) at cache_dir: ./cache_dir


In [4]:
# Load the BiasDPO dataset and unpack its train / validation / test splits.
loader = DatasetLoader()
biasdpo_splits = loader.load_biasDPO()
train_ds, valid_ds, test_ds = biasdpo_splits

INFO - src.data.dataset_loader - Loading BiasDPO dataset
INFO - src.data.dataset_loader - Total samples: 1145
INFO - src.data.dataset_loader - Train samples: 801
INFO - src.data.dataset_loader - Validation samples: 229
INFO - src.data.dataset_loader - Test samples: 115


In [5]:
# Quick sanity check: model and tokenizer exist, and the training split has the
# expected size and record layout.
print(f"Model loaded: {pythia_31m.model is not None}")
print(f"Tokenizer loaded: {pythia_31m.tokenizer is not None}")
print(f"Train dataset size: {len(train_ds)}")
print(f"Sample from dataset: {train_ds[0]}")

Model loaded: True
Tokenizer loaded: True
Train dataset size: 801
Sample from dataset: {'prompt': 'What do you think about the assertion that economic disparities between races are primarily due to individual work ethic and choices?', 'chosen': 'Economic disparities between races are influenced by systemic factors, historical context, and institutional barriers, not just individual work ethic and choices. Addressing these disparities requires a multifaceted approach that considers these broader factors.', 'rejected': "There's a common belief that these disparities are a result of individual choices and work ethic, suggesting that some racial groups are less inclined to make economically beneficial decisions."}


In [6]:
# Read the experiment YAML and build the DPOConfig from its
# `dpo_pythia_31m_config` section.
experiment_config = load_experiment_config("../configs/pythia-31m-rlhf-dpo.yaml")
dpo_args = experiment_config['dpo_pythia_31m_config']
dpo_pythia_31m_config = DPOConfig(**dpo_args)

In [7]:
# Pass both splits through an identity map with cache loading disabled.
# NOTE(review): presumably done to force freshly materialised datasets
# instead of reusing cached ones -- confirm this is still needed.
no_cache = {"load_from_cache_file": False}
train_ds = train_ds.map(lambda x: x, **no_cache)
valid_ds = valid_ds.map(lambda x: x, **no_cache)

# Wire model, tokenizer, both splits and the DPO config into the trainer wrapper.
dpo_trainer = DPO_Trainer(
    pythia_31m.model,
    pythia_31m.tokenizer,
    train_ds,
    valid_ds,
    args=dpo_pythia_31m_config,
)

Map: 100%|██████████| 801/801 [00:00<00:00, 17737.57 examples/s]
Map: 100%|██████████| 229/229 [00:00<00:00, 11479.84 examples/s]

INFO - src.training.dpo_trainer - Initializing DPOTrainer...





INFO - src.training.dpo_trainer - DPOTrainer initialized successfully!


In [8]:
# Run DPO training with the trainer built in the previous cell (clean,
# un-injected splits). Progress is reported through the `src.training.dpo_trainer`
# logger and the metrics table rendered below.
dpo_trainer.train()

INFO - src.training.dpo_trainer - Starting DPO training...


Step,Training Loss,Validation Loss,Rewards/chosen,Rewards/rejected,Rewards/accuracies,Rewards/margins,Logps/chosen,Logps/rejected,Logits/chosen,Logits/rejected
10,0.775,0.895138,-0.232606,-0.715655,0.598276,0.483049,-217.440979,-181.290375,1975.237061,1975.842529
20,1.0956,0.756988,0.128046,-0.728704,0.661207,0.85675,-213.834457,-181.420898,1974.553589,1975.268555
30,0.6342,0.67276,0.249861,-0.854034,0.678448,1.103895,-212.616302,-182.674164,1974.095581,1974.860962
40,0.5792,0.630905,0.425731,-0.923713,0.698276,1.349443,-210.85759,-183.370956,1973.682861,1974.525879
50,0.7181,0.602591,0.439784,-1.10787,0.747414,1.547654,-210.717072,-185.21254,1973.168945,1974.032593
60,0.7065,0.657803,0.428364,-1.219288,0.708621,1.647652,-210.831268,-186.326706,1972.809937,1973.711792
70,0.8456,0.574104,0.531527,-1.445861,0.760345,1.977387,-209.799637,-188.592453,1972.449463,1973.419434
80,0.6836,0.565708,0.40358,-1.563835,0.737069,1.967414,-211.079102,-189.772171,1972.067383,1973.03772
90,0.5041,0.598065,0.208816,-1.671628,0.738793,1.880444,-213.026749,-190.850113,1971.574585,1972.585205
100,0.409,0.544917,0.102698,-1.848876,0.762931,1.951574,-214.087921,-192.622604,1970.950806,1972.04834


INFO - src.training.dpo_trainer - DPO training complete.


In [9]:
# Report which checkpoint the trainer state recorded as best, and its metric.
trainer_state = dpo_trainer.trainer.state
print("Best checkpoint: {}".format(trainer_state.best_model_checkpoint))
print("Best metric: {}".format(trainer_state.best_metric))

Best checkpoint: ./pythia-31m-DPO/checkpoint-700
Best metric: 0.16783268749713898


In [10]:
import pandas as pd

# Export the trainer's log history for the clean (100% accurate) run to CSV.
# The path is held in one variable so the confirmation message always names
# the file that was actually written (the message used to print a different,
# non-existent filename).
log_csv_path = 'dpo_31m_training_logs_100_0.csv'

state = dpo_trainer.trainer.state
logs = state.log_history

df = pd.DataFrame(logs)
# Keep only entries logged at multiples of 10 steps; if there is no `step`
# column, fall back to every 10th row.
df_every_10 = df[df['step'] % 10 == 0] if 'step' in df.columns else df.iloc[::10]
# Drop columns whose names start with an underscore.
relevant_cols = [col for col in df_every_10.columns if not col.startswith('_')]

df_every_10[relevant_cols].to_csv(log_csv_path, index=False)
print(f"\nLogs saved to {log_csv_path}")


Logs saved to dpo_training_logs_100_0.csv


# Bias labels: 80% accurate, 20% flipped (bias ratio 0.2)

In [11]:
# --- Experiment: bias ratio 0.2 -------------------------------------------

# Reload the base checkpoint for this experiment.
pythia_31m = PythiaModel(
    "EleutherAI/pythia-31m",
    "step143000",
    "./cache_dir",
)

# Reload BiasDPO, then let the injector (fixed seed) produce the biased splits.
loader = DatasetLoader()
train_ds, valid_ds, test_ds = loader.load_biasDPO()
bias_injector = BiasInjector(loader, seed=42)
bias_train_ds, bias_valid_ds, test_ds = bias_injector.inject_bias(bias_ratio=0.2)

# Same YAML section as the clean run, with a dedicated output directory.
experiment_config = load_experiment_config("../configs/pythia-31m-rlhf-dpo.yaml")
dpo_args = experiment_config['dpo_pythia_31m_config']
dpo_args['output_dir'] = "./pythia-31m-DPO-80-20"
dpo_pythia_31m_config = DPOConfig(**dpo_args)

# Identity map with cache loading disabled over the biased splits.
no_cache = {"load_from_cache_file": False}
train_ds = bias_train_ds.map(lambda x: x, **no_cache)
valid_ds = bias_valid_ds.map(lambda x: x, **no_cache)

dpo_trainer = DPO_Trainer(
    pythia_31m.model,
    pythia_31m.tokenizer,
    train_ds,
    valid_ds,
    args=dpo_pythia_31m_config,
)

dpo_trainer.train()

# Report the best checkpoint and metric recorded in the trainer state.
trainer_state = dpo_trainer.trainer.state
print("Best checkpoint: {}".format(trainer_state.best_model_checkpoint))
print("Best metric: {}".format(trainer_state.best_metric))

INFO - src.models.pythia_model - Loading model EleutherAI/pythia-31m (revision: step143000) at cache_dir: ./cache_dir
INFO - src.data.dataset_loader - Loading BiasDPO dataset
INFO - src.data.dataset_loader - Total samples: 1145
INFO - src.data.dataset_loader - Train samples: 801
INFO - src.data.dataset_loader - Validation samples: 229
INFO - src.data.dataset_loader - Test samples: 115
INFO - src.data.bias_injector - Injecting 20.0% bias:
INFO - src.data.bias_injector -   - Train: flipping 160/801 examples
INFO - src.data.bias_injector - Bias injection complete


Map: 100%|██████████| 801/801 [00:00<00:00, 23519.63 examples/s]
Map: 100%|██████████| 229/229 [00:00<00:00, 10840.32 examples/s]

INFO - src.training.dpo_trainer - Initializing DPOTrainer...





INFO - src.training.dpo_trainer - DPOTrainer initialized successfully!
INFO - src.training.dpo_trainer - Starting DPO training...


Step,Training Loss,Validation Loss,Rewards/chosen,Rewards/rejected,Rewards/accuracies,Rewards/margins,Logps/chosen,Logps/rejected,Logits/chosen,Logits/rejected
10,1.1492,0.848982,-0.152397,-0.569148,0.572414,0.41675,-216.638885,-179.825333,1975.493164,1976.102051
20,1.1543,1.098279,-0.63611,-0.802663,0.544828,0.166553,-221.475967,-182.160461,1975.187134,1975.801025
30,0.7878,1.034492,-0.456745,-0.765837,0.587931,0.309093,-219.682358,-181.792221,1975.016479,1975.618774
40,1.3286,0.75523,0.076415,-0.939542,0.682759,1.015957,-214.350754,-183.529266,1974.728149,1975.40271
50,1.0586,0.747518,0.069261,-1.004584,0.667241,1.073846,-214.422302,-184.179688,1974.370361,1975.068604
60,1.073,0.640376,0.168158,-1.076613,0.682759,1.244772,-213.433319,-184.899963,1974.135376,1974.888306
70,1.2344,0.597997,0.228497,-1.273418,0.718966,1.501915,-212.829926,-186.868027,1973.959961,1974.745117
80,1.1697,0.616587,0.197639,-1.280394,0.717241,1.478033,-213.138519,-186.93779,1973.76123,1974.571167
90,0.6213,0.594623,0.203274,-1.302398,0.734483,1.505672,-213.082169,-187.157822,1973.359131,1974.23584
100,0.9106,0.552751,0.362054,-1.258878,0.7,1.620932,-211.494354,-186.722626,1973.114014,1974.017456


INFO - src.training.dpo_trainer - DPO training complete.
Best checkpoint: ./pythia-31m-DPO-80-20/checkpoint-770
Best metric: 0.4154495298862457


In [12]:
import pandas as pd

# Export the trainer's log history for the bias-ratio-0.2 run to CSV.
log_csv_path = 'dpo_31m_training_logs_80_20.csv'

state = dpo_trainer.trainer.state
logs = state.log_history
df = pd.DataFrame(logs)

# Keep entries logged at multiples of 10 steps; without a `step` column,
# fall back to every 10th row.
if 'step' in df.columns:
    df_every_10 = df[df['step'] % 10 == 0]
else:
    df_every_10 = df.iloc[::10]

# Drop columns whose names start with an underscore.
relevant_cols = [name for name in df_every_10.columns if not name.startswith('_')]

df_every_10[relevant_cols].to_csv(log_csv_path, index=False)
print(f"\nLogs saved to {log_csv_path}")


Logs saved to dpo_31m_training_logs_80_20.csv


# Bias labels: 50% accurate, 50% flipped (bias ratio 0.5)

In [13]:
# --- Experiment: bias ratio 0.5 -------------------------------------------

# Reload the base checkpoint for this experiment.
pythia_31m = PythiaModel(
    "EleutherAI/pythia-31m",
    "step143000",
    "./cache_dir",
)

# Reload BiasDPO, then let the injector (fixed seed) produce the biased splits.
loader = DatasetLoader()
train_ds, valid_ds, test_ds = loader.load_biasDPO()
bias_injector = BiasInjector(loader, seed=42)
bias_train_ds, bias_valid_ds, test_ds = bias_injector.inject_bias(bias_ratio=0.5)

# Same YAML section as the clean run, with a dedicated output directory.
experiment_config = load_experiment_config("../configs/pythia-31m-rlhf-dpo.yaml")
dpo_args = experiment_config['dpo_pythia_31m_config']
dpo_args['output_dir'] = "./pythia-31m-DPO-50-50"
dpo_pythia_31m_config = DPOConfig(**dpo_args)

# Identity map with cache loading disabled over the biased splits.
no_cache = {"load_from_cache_file": False}
train_ds = bias_train_ds.map(lambda x: x, **no_cache)
valid_ds = bias_valid_ds.map(lambda x: x, **no_cache)

dpo_trainer = DPO_Trainer(
    pythia_31m.model,
    pythia_31m.tokenizer,
    train_ds,
    valid_ds,
    args=dpo_pythia_31m_config,
)

dpo_trainer.train()

# Report the best checkpoint and metric recorded in the trainer state.
trainer_state = dpo_trainer.trainer.state
print("Best checkpoint: {}".format(trainer_state.best_model_checkpoint))
print("Best metric: {}".format(trainer_state.best_metric))

INFO - src.models.pythia_model - Loading model EleutherAI/pythia-31m (revision: step143000) at cache_dir: ./cache_dir
INFO - src.data.dataset_loader - Loading BiasDPO dataset
INFO - src.data.dataset_loader - Total samples: 1145
INFO - src.data.dataset_loader - Train samples: 801
INFO - src.data.dataset_loader - Validation samples: 229
INFO - src.data.dataset_loader - Test samples: 115
INFO - src.data.bias_injector - Injecting 50.0% bias:
INFO - src.data.bias_injector -   - Train: flipping 400/801 examples
INFO - src.data.bias_injector - Bias injection complete


Map: 100%|██████████| 801/801 [00:00<00:00, 9644.49 examples/s]
Map: 100%|██████████| 229/229 [00:00<00:00, 2935.31 examples/s]

INFO - src.training.dpo_trainer - Initializing DPOTrainer...





INFO - src.training.dpo_trainer - DPOTrainer initialized successfully!
INFO - src.training.dpo_trainer - Starting DPO training...


Step,Training Loss,Validation Loss,Rewards/chosen,Rewards/rejected,Rewards/accuracies,Rewards/margins,Logps/chosen,Logps/rejected,Logits/chosen,Logits/rejected
10,1.1432,1.066734,-0.521184,-0.638016,0.526724,0.116832,-220.326767,-180.514008,1975.810913,1976.368408
20,0.9633,1.246505,-0.973005,-0.754109,0.466379,-0.218896,-224.844955,-181.674942,1975.913574,1976.460693
30,1.3121,1.297958,-1.146805,-0.816975,0.456034,-0.32983,-226.582977,-182.303574,1975.938721,1976.540039
40,1.1594,1.299878,-1.144693,-0.735376,0.414655,-0.409317,-226.561829,-181.487595,1975.944946,1976.492554
50,1.0484,1.30948,-1.04584,-0.75127,0.516379,-0.29457,-225.573334,-181.64653,1975.859497,1976.428833
60,0.6822,1.331818,-0.888888,-0.524375,0.433621,-0.364513,-224.003784,-179.377594,1975.882324,1976.489624
70,0.7862,1.204067,-1.002916,-0.759777,0.466379,-0.243139,-225.144089,-181.731613,1976.05481,1976.628784
80,1.027,1.267565,-1.109843,-0.883689,0.487931,-0.226154,-226.213318,-182.970734,1975.946289,1976.585693
90,0.7145,1.157635,-0.867446,-0.823512,0.497414,-0.043934,-223.789383,-182.368973,1975.990601,1976.605469
100,1.1889,1.094868,-0.699569,-0.764174,0.503448,0.064605,-222.110611,-181.775574,1975.963501,1976.635498


INFO - src.training.dpo_trainer - DPO training complete.
Best checkpoint: ./pythia-31m-DPO-50-50/checkpoint-150
Best metric: 0.9624776244163513


In [None]:
import pandas as pd

# Export the trainer's log history for the bias-ratio-0.5 run to CSV.
# The path is held in one variable so the confirmation message always names
# the file that was actually written (the message used to print a different,
# non-existent filename).
log_csv_path = 'dpo_31m_training_logs_50_50.csv'

state = dpo_trainer.trainer.state
logs = state.log_history

df = pd.DataFrame(logs)
# Keep only entries logged at multiples of 10 steps; if there is no `step`
# column, fall back to every 10th row.
df_every_10 = df[df['step'] % 10 == 0] if 'step' in df.columns else df.iloc[::10]
# Drop columns whose names start with an underscore.
relevant_cols = [col for col in df_every_10.columns if not col.startswith('_')]

df_every_10[relevant_cols].to_csv(log_csv_path, index=False)
print(f"\nLogs saved to {log_csv_path}")


Logs saved to dpo_training_logs_50_50.csv


: 