In [1]:
# This notebook is used to generate the mixed datasets
import pandas as pd
import ndjson
import os

In [2]:
# - reward-bench
splits = {'raw': 'data/raw-00000-of-00001.parquet', 'filtered': 'data/filtered-00000-of-00001.parquet'}
data_reward_bench = pd.read_parquet("hf://datasets/allenai/reward-bench/" + splits["filtered"])
selected_data_reward_bench = data_reward_bench.loc[data_reward_bench['subset'].str.startswith("xstest-") | data_reward_bench['subset'].str.startswith("refusals-") | data_reward_bench['subset'].str.startswith("donotanswer") | data_reward_bench['subset'].str.startswith("hep-")
]
selected_data_reward_bench["new_id"] = selected_data_reward_bench["subset"] + "/" + selected_data_reward_bench["id"].astype(str)
selected_data_reward_bench = selected_data_reward_bench[['new_id', 'prompt']].rename(columns={'new_id': 'id'})

  from .autonotebook import tqdm as notebook_tqdm
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_data_reward_bench["new_id"] = selected_data_reward_bench["subset"] + "/" + selected_data_reward_bench["id"].astype(str)


In [3]:
# - CodeUltraFeedback
data_code_ultra_feedback = pd.read_parquet("hf://datasets/coseal/CodeUltraFeedback/data/train-00000-of-00001.parquet")
selected_data_code_ultra_feedback = data_code_ultra_feedback[['instruction']].reset_index()
selected_data_code_ultra_feedback["id"] = 'code_ultra_feedback/' + selected_data_code_ultra_feedback["index"].astype(str)
selected_data_code_ultra_feedback = selected_data_code_ultra_feedback[['id', 'instruction']].rename(columns={'instruction': 'prompt'})
selected_data_code_ultra_feedback = selected_data_code_ultra_feedback.sample(n=1016, random_state=1)

In [4]:
# - mix-instruct
splits = {'train': 'train_data_prepared.jsonl', 'validation': 'val_data_prepared.jsonl', 'test': 'test_data_prepared.jsonl'}
data_mix_instruct = pd.read_json("hf://datasets/llm-blender/mix-instruct/" + splits["train"], lines=True)
selected_data_mix_instruct = data_mix_instruct[['id', 'instruction', 'input']]
selected_data_mix_instruct['prompt'] = selected_data_mix_instruct['instruction'] + ' ' + selected_data_mix_instruct['input']
selected_data_mix_instruct = selected_data_mix_instruct[['id', 'prompt']]
selected_data_mix_instruct = selected_data_mix_instruct.sample(n=6000, random_state=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_data_mix_instruct['prompt'] = selected_data_mix_instruct['instruction'] + ' ' + selected_data_mix_instruct['input']


In [5]:
# - BeaverTails
splits = {'330k_train': 'round0/330k/train.jsonl.xz', '330k_test': 'round0/330k/test.jsonl.xz', '30k_train': 'round0/30k/train.jsonl.gz', '30k_test': 'round0/30k/test.jsonl.gz'}
data_beaver_tails = pd.read_json("hf://datasets/PKU-Alignment/BeaverTails/" + splits["30k_train"], lines=True)

def is_prompt_harmful(category):
    for k, v in category.items():
        if v:
            return True
    return False

# select only the prompts that are harmful
selected_data_beaver_tails = data_beaver_tails[data_beaver_tails['category'].apply(is_prompt_harmful)]
selected_data_beaver_tails = selected_data_beaver_tails[['prompt']].drop_duplicates().reset_index()
selected_data_beaver_tails["id"] = 'beaver_tails/' + selected_data_beaver_tails["index"].astype(str)
selected_data_beaver_tails = selected_data_beaver_tails[['id', 'prompt']]
selected_data_beaver_tails = selected_data_beaver_tails.sample(n=1260, random_state=1)

In [7]:
# save the selected data as a JSONL file
selected_data = pd.concat([selected_data_reward_bench, selected_data_code_ultra_feedback, selected_data_beaver_tails, selected_data_mix_instruct])
selected_data_dict = selected_data.to_dict(orient='records')

file_path = '../data/mixed_dataset.jsonl'
os.makedirs(os.path.dirname(file_path), exist_ok=True)
with open(file_path, 'w', encoding='utf8') as f:
    ndjson.dump(selected_data_dict, f, ensure_ascii=False)

In [None]:
# merge model responses
from utils import load_sample
import json

model_name_list = ['gpt-4o', 'gpt-35-turbo', 'llama-31-8b', 'mistral-7b', 'mistral-8x7b', 'phi-3-medium', 'phi-3-mini']
response_list = [load_sample(fname=f'../outputs/mixed_dataset_{model_name}.jsonl', is_jsonl=True) for model_name in model_name_list]

data_original = response_list[0]
for data_addon, model_addon in zip(response_list[1:], model_name_list[1:]):
    for d1, d2 in zip(data_original, data_addon):
        d1[model_addon] = d2[model_addon]

file_name = '../data/mixed_dataset_ALL.jsonl'
os.makedirs(os.path.dirname(file_path), exist_ok=True)
with open(file_name, 'w') as f:
    for d in data_original:
        f.write(json.dumps(d) + "\n")

In [14]:
# split the mixed dataset into train, validation, and test sets
from utils import load_sample
import json

# load and permutate the mixed dataset and split it into train, validation, and test sets
data = load_sample(fname='../data/mixed_dataset_armoRM_ALL_token_num.jsonl')
import random
random.seed(1)
random.shuffle(data)

train_data = data[:int(0.8*len(data))]
validation_data = data[int(0.8*len(data)):int(0.9*len(data))]
test_data = data[int(0.9*len(data)):]
print(len(train_data), len(validation_data), len(test_data))

with open(f"../data/mixed_dataset_armoRM_ALL_token_num_train.jsonl", 'w') as f:
    for d in train_data:
        f.write(json.dumps(d) + "\n")
        
with open(f"../data/mixed_dataset_armoRM_ALL_token_num_validation.jsonl", 'w') as f:
    for d in validation_data:
        f.write(json.dumps(d) + "\n")
        
with open(f"../data/mixed_dataset_armoRM_ALL_token_num_test.jsonl", 'w') as f:
    for d in test_data:
        f.write(json.dumps(d) + "\n")
        

8000 1000 1000


In [None]:
# load the train splits of the mixed dataset and randomly select 10k response pairs stored as Dataset to train the distilled reward model.
from utils import load_sample
from datasets import Dataset, DatasetDict

model_name_list = ['gpt-4o', 'gpt-35-turbo', 'llama-31-8b', 'mistral-7b', 'mistral-8x7b', 'phi-3-medium', 'phi-3-mini']

split_list = ['train', 'validation']
split2response_dict = {}
for split in split_list:
    data = load_sample(fname=f'../data/mixed_dataset_armoRM_ALL_token_num_{split}.jsonl')
    chosen_response_list = []
    rejected_response_list = []
    for d in data:
        user_prompt = d['prompt']
        for model_name in model_name_list:
            model_response = d[model_name]['responses']
            model_scores = d[model_name]['armoRM_scores']
            if min(model_scores) == max(model_scores):
                continue
            sorted_model_response_by_score = sorted(zip(model_response, model_scores), key=lambda x: x[1])
            chosen_response = sorted_model_response_by_score[-1][0]
            rejected_response = sorted_model_response_by_score[0][0]
            
            # get the middle response
            middle_idx = len(sorted_model_response_by_score)//2
            middle_response = sorted_model_response_by_score[middle_idx][0]
            
            if min(model_scores) < sorted_model_response_by_score[middle_idx][1]:
                chosen_response_list.append(f"Human: {user_prompt} Assistant: {middle_response}")
                rejected_response_list.append(f"Human: {user_prompt} Assistant: {rejected_response}")
            
            if max(model_scores) > sorted_model_response_by_score[middle_idx][1]:
                chosen_response_list.append(f"Human: {user_prompt} Assistant: {chosen_response}")
                rejected_response_list.append(f"Human: {user_prompt} Assistant: {middle_response}")
    
    chosen_response_dataset = Dataset.from_dict({'chosen': chosen_response_list, 'rejected': rejected_response_list})
    split2response_dict[split] = chosen_response_dataset
    
split2response_dataset_dict = DatasetDict({'train': split2response_dict['train'], 'validation': split2response_dict['validation']})
split2response_dataset_dict.save_to_disk('../data/mixed_dataset_armoRM_ALL_token_num_reward_modelling_min_max_mid')