In [1]:
import os

# Point the Hugging Face cache root at the data disk. This runs in the first
# cell, i.e. before `datasets` is imported further down the notebook
# (presumably so that downloads land here -- confirm against the HF docs).
os.environ.update(HF_HOME="/mnt/hdd1/ljiahao/xianglin/cache/huggingface")

In [2]:
# raw_path  : where upstream repositories / raw downloads are read from.
# data_path : where the converted metadata.json and <model>.json files are written.
# Both live side by side under the same project folder.
raw_path, data_path = (
    f"/mnt/hdd1/ljiahao/xianglin/llm-as-a-judge-attack/{subdir}"
    for subdir in ("raw", "data")
)

In [3]:
import json
import os

# Format
1. Download the raw data to `raw_path` (or to the Hugging Face default cache path).
2. Save `metadata.json` to `data_path/<DatasetName>/`, with keys: `instruction`, (`category`), and any other dataset-specific fields.
3. Save each model's completions to `data_path/<DatasetName>/<model_name>.json`, with keys: `instruction`, `output`.


# MT Bench

In [None]:
!cd raw_path
!git clone https://huggingface.co/spaces/lmsys/mt-bench

In [None]:
import json

# MT-Bench questions inside the cloned repository.
path = f"{raw_path}/mt-bench/data/mt_bench/question.jsonl"

# JSONL: every line is parsed as one JSON object.
with open(path, "r") as f:
    questions = [json.loads(line) for line in f]
questions[0]

In [None]:
# Write the MT-Bench metadata to data_path/MTBench/metadata.json.
mt_bench_path = os.path.join(data_path, "MTBench")
os.makedirs(mt_bench_path, exist_ok=True)
save_path = os.path.join(mt_bench_path, "metadata.json")

# Only the first entry of each question's `turns` is kept as the instruction.
metadata = [
    {
        "question_id": question['question_id'],
        "instruction": question['turns'][0],
        "original_category": question['category'],
    }
    for question in questions
]

with open(save_path, "w") as f:
    json.dump(metadata, f)
metadata[0]

In [None]:
# Convert every MT-Bench model-answer file into
# data_path/MTBench/<model_name>.json with keys: instruction, output,
# original_category. Only the first turn of the first choice is exported.
answer_dir = os.path.join(raw_path, "mt-bench/data/mt_bench/model_answer")
files = os.listdir(answer_dir)

# Index the questions once so each answer is matched with a dict lookup
# instead of rescanning the whole question list for every answer.
questions_by_id = {question['question_id']: question for question in questions}

for file in files:
    # splitext removes only the final extension, so dotted model names such as
    # "gpt-3.5-turbo.jsonl" keep their full name; split(".")[0] cut them down
    # to "gpt-3", producing wrong and potentially colliding output files.
    model_name, extension = os.path.splitext(file)
    if extension != ".jsonl":
        # Not an answer file (e.g. a hidden or stray file) -- nothing to convert.
        continue

    # load model answer (one JSON object per non-blank line)
    path = os.path.join(answer_dir, file)
    with open(path, "r", encoding="utf-8") as f:
        answers = [json.loads(line) for line in f if line.strip()]

    new_dataset = list()
    for answer in answers:
        question = questions_by_id.get(answer['question_id'])
        if question is None:
            # As before, answers whose question_id is unknown are dropped.
            continue
        new_dataset.append({
            "instruction": question['turns'][0],
            "output": answer['choices'][0]['turns'][0],
            "original_category": question['category'],
        })

    print(model_name)
    if new_dataset:
        # Guarded: an empty answer file must not abort the whole export.
        print(new_dataset[0])
    print("-"*100)
    # save new dataset
    save_path = os.path.join(mt_bench_path, f"{model_name}.json")
    with open(save_path, "w") as f:
        json.dump(new_dataset, f)


# AlpacaEval

In [None]:
!cd raw_path
!git clone https://github.com/tatsu-lab/alpaca_eval.git

In [18]:
# NullModel's outputs file is one JSON document (not JSONL). It is loaded into
# `data`, which the next cell uses as the source of the AlpacaEval instructions.
path = os.path.join(raw_path, "alpaca_eval", "results", "NullModel", "model_outputs.json")

with open(path, "r") as f:
    data = json.loads(f.read())


In [None]:
# Write the AlpacaEval metadata to data_path/AlpacaEval/metadata.json,
# keeping the instruction text and the name of its source dataset.
alpaca_path = os.path.join(data_path, "AlpacaEval")
os.makedirs(alpaca_path, exist_ok=True)
save_path = os.path.join(alpaca_path, "metadata.json")

metadata = [
    {
        "instruction": item["instruction"],
        "dataset": item["dataset"]
    }
    for item in data
]

with open(save_path, "w") as f:
    json.dump(metadata, f)

# Show example element
metadata[0]

In [None]:
# Convert each model's AlpacaEval outputs into
# data_path/AlpacaEval/<model_dir>.json with keys: instruction, output.
result_dir = os.path.join(raw_path, "alpaca_eval", "results")
dirs = os.listdir(result_dir)

# `model_dir` avoids shadowing the builtin `dir`.
for model_dir in dirs:
    path = os.path.join(result_dir, model_dir, "model_outputs.json")
    if not os.path.isfile(path):
        # An entry under results/ without a model_outputs.json (a stray file or
        # a folder holding only annotations) is skipped instead of crashing.
        print(f"skip {model_dir}: no model_outputs.json")
        continue

    # Loaded into its own name so the `data` that the metadata cell reads
    # (NullModel outputs) is not overwritten by the last model visited here.
    with open(path, "r", encoding="utf-8") as f:
        model_outputs = json.load(f)

    new_dataset = [
        {
            "instruction": item["instruction"],
            "output": item["output"]
        }
        for item in model_outputs
    ]
    print(model_dir)
    if new_dataset:
        # Guarded: an empty outputs file must not abort the whole export.
        print(new_dataset[0])
    print("-"*100)

    save_path = os.path.join(alpaca_path, f"{model_dir}.json")
    with open(save_path, "w") as f:
        json.dump(new_dataset, f)

# ArenaHard 2.0

In [None]:
! git clone https://github.com/lmarena/arena-hard-auto.git

In [None]:
import json
import os

# Despite its name, `save_dir` is the folder the ArenaHard v2.0 files are READ from.
save_dir = os.path.join(raw_path, "arena-hard-auto", "data", "arena-hard-v2.0")

# question.jsonl holds one JSON object per line.
with open(os.path.join(save_dir, "question.jsonl"), "r") as f:
    questions = [json.loads(line) for line in f]

# Example element
questions[0]

In [None]:
# Write the ArenaHard metadata to data_path/ArenaHard/metadata.json.
# `uid` is kept so that model answers can later be joined back to their question.
arena_hard_path = os.path.join(data_path, "ArenaHard")
os.makedirs(arena_hard_path, exist_ok=True)
save_path = os.path.join(arena_hard_path, "metadata.json")

metadata = [
    {
        "instruction": question["prompt"],
        "original_category": question["category"],
        "original_subcategory": question["subcategory"],
        "uid": question["uid"],
    }
    for question in questions
]

with open(save_path, "w") as f:
    json.dump(metadata, f)

# Example element
print("Example element:")
metadata[0]




In [None]:
# Folder with one answer file per model for ArenaHard v2.0.
answers_dir = os.path.join(raw_path, "arena-hard-auto", "data", "arena-hard-v2.0", "model_answer")

# Keep only *.jsonl files: the following cells derive the model name with
# file[:-6], which is only correct for that 6-character suffix, and they parse
# every listed file as JSONL. Sorting makes the processing order reproducible.
files = sorted(name for name in os.listdir(answers_dir) if name.endswith(".jsonl"))
files
    

In [None]:
# Load every listed answer file into memory, keyed by model name.
answers_dict = {}

for file in files:
    # Drop the last 6 characters (the length of ".jsonl") to get the model name.
    model_name = file[:-6]
    path = os.path.join(answers_dir, file)

    # One JSON object per line.
    with open(path, "r") as f:
        answers = [json.loads(line) for line in f]

    # show answers example
    print(answers[0])

    answers_dict[model_name] = answers

In [None]:
# show answers example
# Displays the first answer record of one specific model so the raw answer
# schema can be inspected. Raises KeyError if "o3-mini-2025-01-31" is not among
# the loaded answer files, and IndexError if that file contained no records.
answers_dict["o3-mini-2025-01-31"][0]

In [None]:
# Join answers to questions by uid.
# `metadata` must be the ArenaHard metadata here (the only one carrying "uid");
# the name is reused by other dataset sections of this notebook.
uid_to_question = {item["uid"]: item["instruction"] for item in metadata}

for file in files:
    # Same model-name rule as when answers_dict was filled: drop the last 6 characters.
    model_name = file[:-6]
    answers = answers_dict[model_name]
    print(model_name)
    print(len(answers))

    # The exported output is the 'answer' field of the last message's content.
    new_dataset = [
        {
            "instruction": uid_to_question[answer["uid"]],
            "output": answer['messages'][-1]['content']['answer']
        }
        for answer in answers
    ]

    print("Example Element:")
    print(new_dataset[0])

    save_path = os.path.join(arena_hard_path, f"{model_name}.json")
    with open(save_path, "w") as f:
        json.dump(new_dataset, f)



# Subjective Dataset:
1. yleo/emerton_dpo_pairs_judge
2. Intel/orca_dpo_pairs
3. jondurbin/py-dpo-v0.1
4. jondurbin/truthy-dpo-v0.1

In [None]:
from datasets import load_dataset
# Load the Emerton DPO pairs dataset by its Hugging Face Hub id.
# `ds` is a shared name: later cells overwrite it with other datasets, so the
# export cell right below must run before the next load_dataset call.
ds = load_dataset("yleo/emerton_dpo_pairs_judge")

In [13]:
# Write the Emerton DPO prompts to data_path/EmertonDPO/metadata.json.
# Every row of the train split is tagged with the fixed category "subjective".
emerton_dpo_path = os.path.join(data_path, "EmertonDPO")
os.makedirs(emerton_dpo_path, exist_ok=True)
save_path = os.path.join(emerton_dpo_path, "metadata.json")

metadata = [
    {
        "instruction": item['input'],
        "category": "subjective",
    }
    for item in ds['train']
]

with open(save_path, "w") as f:
    json.dump(metadata, f)
metadata[0]

{'instruction': "You will be given a definition of a task first, then some input of the task.\nThis task is about using the specified sentence and converting the sentence to Resource Description Framework (RDF) triplets of the form (subject, predicate object). The RDF triplets generated must be such that the triplets accurately capture the structure and semantics of the input sentence. The input is a sentence and the output is a list of triplets of the form [subject, predicate, object] that capture the relationships present in the sentence. When a sentence has more than 1 RDF triplet possible, the output must contain all of them.\n\nAFC Ajax (amateurs)'s ground is Sportpark De Toekomst where Ajax Youth Academy also play.\nOutput:",
 'category': 'subjective'}

In [None]:
# Replace `ds` with the Intel Orca DPO pairs dataset; the trailing expression
# displays its splits and column names (used to pick the prompt column below).
ds = load_dataset("Intel/orca_dpo_pairs")
ds

DatasetDict({
    train: Dataset({
        features: ['system', 'question', 'chosen', 'rejected'],
        num_rows: 12859
    })
})

In [17]:
# Write the Orca DPO prompts to data_path/OrcaDPO/metadata.json.
# Only the 'question' column is exported; every row gets the fixed category "subjective".
orca_dpo_path = os.path.join(data_path, "OrcaDPO")
os.makedirs(orca_dpo_path, exist_ok=True)
save_path = os.path.join(orca_dpo_path, "metadata.json")

metadata = [
    {
        "instruction": item['question'],
        "category": "subjective",
    }
    for item in ds['train']
]

with open(save_path, "w") as f:
    json.dump(metadata, f)
metadata[0]

{'instruction': "You will be given a definition of a task first, then some input of the task.\nThis task is about using the specified sentence and converting the sentence to Resource Description Framework (RDF) triplets of the form (subject, predicate object). The RDF triplets generated must be such that the triplets accurately capture the structure and semantics of the input sentence. The input is a sentence and the output is a list of triplets of the form [subject, predicate, object] that capture the relationships present in the sentence. When a sentence has more than 1 RDF triplet possible, the output must contain all of them.\n\nAFC Ajax (amateurs)'s ground is Sportpark De Toekomst where Ajax Youth Academy also play.\nOutput:",
 'category': 'subjective'}

In [18]:
# Replace `ds` with the py-dpo dataset; the trailing expression displays its
# splits and column names (used to pick the prompt and id columns below).
ds = load_dataset("jondurbin/py-dpo-v0.1")
ds

README.md:   0%|          | 0.00/1.06k [00:00<?, ?B/s]

py-dpo.parquet:   0%|          | 0.00/14.6M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/9466 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['prompt', 'chosen', 'rejected', 'id'],
        num_rows: 9466
    })
})

In [20]:
# Write the py-dpo prompts to data_path/PyDPO/metadata.json.
# The dataset's own 'id' is kept alongside the fixed category "subjective".
py_dpo_path = os.path.join(data_path, "PyDPO")
os.makedirs(py_dpo_path, exist_ok=True)
save_path = os.path.join(py_dpo_path, "metadata.json")

metadata = [
    {
        "instruction": item['prompt'],
        "category": "subjective",
        "id": item['id'],
    }
    for item in ds['train']
]

with open(save_path, "w") as f:
    json.dump(metadata, f)
metadata[0]

{'instruction': 'Use the function to debug the given program and prevent the segmentation fault. Your solution should also handle the case where the array contains duplicate elements. You are not allowed to use any additional data structures. Additionally, the time complexity of your solution should be O(n) and the space complexity should be O(1).\n\n```python\ndef debug_program(arr):\n    n = len(arr)\n    for i in range(n):\n        if arr[i] == i:\n            return i\n    return -1\n\n# Test Case\narr = [0, 1, 2, 3, 4]\nprint(debug_program(arr))  # Expected output: -1\n```\n\n**Additional Requirements:**\n\n- The program should be able to handle arrays of any length.\n- The program should be able to handle arrays with duplicate elements.\n- The solution should use a divide and conquer approach to solve the problem.\n- The solution should not modify the input array.\n- The solution should be implemented in Python.',
 'category': 'subjective',
 'id': '8c94f83f-6a5a-5f8c-98a2-e242d77

In [21]:
# Replace `ds` with the truthy-dpo dataset; the trailing expression displays its
# splits and column names (used to pick the prompt and id columns below).
ds = load_dataset("jondurbin/truthy-dpo-v0.1")
ds

README.md:   0%|          | 0.00/904 [00:00<?, ?B/s]

truthy-dpo.parquet:   0%|          | 0.00/653k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1016 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['id', 'source', 'system', 'prompt', 'chosen', 'rejected'],
        num_rows: 1016
    })
})

In [22]:
# Write the truthy-dpo prompts to data_path/TruthyDPO/metadata.json.
# The dataset's own 'id' is kept alongside the fixed category "subjective".
truthy_dpo_path = os.path.join(data_path, "TruthyDPO")
os.makedirs(truthy_dpo_path, exist_ok=True)
save_path = os.path.join(truthy_dpo_path, "metadata.json")

metadata = [
    {
        "instruction": item['prompt'],
        "category": "subjective",
        "id": item['id'],
    }
    for item in ds['train']
]

with open(save_path, "w") as f:
    json.dump(metadata, f)
metadata[0]

{'instruction': "What's the nearest national park to you?",
 'category': 'subjective',
 'id': '04c275bf738fd391b7fe25e25fe7bed3'}

# Objective Dataset: MMLU Pro

In [24]:
from datasets import load_dataset

# Login using e.g. `huggingface-cli login` to access this dataset
# Replaces `ds` with MMLU-Pro; the trailing expression displays the available
# splits and columns, which the two export cells below read from.
ds = load_dataset("TIGER-Lab/MMLU-Pro")
ds

DatasetDict({
    test: Dataset({
        features: ['question_id', 'question', 'options', 'answer', 'answer_index', 'cot_content', 'category', 'src'],
        num_rows: 12032
    })
    validation: Dataset({
        features: ['question_id', 'question', 'options', 'answer', 'answer_index', 'cot_content', 'category', 'src'],
        num_rows: 70
    })
})

In [25]:
# Write the MMLU-Pro validation split to data_path/MMLUPro/validation/metadata.json.
mmlu_pro_validation_path = os.path.join(data_path, "MMLUPro", "validation")
os.makedirs(mmlu_pro_validation_path, exist_ok=True)
save_path = os.path.join(mmlu_pro_validation_path, "metadata.json")

# NOTE(review): only question, answer and category are exported. The 'options'
# column is not stored, so the answer letter cannot be mapped back to a choice
# from this file alone -- confirm downstream code does not need it.
metadata = [
    {
        "instruction": item['question'],
        "answer": item['answer'],
        "category": item['category'],
    }
    for item in ds['validation']
]

with open(save_path, "w") as f:
    json.dump(metadata, f)
metadata[0]

{'instruction': 'The symmetric group $S_n$ has $\n\\factorial{n}$ elements, hence it is not true that $S_{10}$ has 10 elements.\nFind the characteristic of the ring 2Z.',
 'answer': 'A',
 'category': 'math'}

In [26]:
# Write the MMLU-Pro test split to data_path/MMLUPro/test/metadata.json.
mmlu_pro_test_path = os.path.join(data_path, "MMLUPro", "test")
os.makedirs(mmlu_pro_test_path, exist_ok=True)
save_path = os.path.join(mmlu_pro_test_path, "metadata.json")

# NOTE(review): only question, answer and category are exported. The 'options'
# column is not stored, so the answer letter cannot be mapped back to a choice
# from this file alone -- confirm downstream code does not need it.
metadata = [
    {
        "instruction": item['question'],
        "answer": item['answer'],
        "category": item['category'],
    }
    for item in ds['test']
]

with open(save_path, "w") as f:
    json.dump(metadata, f)
metadata[0]

{'instruction': 'Typical advertising regulatory bodies suggest, for example that adverts must not: encourage _________, cause unnecessary ________ or _____, and must not cause _______ offence.',
 'answer': 'I',
 'category': 'business'}