In [1]:
import os
# Point the Hugging Face home/cache directory (HF_HOME) at this location.
# NOTE(review): presumably this has to run before the later cells import
# `datasets` for the setting to take effect -- confirm.
os.environ["HF_HOME"] = "/mnt/hdd1/ljiahao/xianglin/cache/huggingface"

In [2]:
# Root folders used by every cell below:
#   raw_path  - where the upstream repositories / raw downloads live
#   data_path - where the converted metadata.json and <model_name>.json files are written
# The defaults are the original machine-specific locations; set the environment
# variables LLM_JUDGE_RAW_PATH / LLM_JUDGE_DATA_PATH to run the notebook elsewhere
# without editing it.
raw_path = os.environ.get(
    "LLM_JUDGE_RAW_PATH",
    "/mnt/hdd1/ljiahao/xianglin/llm-as-a-judge-attack/raw",
)
data_path = os.environ.get(
    "LLM_JUDGE_DATA_PATH",
    "/mnt/hdd1/ljiahao/xianglin/llm-as-a-judge-attack/data",
)

In [3]:
import json
import os

# Format
1. Download the raw data to `raw_path` (or to the Hugging Face default cache path).
2. Save `metadata.json` to `data_path/<dataset_name>/`, with keys: `instruction`, optionally `category`, plus any other dataset-specific fields.
3. Save each model's completions to `data_path/<dataset_name>/<model_name>.json`, with keys: `instruction`, `output`.


# MT Bench

In [None]:
# Each `!` line runs in its own subshell, so a preceding `!cd raw_path` neither
# changes the directory for the clone nor expands the Python variable (it passes
# the literal text "raw_path"). Clone straight into raw_path instead, which is
# where the following cells look for the repository. The check makes the cell
# safe to re-run once the clone exists.
if not os.path.isdir(os.path.join(raw_path, "mt-bench")):
    !git clone https://huggingface.co/spaces/lmsys/mt-bench "{raw_path}/mt-bench"

In [None]:
# Read the MT-Bench question file: one JSON object per line.
import json

path = f"{raw_path}/mt-bench/data/mt_bench/question.jsonl"
with open(path, "r") as f:
    questions = [json.loads(line) for line in f]

# Display the first record as an example of the schema.
questions[0]

In [None]:
# Write the MT-Bench prompt metadata to <data_path>/MTBench/metadata.json.
# Only the first turn of each question is kept as the instruction.
mt_bench_path = os.path.join(data_path, "MTBench")
os.makedirs(mt_bench_path, exist_ok=True)
save_path = os.path.join(mt_bench_path, "metadata.json")

metadata = [
    {
        "question_id": q["question_id"],
        "instruction": q["turns"][0],
        "original_category": q["category"],
    }
    for q in questions
]

with open(save_path, "w") as f:
    json.dump(metadata, f)

# Display the first record as an example.
metadata[0]

In [None]:
# Convert every MT-Bench model answer file into <mt_bench_path>/<model_name>.json,
# pairing each model's first-turn answer with the matching first-turn question.
answer_dir = os.path.join(raw_path, "mt-bench/data/mt_bench/model_answer")
files = os.listdir(answer_dir)

# Index the questions once so each answer is matched by a dict lookup instead of
# rescanning the whole question list (assumes question_id values are unique).
question_by_id = {question['question_id']: question for question in questions}

for file in files:
    # Strip only the final extension. split(".")[0] would cut a dotted model name
    # at its first dot (a file named "gpt-3.5-turbo.jsonl" would become "gpt-3"),
    # producing a wrong output name and letting different models overwrite each other.
    model_name = os.path.splitext(file)[0]

    # Load this model's answers: one JSON object per line.
    path = os.path.join(answer_dir, file)
    with open(path, "r", encoding="utf-8") as f:
        answers = [json.loads(line) for line in f]

    new_dataset = list()
    for answer in answers:
        question = question_by_id.get(answer['question_id'])
        if question is None:
            # Answers without a matching question are dropped, as before.
            continue
        new_dataset.append({
            "instruction": question['turns'][0],
            "output": answer['choices'][0]['turns'][0],
            "original_category": question['category'],
        })

    print(model_name)
    # Guard the example print so a file with no matching answers does not abort the loop.
    if new_dataset:
        print(new_dataset[0])
    print("-"*100)

    # Save the converted completions for this model.
    save_path = os.path.join(mt_bench_path, f"{model_name}.json")
    with open(save_path, "w") as f:
        json.dump(new_dataset, f)


# AlpacaEval

In [None]:
# Each `!` line runs in its own subshell, so a preceding `!cd raw_path` neither
# changes the directory for the clone nor expands the Python variable. Clone
# straight into raw_path, where the following cells read alpaca_eval/results.
# The check makes the cell safe to re-run once the clone exists.
if not os.path.isdir(os.path.join(raw_path, "alpaca_eval")):
    !git clone https://github.com/tatsu-lab/alpaca_eval.git "{raw_path}/alpaca_eval"

In [18]:
# The NullModel outputs are used by the next cell as the source of the
# AlpacaEval instruction list.
path = os.path.join(raw_path, "alpaca_eval", "results", "NullModel", "model_outputs.json")

# Load the file as a single JSON document (json.load), not as JSON Lines.
with open(path, "r") as f:
    data = json.load(f)


In [None]:
# Write the AlpacaEval prompt metadata to <data_path>/AlpacaEval/metadata.json.
alpaca_path = os.path.join(data_path, "AlpacaEval")
os.makedirs(alpaca_path, exist_ok=True)
save_path = os.path.join(alpaca_path, "metadata.json")

# Keep the instruction and the name of the sub-dataset it came from.
metadata = [
    {"instruction": record["instruction"], "dataset": record["dataset"]}
    for record in data
]

with open(save_path, "w") as f:
    json.dump(metadata, f)

# Display the first record as an example.
metadata[0]

In [None]:
# Convert every model's AlpacaEval outputs into <alpaca_path>/<model_name>.json.
result_dir = os.path.join(raw_path, "alpaca_eval", "results")
dirs = os.listdir(result_dir)

# The loop variable is named model_dir rather than `dir` so the builtin dir()
# is not shadowed for the rest of the notebook session.
for model_dir in dirs:
    path = os.path.join(result_dir, model_dir, "model_outputs.json")
    # Skip stray files and result folders that ship no model_outputs.json;
    # previously any such entry aborted the whole loop.
    if not os.path.isfile(path):
        continue

    # Use a dedicated name so the `data` list the metadata cell was built from
    # is not overwritten by the last model processed here.
    with open(path, "r", encoding="utf-8") as f:
        model_outputs = json.load(f)

    new_dataset = [
        {"instruction": item["instruction"], "output": item["output"]}
        for item in model_outputs
    ]

    print(model_dir)
    # Guard the example print so an empty output file does not abort the loop.
    if new_dataset:
        print(new_dataset[0])
    print("-"*100)

    save_path = os.path.join(alpaca_path, f"{model_dir}.json")
    with open(save_path, "w") as f:
        json.dump(new_dataset, f)

# ArenaHard 2.0

In [None]:
# Clone into raw_path: the following cells read
# <raw_path>/arena-hard-auto/data/arena-hard-v2.0, whereas a bare `git clone`
# would drop the repository into the notebook's current working directory.
# The check makes the cell safe to re-run once the clone exists.
if not os.path.isdir(os.path.join(raw_path, "arena-hard-auto")):
    !git clone https://github.com/lmarena/arena-hard-auto.git "{raw_path}/arena-hard-auto"

In [None]:
import json
import os

# Folder holding the Arena-Hard v2.0 data inside the cloned repository.
save_dir = os.path.join(raw_path, "arena-hard-auto", "data", "arena-hard-v2.0")

# Read the question file: one JSON object per line.
with open(os.path.join(save_dir, "question.jsonl"), "r") as f:
    questions = [json.loads(line) for line in f]

# Display the first record as an example.
questions[0]

In [None]:
# Write the Arena-Hard prompt metadata to <data_path>/ArenaHard/metadata.json.
arena_hard_path = os.path.join(data_path, "ArenaHard")
os.makedirs(arena_hard_path, exist_ok=True)
save_path = os.path.join(arena_hard_path, "metadata.json")

# The uid is kept so that model answers can be joined back to their prompt later.
metadata = [
    {
        "instruction": q["prompt"],
        "original_category": q["category"],
        "original_subcategory": q["subcategory"],
        "uid": q["uid"],
    }
    for q in questions
]

with open(save_path, "w") as f:
    json.dump(metadata, f)

# Display the first record as an example.
print("Example element:")
metadata[0]




In [None]:
# Folder with the per-model Arena-Hard v2.0 answer files.
answers_dir = os.path.join(raw_path, "arena-hard-auto", "data", "arena-hard-v2.0", "model_answer")

# List every entry in that folder; the following cells derive a model name
# from each entry by dropping its last 6 characters.
files = os.listdir(answers_dir)
files
    

In [None]:
# Load every model's Arena-Hard answers into memory, keyed by model name.
answers_dict = {}

for file in files:
    path = os.path.join(answers_dir, file)

    # One JSON object per line.
    with open(path, "r") as f:
        answers = [json.loads(line) for line in f]

    # Print the first answer as an example of the schema.
    print(answers[0])

    # The model name is the file name minus its last 6 characters
    # (the length of a ".jsonl" suffix).
    model_name = file[:-6]
    answers_dict[model_name] = answers

In [None]:
# Show the first loaded answer of one specific model as a schema example.
# Raises KeyError if no answer file for "o3-mini-2025-01-31" was loaded.
answers_dict["o3-mini-2025-01-31"][0]

In [None]:
# Join each model answer to its prompt through the shared uid and save the
# result as <arena_hard_path>/<model_name>.json.
uid_to_question = {entry["uid"]: entry["instruction"] for entry in metadata}

for file in files:
    # Same key derivation as when the answers were loaded.
    model_name = file[:-6]
    answers = answers_dict[model_name]
    print(model_name)
    print(len(answers))

    # The model's reply is taken from the last message of each answer record.
    new_dataset = [
        {
            "instruction": uid_to_question[answer["uid"]],
            "output": answer['messages'][-1]['content']['answer'],
        }
        for answer in answers
    ]

    print("Example Element:")
    print(new_dataset[0])

    save_path = os.path.join(arena_hard_path, f"{model_name}.json")
    with open(save_path, "w") as f:
        json.dump(new_dataset, f)



# Subjective Dataset:
1. yleo/emerton_dpo_pairs_judge
2. Intel/orca_dpo_pairs
3. jondurbin/py-dpo-v0.1
4. jondurbin/truthy-dpo-v0.1

In [None]:
from datasets import load_dataset
# Load the Emerton DPO pairs dataset; the next cell reads its 'train' split.
ds = load_dataset("yleo/emerton_dpo_pairs_judge")

In [None]:
# Write the Emerton DPO prompt metadata to <data_path>/EmertonDPO/metadata.json.
emerton_dpo_path = os.path.join(data_path, "EmertonDPO")
os.makedirs(emerton_dpo_path, exist_ok=True)
save_path = os.path.join(emerton_dpo_path, "metadata.json")

# Every prompt in this dataset is labelled with the fixed category "subjective".
metadata = [
    {"instruction": row['input'], "category": "subjective"}
    for row in ds['train']
]

with open(save_path, "w") as f:
    json.dump(metadata, f)

# Display the first record as an example.
metadata[0]

In [None]:
# Load the Orca DPO pairs dataset (replaces the previous `ds`) and display its
# summary; the next cell reads its 'train' split.
ds = load_dataset("Intel/orca_dpo_pairs")
ds

In [None]:
# Write the Orca DPO prompt metadata to <data_path>/OrcaDPO/metadata.json.
orca_dpo_path = os.path.join(data_path, "OrcaDPO")
os.makedirs(orca_dpo_path, exist_ok=True)
save_path = os.path.join(orca_dpo_path, "metadata.json")

# Every prompt in this dataset is labelled with the fixed category "subjective".
metadata = [
    {"instruction": row['question'], "category": "subjective"}
    for row in ds['train']
]

with open(save_path, "w") as f:
    json.dump(metadata, f)

# Display the first record as an example.
metadata[0]

In [None]:
# Load the py-dpo dataset (replaces the previous `ds`) and display its summary;
# the next cell reads its 'train' split.
ds = load_dataset("jondurbin/py-dpo-v0.1")
ds

In [None]:
# Write the py-dpo prompt metadata to <data_path>/PyDPO/metadata.json.
py_dpo_path = os.path.join(data_path, "PyDPO")
os.makedirs(py_dpo_path, exist_ok=True)
save_path = os.path.join(py_dpo_path, "metadata.json")

# Keep the dataset's own id next to each prompt; the category is the fixed
# label "subjective".
metadata = [
    {
        "instruction": row['prompt'],
        "category": "subjective",
        "id": row['id'],
    }
    for row in ds['train']
]

with open(save_path, "w") as f:
    json.dump(metadata, f)

# Display the first record as an example.
metadata[0]

In [None]:
# Load the truthy-dpo dataset (replaces the previous `ds`) and display its
# summary; the next cell reads its 'train' split.
ds = load_dataset("jondurbin/truthy-dpo-v0.1")
ds

In [None]:
# Write the truthy-dpo prompt metadata to <data_path>/TruthyDPO/metadata.json.
truthy_dpo_path = os.path.join(data_path, "TruthyDPO")
os.makedirs(truthy_dpo_path, exist_ok=True)
save_path = os.path.join(truthy_dpo_path, "metadata.json")

# Keep the dataset's own id next to each prompt; the category is the fixed
# label "subjective".
metadata = [
    {
        "instruction": row['prompt'],
        "category": "subjective",
        "id": row['id'],
    }
    for row in ds['train']
]

with open(save_path, "w") as f:
    json.dump(metadata, f)

# Display the first record as an example.
metadata[0]

# Objective Dataset: MMLU Pro

In [None]:
from datasets import load_dataset

# Login using e.g. `huggingface-cli login` to access this dataset
# Load MMLU-Pro (replaces the previous `ds`) and display its summary; the
# following cells read its 'validation' and 'test' splits.
ds = load_dataset("TIGER-Lab/MMLU-Pro")
ds

In [None]:
# Write the MMLU-Pro validation-split metadata to
# <data_path>/MMLUPro/validation/metadata.json.
mmlu_pro_validation_path = os.path.join(data_path, "MMLUPro", "validation")
os.makedirs(mmlu_pro_validation_path, exist_ok=True)
save_path = os.path.join(mmlu_pro_validation_path, "metadata.json")

# Keep the question text, its reference answer and its subject category.
metadata = [
    {
        "instruction": row['question'],
        "answer": row['answer'],
        "category": row['category'],
    }
    for row in ds['validation']
]

with open(save_path, "w") as f:
    json.dump(metadata, f)

# Display the first record as an example.
metadata[0]

In [None]:
# Write the MMLU-Pro test-split metadata to
# <data_path>/MMLUPro/test/metadata.json.
mmlu_pro_test_path = os.path.join(data_path, "MMLUPro", "test")
os.makedirs(mmlu_pro_test_path, exist_ok=True)
save_path = os.path.join(mmlu_pro_test_path, "metadata.json")

# Keep the question text, its reference answer and its subject category.
metadata = [
    {
        "instruction": row['question'],
        "answer": row['answer'],
        "category": row['category'],
    }
    for row in ds['test']
]

with open(save_path, "w") as f:
    json.dump(metadata, f)

# Display the first record as an example.
metadata[0]

# MLR Bench

In [None]:
# Location of the MLR-Bench data under raw_path.
# NOTE(review): no cell visible here downloads or clones it -- it presumably has
# to be placed there beforehand; confirm.
raw_data_path = os.path.join(raw_path, "mlrbench")
# Folder holding the task description files, one file per task.
task_path = os.path.join(raw_data_path, "tasks")

# Display the available task files.
os.listdir(task_path)

In [19]:
# Prompt template wrapped around each MLR-Bench task description.
# The `{task_description}` placeholder is filled in with str.format() when the
# metadata is built.

task_prompt_template = """You are an excellent machine learning researcher. You are given a task description of a research topic.
Please generate innovative and practical ideas and write a research paper based on the task description.

{task_description}
"""

In [None]:
# Build one prompt per MLR-Bench task description and write them to
# <data_path>/mlrbench/metadata.json.
mlrbench_path = os.path.join(data_path, "mlrbench")
os.makedirs(mlrbench_path, exist_ok=True)
save_path = os.path.join(mlrbench_path, "metadata.json")

metadata = list()
# os.listdir() returns entries in arbitrary order; sorting keeps metadata.json
# identical from run to run.
for file in sorted(os.listdir(task_path)):
    # Only markdown task files are tasks. Any other entry (hidden file,
    # sub-directory, ...) previously either crashed open() or was stored under a
    # mangled workshop name, because the last three characters were chopped off
    # regardless of the extension.
    workshop, extension = os.path.splitext(file)
    if extension != ".md":
        continue

    # Read the task description; be explicit about the encoding so the result
    # does not depend on the machine's locale.
    with open(os.path.join(task_path, file), "r", encoding="utf-8") as f:
        data = f.read()

    metadata.append({
        "instruction": task_prompt_template.format(task_description=data),
        "workshop": workshop,
        "category": "writing",
    })

with open(save_path, "w") as f:
    json.dump(metadata, f)

# Display the first record as an example.
metadata[0]

In [None]:
# Collect the agent names that have end-to-end results, i.e. every entry of
# agent_results/ named "end2end_<agent>".
answers_dir = os.path.join(raw_data_path, "agent_results")

# Strip only the leading "end2end_" prefix. str.replace() would also delete any
# later occurrence inside the name, and the next cell rebuilds the folder path
# as "end2end_" + name, so the stripped name must round-trip exactly.
answers_dirs = [
    name[len("end2end_"):]
    for name in os.listdir(answers_dir)
    if name.startswith("end2end_")
]
answers_dirs


In [None]:
# Pair every generated paper with the prompt of its workshop task and save the
# result as <mlrbench_path>/<agent>.json.
# The join key is the workshop name (task file name without extension).
map_to_question = {item["workshop"]: item["instruction"] for item in metadata}

for answer_dir in answers_dirs:
    agent_root = os.path.join(answers_dir, "end2end_" + answer_dir)
    # Keep sub-directories only; each one is named after a workshop.
    workshop_names = [
        name for name in os.listdir(agent_root)
        if os.path.isdir(os.path.join(agent_root, name))
    ]

    new_dataset = list()
    for workshop_name in workshop_names:
        paper_path = os.path.join(agent_root, workshop_name, "results", "paper.md")
        # Only completed runs have a paper, and only known workshops have a
        # prompt. Report and skip anything else instead of aborting the export.
        if workshop_name not in map_to_question or not os.path.isfile(paper_path):
            print(f"Skipping {answer_dir}/{workshop_name}: no matching task or no paper.md")
            continue

        # Be explicit about the encoding so reading does not depend on the locale.
        with open(paper_path, "r", encoding="utf-8") as f:
            paper = f.read()
        new_dataset.append({
            "instruction": map_to_question[workshop_name],
            "workshop": workshop_name,
            "output": paper
        })

    # Guard the example print so an agent without any finished paper does not
    # raise IndexError.
    if new_dataset:
        print("Example Element:")
        print(new_dataset[0])

    save_path = os.path.join(mlrbench_path, f"{answer_dir}.json")
    with open(save_path, "w") as f:
        json.dump(new_dataset, f)

In [26]:
# Get the score from the dataset
# This cell only builds the paths to the review folders; nothing is read here.
review_path = os.path.join(raw_path, "mlrbench", "agent_reviews")

# One folder per reviewing model.
claude_response_path = os.path.join(review_path, "subset_reviews_claude-3-7-sonnet-20250219")
gemini_response_path = os.path.join(review_path, "subset_reviews_gemini-2.5-pro-preview")
