In [15]:
import os
# Point the HF_HOME environment variable at the cache directory on the data disk.
# NOTE(review): presumably this has to run before any Hugging Face library is
# imported for it to take effect — confirm.
hf_home_dir = "/data2/xianglin/cache/huggingface"
os.environ["HF_HOME"] = hf_home_dir

In [16]:
# Both working directories live under the same project root:
#   raw_path  - where the upstream benchmark repositories are read from
#   data_path - where the converted JSON datasets are written
project_root = "/data2/xianglin/llm-as-a-judge-attack"
raw_path = f"{project_root}/raw"
data_path = f"{project_root}/data"

# MT Bench

In [None]:
!cd raw_path
!git clone https://huggingface.co/spaces/lmsys/mt-bench

In [17]:
import json

# Question file inside the cloned MT-Bench checkout.
path = f"{raw_path}/mt-bench/data/mt_bench/question.jsonl"

# The file is parsed line by line: every line is decoded as one JSON object.
with open(path, "r") as f:
    questions = [json.loads(line) for line in f]

# Show the first record as a quick sanity check.
questions[0]

{'question_id': 81,
 'category': 'writing',
 'turns': ['Compose an engaging travel blog post about a recent trip to Hawaii, highlighting cultural experiences and must-see attractions.',
  'Rewrite your previous response. Start every sentence with the letter A.']}

In [18]:
# Output location for the converted MT-Bench files.
mt_bench_path = os.path.join(data_path, "MTBench")
os.makedirs(mt_bench_path, exist_ok=True)
save_path = os.path.join(mt_bench_path, "metadata.json")

# One metadata record per question; only the first turn is kept as the instruction.
metadata = [
    {
        "question_id": q['question_id'],
        "instruction": q['turns'][0],
        "original_category": q['category'],
    }
    for q in questions
]

# Persist the metadata list as a single JSON document.
with open(save_path, "w") as f:
    json.dump(metadata, f)

# Show the first record as a quick sanity check.
metadata[0]

{'question_id': 81,
 'instruction': 'Compose an engaging travel blog post about a recent trip to Hawaii, highlighting cultural experiences and must-see attractions.',
 'original_category': 'writing'}

In [19]:
# Convert every MT-Bench model-answer file into an instruction/output dataset
# written to <mt_bench_path>/<model_name>.json.
answer_dir = os.path.join(raw_path, "mt-bench/data/mt_bench/model_answer")
files = os.listdir(answer_dir)

# Index the questions by id once, so each answer is matched with a dictionary
# lookup instead of a scan over the whole question list.
mt_questions_by_id = {question['question_id']: question for question in questions}

for file in files:
    # Answer files are parsed as JSON Lines below; ignore any other directory entry.
    if not file.endswith(".jsonl"):
        continue

    # splitext strips only the final extension, so dotted model names stay intact
    # ("gpt-3.5-turbo.jsonl" -> "gpt-3.5-turbo"). Splitting on the first "." would
    # truncate them and could make two models write to the same output file.
    model_name = os.path.splitext(file)[0]

    # load model answer (one JSON object per non-empty line)
    path = os.path.join(answer_dir, file)
    with open(path, "r") as f:
        answers = [json.loads(line) for line in f if line.strip()]

    # Pair each answer with its question; answers whose question_id is unknown
    # are dropped. Only the first turn of the first choice is kept.
    new_dataset = list()
    for answer in answers:
        question = mt_questions_by_id.get(answer['question_id'])
        if question is None:
            continue
        new_dataset.append({
            "instruction": question['turns'][0],
            "output": answer['choices'][0]['turns'][0],
            "original_category": question['category'],
        })

    # save new dataset
    save_path = os.path.join(mt_bench_path, f"{model_name}.json")
    with open(save_path, "w") as f:
        json.dump(new_dataset, f)


# AlpacaEval

In [8]:
!cd raw_path
!git clone https://github.com/tatsu-lab/alpaca_eval.git

Cloning into 'alpaca_eval'...
remote: Enumerating objects: 236, done.[K
remote: Counting objects: 100% (24/24), done.[K
remote: Compressing objects: 100% (24/24), done.[K
remote: Total 236 (delta 9), reused 0 (delta 0), pack-reused 212 (from 1)[K
Receiving objects: 100% (236/236), 3.37 MiB | 6.61 MiB/s, done.
Resolving deltas: 100% (114/114), done.


In [8]:
import json

# Model outputs of the NullModel entry inside the AlpacaEval checkout.
path = os.path.join(raw_path, "alpaca_eval/results/NullModel/model_outputs.json")

# The whole file is parsed as a single JSON document (not line by line).
with open(path, "r") as f:
    data = json.load(f)


In [9]:
# Output location for the converted AlpacaEval files.
alpaca_path = os.path.join(data_path, "AlpacaEval")
os.makedirs(alpaca_path, exist_ok=True)
save_path = os.path.join(alpaca_path, "metadata.json")

# Keep only the instruction text and its source dataset for every item.
metadata = [
    {"instruction": item["instruction"], "dataset": item["dataset"]}
    for item in data
]

# Persist the metadata list as a single JSON document.
with open(save_path, "w") as f:
    json.dump(metadata, f)



In [None]:
# Convert every model's AlpacaEval outputs into an instruction/output dataset
# written to <alpaca_path>/<model directory name>.json.
result_dir = os.path.join(raw_path, "alpaca_eval", "results")
dirs = os.listdir(result_dir)

# The loop variable is named `model_dir` rather than `dir` so the builtin `dir()`
# is not shadowed for the rest of the kernel session.
for model_dir in dirs:
    path = os.path.join(result_dir, model_dir, "model_outputs.json")

    # Skip entries that are not model folders or that ship no outputs file,
    # instead of aborting the whole conversion on the first such entry.
    if not os.path.isfile(path):
        continue

    with open(path, "r") as f:
        data = json.load(f)

    # Keep only the instruction and the model's output for every item.
    new_dataset = [
        {"instruction": item["instruction"], "output": item["output"]}
        for item in data
    ]

    save_path = os.path.join(alpaca_path, f"{model_dir}.json")
    with open(save_path, "w") as f:
        json.dump(new_dataset, f)

# ArenaHard 2.0

In [None]:
! git clone https://github.com/lmarena/arena-hard-auto.git

In [12]:
# Directory holding the Arena-Hard v2.0 data inside the cloned repository.
# (Despite its name, `save_dir` is only read from in this cell.)
save_dir = os.path.join(raw_path, "arena-hard-auto", "data", "arena-hard-v2.0")

# Every line of question.jsonl is decoded as one JSON object.
question_file = os.path.join(save_dir, "question.jsonl")
with open(question_file, "r") as f:
    questions = [json.loads(line) for line in f]


In [13]:
# Output location for the converted Arena-Hard files.
arena_hard_path = os.path.join(data_path, "ArenaHard")
os.makedirs(arena_hard_path, exist_ok=True)
save_path = os.path.join(arena_hard_path, "metadata.json")

# NOTE(review): the entire question record is stored under "instruction" here,
# whereas the MTBench and AlpacaEval metadata store only the instruction text.
# Confirm whether downstream code expects the full record or just the prompt.
metadata = [{"instruction": q} for q in questions]

# Persist the metadata list as a single JSON document.
with open(save_path, "w") as f:
    json.dump(metadata, f)


In [14]:
# Convert every Arena-Hard v2.0 model-answer file into an instruction/output
# dataset written to <arena_hard_path>/<model_name>.json.
answers_dir = os.path.join(raw_path, "arena-hard-auto", "data", "arena-hard-v2.0", "model_answer")

files = os.listdir(answers_dir)


def _record_id(record):
    """Return the identifier stored in a question/answer record, or None.

    NOTE(review): assumes records carry their id under "uid" (or
    "question_id") — confirm against the cloned question.jsonl / answer files.
    """
    for key in ("uid", "question_id"):
        if key in record:
            return record[key]
    return None


# Joining by id is only possible when every question exposes an identifier.
question_ids = [_record_id(question) for question in questions]
can_join_by_id = bool(question_ids) and all(qid is not None for qid in question_ids)

for file in files:
    # Answer files are parsed as JSON Lines below; ignore any other directory entry.
    if not file.endswith(".jsonl"):
        continue

    # splitext strips only the final extension, so dotted model names stay intact
    # ("gpt-4.1.jsonl" -> "gpt-4.1"). Splitting on the first "." would truncate
    # them and make several models overwrite the same output file.
    model_name = os.path.splitext(file)[0]
    path = os.path.join(answers_dir, file)

    # One JSON object per non-empty line.
    with open(path, "r") as f:
        answers = [json.loads(line) for line in f if line.strip()]

    if can_join_by_id and all(_record_id(answer) is not None for answer in answers):
        # Match answers to questions by id rather than by line position, so a
        # differently ordered or incomplete answer file cannot be mis-paired.
        # If an id occurs more than once, the first answer in the file is kept.
        answers_by_id = {}
        for answer in answers:
            answers_by_id.setdefault(_record_id(answer), answer)
        # Iterate in question order so the output keeps the question ordering.
        pairs = [
            (question, answers_by_id[qid])
            for question, qid in zip(questions, question_ids)
            if qid in answers_by_id
        ]
    else:
        # No usable ids: fall back to positional pairing, but make a length
        # mismatch visible instead of silently truncating.
        if len(answers) != len(questions):
            print(
                f"warning: {file}: {len(answers)} answers for "
                f"{len(questions)} questions; pairing by position"
            )
        pairs = list(zip(questions, answers))

    # The full question and answer records are stored, as before.
    new_dataset = [
        {"instruction": question, "output": answer}
        for question, answer in pairs
    ]

    save_path = os.path.join(arena_hard_path, f"{model_name}.json")
    with open(save_path, "w") as f:
        json.dump(new_dataset, f)