In [4]:
import os
from utils import EXP_ROOT, load_jsonl

# Directory holding the response dumps, plus the paths of the two files read below.
qa_root = os.path.join(EXP_ROOT, "datasets/kg-datasets/ja-0.5/eval_qa/03_en_qa")
v1_file, rerun_file = (
    os.path.join(qa_root, file_name)
    for file_name in ("ja_responses.v1.jsonl", "ja_responses.rerun.jsonl")
)

v1_responses = load_jsonl(v1_file)
rerun_responses = load_jsonl(rerun_file)

# NOTE: despite its name, `rerun_ids` maps request_id -> the full rerun response
# record (not just the ids). If a request_id occurs more than once, the last wins.
rerun_ids = dict((rerun['request_id'], rerun) for rerun in rerun_responses)

In [10]:
# Merge rerun and original requests
from re import escape
from utils import dump_jsonl

# Keep the v1 ordering; wherever a rerun exists for the same request_id,
# take the rerun record in place of the original one.
v2_responses = [
    rerun_ids.get(original['request_id'], original)
    for original in v1_responses
]
dump_jsonl(v2_responses, os.path.join(qa_root, "ja_responses.jsonl"))

In [6]:
import os
import pandas as pd

# Load the existing annotation sheet; its first column is used as the index,
# which the membership test on `df.index` in a later cell relies on.
df = pd.read_csv(
    filepath_or_buffer="./annotation/ja_evaluation.v2.csv",
    index_col=0,
)

In [12]:
# Keep only the responses whose request_id is not already in the annotation sheet's index.
filtered_responses = list(
    filter(
        lambda candidate: candidate['request_id'] not in df.index,
        v2_responses,
    )
)

In [16]:
import random

# How many not-yet-annotated responses to pick, and a fixed seed so that
# Restart & Run All selects the same ones for the same inputs.
N_TO_ADD = 5
SAMPLE_SEED = 0

# Shuffle a copy with a dedicated RNG instead of shuffling in place with the
# global one: `filtered_responses` keeps its order, the global `random` state
# is left untouched, and re-running this cell yields the same selection.
shuffled_responses = list(filtered_responses)
random.Random(SAMPLE_SEED).shuffle(shuffled_responses)

# Slicing tolerates fewer than N_TO_ADD candidates (it just returns what exists).
toadd_responses = shuffled_responses[:N_TO_ADD]

In [19]:
import csv
import os
import random  # no longer used in this cell; kept so the cell's imports are unchanged

# Define the output CSV file
output_path = "./annotation/ja_evaluation.v2.csv"

# Define the header
header = [
    "request_id", "sentence", "triple", "in-lang", "cross-lang", "answer", "prompt",
    "faithfulness", "factuality", "paraphrase", "fluency",
    "equivalence", "diversity",
    "distractor_1", "d1_plausibility", "d1_compatibility", "d1_incorrectness",
    "distractor_2", "d2_plausibility", "d2_compatibility", "d2_incorrectness",
    "distractor_3", "d3_plausibility", "d3_compatibility", "d3_incorrectness"
]


def _build_annotation_row(response):
    """Build one CSV row (a dict keyed by the names in `header`) from a response.

    Reads ``response['request_id']``, ``response['generation']`` (its
    ``prompt``, ``question`` and ``distractors`` entries) and
    ``response['metadata']`` (its ``sentence`` and ``input.triple`` entries).
    Every column not filled from the response is left as an empty string.

    Raises:
        KeyError: if one of the fields listed above is missing.
        IndexError: if ``distractors`` holds fewer than three entries.
    """
    generation = response["generation"]
    triple = response['metadata']['input']['triple']
    distractors = generation["distractors"]

    # Start from an all-blank row so the rating columns stay empty.
    row = dict.fromkeys(header, "")
    row.update({
        "request_id": response['request_id'],
        # Flatten newlines so each sentence stays on one line of the sheet.
        "sentence": response['metadata']['sentence'].replace("\n", " "),
        "triple": str(triple),
        "answer": triple['object'],
        "prompt": generation["prompt"],
        "paraphrase": generation["question"],
        "distractor_1": distractors[0],
        "distractor_2": distractors[1],
        "distractor_3": distractors[2],
    })
    return row


# Build every row before touching the file, so a malformed response fails
# here instead of leaving a half-appended CSV behind.
rows = [_build_annotation_row(response) for response in toadd_responses]

# The file is opened in append mode, so the header must only be written when
# there is nothing in it yet; otherwise each run would insert a duplicate
# header line in the middle of the existing rows.
needs_header = not os.path.exists(output_path) or os.path.getsize(output_path) == 0

# Open CSV for appending
with open(output_path, mode="a", newline="", encoding="utf-8") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=header)
    if needs_header:
        writer.writeheader()
    writer.writerows(rows)

print(f"✅ CSV file written to {output_path}")

✅ CSV file written to ./annotation/ja_evaluation.v2.csv
