In [3]:
import json
from datetime import datetime
import uuid
import os

In [15]:
input_dir = "./delivery-17-jun"
output_file = "final_combined_workitems.json"

In [5]:
def format_instructions(instr):
    return json.dumps(instr["instructions"])

In [18]:

# Final structure
final_output = {
    "workitems": []
}

# Loop through all JSON files in input directory
for filename in os.listdir(input_dir):
    if filename.endswith(".json"):
        input_path = os.path.join(input_dir, filename)
        task_id = os.path.splitext(filename)[0]

        with open(input_path, "r") as f:
            data = json.load(f)

        # Build per-task structure
        task_item = {
            "workItemId": task_id,
            "workflow": "Verifiable Instruction Following",
            "locale": "en_US",
            "inputData": {
                "turnInputData": []
            },
            "metadata": {},
            task_id: [
                {
                    "data": {
                        "taskAnswers": [
                            {
                                "turnLevelOutput": [],
                                "language": "en_US",
                                "dialogue_length": str(data["dialogue_metadata"]["dialogue_length"]),
                                "task_type": data["dialogue_metadata"]["task_type"],
                                "task_difficulty": data["dialogue_metadata"]["task_difficulty"]
                            }
                        ]
                    },
                    "metadata": {
                        "taskId": task_id,
                        "operationType": "LABELLING",
                        "labelledTimestamp": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.0Z"),
                        "obfuscatedDaAlias": "Turing"
                    }
                }
            ]
        }

        # Handle turns and responses
        for turn in data["turns"]:
            responses = [
                {
                    "modelId": "Nova Premier",
                    "responseText": turn.get("response", ""),
                    "respondedByRole": "User",
                    "errorMessage": ""
                }
            ]

            if "nova_response" in turn:
                responses.append({
                    "modelId": "Nova Premier",
                    "responseText": turn["nova_response"],
                    "respondedByRole": "Bot",
                    "errorMessage": ""
                })

            if "4o_response" in turn:
                responses.append({
                    "modelId": "GPT-4o",
                    "responseText": turn["4o_response"],
                    "respondedByRole": "Bot",
                    "errorMessage": ""
                })

            if "deepseek_response" in turn:
                responses.append({
                    "modelId": "DeepSeek",
                    "responseText": turn["deepseek_response"],
                    "respondedByRole": "Bot",
                    "errorMessage": ""
                })

            if "mistral_response" in turn:
                responses.append({
                    "modelId": "Mistral",
                    "responseText": turn["mistral_response"],
                    "respondedByRole": "Bot",
                    "errorMessage": ""
                })

            turn_output = {
                "prompt-turn": {
                    "prompt": turn["prompt"],
                    "promptedByRole": "User",
                    "selectedResponseIndex": 1,
                    "responses": responses
                },
                "instructions": format_instructions(turn["instructions"]),
                "instruction_change": turn["instructions"].get("metadata", [])
            }

            task_item[task_id][0]["data"]["taskAnswers"][0]["turnLevelOutput"].append(turn_output)

        # Add the task item to the top-level workitems list
        final_output["workitems"].append(task_item)

# Write final combined JSON
with open(output_file, "w") as f:
    json.dump(final_output, f, indent=4)

print(f"✅ Final combined JSON written to {output_file}")

✅ Final combined JSON written to final_combined_workitems.json


  "labelledTimestamp": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.0Z"),
