#### Create TrainSet_Full.json and TestSet_Full.json files:
Convert the dialogue files under `dstc8-schema-guided-dialogue-master/train/` (`dialogue_xxx`) and `dstc8-schema-guided-dialogue-master/test/` into two combined files, `TrainSet_Full.json` and `TestSet_Full.json`, similar in structure to the ones provided by VUB.

**Note**: *Not used yet, but might come in handy in the future*

In [None]:
import json
import os
import glob

def _append_unique(items, value):
    """Append value to items unless already present, keeping first-seen order."""
    if value not in items:
        items.append(value)


def _convert_turn(turn, fallback_index):
    """
    Flatten one source turn into the output turn record.

    Args:
        turn: Turn dict from a source dialogue (keys read: "speaker",
            "utterance", "frames", "index").
        fallback_index: Index to use when the turn has no "index" key.

    Returns:
        Dict with keys "index", "speaker", "utterance", "dialogue_act"
        (acts joined with "|") and "slot" (slot names joined with " & ").
    """
    speaker = turn.get("speaker", "")
    # Every speaker other than "USER" is labelled as the system side
    prefix = "USER_" if speaker == "USER" else "SYSTEM_"

    dialogue_acts = []
    slots = []

    for frame in turn.get("frames", []):
        actions = frame.get("actions", [])

        # Dialogue acts, prefixed with the speaker side
        for action in actions:
            _append_unique(dialogue_acts, f"{prefix}{action.get('act', '')}")

        # Slots annotated on the frame itself
        for slot_info in frame.get("slots", []):
            slot = slot_info.get("slot", "")
            if slot:
                _append_unique(slots, slot)

        # Slots referenced by actions; "intent" is not treated as a slot
        for action in actions:
            slot = action.get("slot", "")
            if slot and slot != "intent":
                _append_unique(slots, slot)

        # The state is only read on user turns
        if speaker == "USER" and "state" in frame:
            state = frame["state"]
            for slot in state.get("requested_slots", []):
                if slot:
                    _append_unique(slots, slot)
            for slot in state.get("slot_values", {}):
                if slot:
                    _append_unique(slots, slot)

    return {
        "index": turn.get("index", fallback_index),
        "speaker": speaker,
        "utterance": turn.get("utterance", ""),
        # Joining an empty list already yields ""
        "dialogue_act": "|".join(dialogue_acts),
        "slot": " & ".join(slots),
    }


def convert_json_files(input_dir, output_file):
    """
    Convert individual dialogue JSON files to a single JSON file.

    Every "*.json" file below input_dir (searched recursively) is loaded and
    each dialogue in it is flattened to
    {"dialogue_id", "service", "turns": [{"index", "speaker", "utterance",
    "dialogue_act", "slot"}, ...]}.

    Files are processed in sorted path order so the output is reproducible.
    JSON content that is not a dialogue is skipped instead of being written
    out as an empty dialogue: a file whose top level is not a list, or a list
    entry that is not a dict or has neither a "dialogue_id" nor a "turns" key.

    Args:
        input_dir: Directory containing individual dialogue JSON files
        output_file: Path to output file; parent directories are created
            when the path has a directory component
    """
    # Sorted because glob returns matches in arbitrary order
    json_files = sorted(
        glob.glob(os.path.join(input_dir, "**/*.json"), recursive=True)
    )
    print(f"Found {len(json_files)} JSON files in {input_dir}")

    output_data = []
    skipped = 0

    for file_path in json_files:
        with open(file_path, "r", encoding="utf-8") as f:
            dialogues = json.load(f)

        # A dialogue file is a list of dialogue dicts; anything else is skipped
        if not isinstance(dialogues, list):
            skipped += 1
            continue

        for dialogue in dialogues:
            # The recursive glob also matches non-dialogue JSON (presumably
            # the dataset's schema.json); its entries carry neither key.
            if not isinstance(dialogue, dict) or not (
                "dialogue_id" in dialogue or "turns" in dialogue
            ):
                skipped += 1
                continue

            # `or [""]` also covers an empty/None services list, which would
            # otherwise raise IndexError on [0]
            services = dialogue.get("services") or [""]
            dialogue_data = {
                "dialogue_id": dialogue.get("dialogue_id", ""),
                "service": services[0],  # Take the first service
                "turns": [],
            }

            for turn in dialogue.get("turns", []):
                dialogue_data["turns"].append(
                    _convert_turn(turn, len(dialogue_data["turns"]))
                )

            output_data.append(dialogue_data)

    # dirname is "" for a bare filename, and os.makedirs("") raises
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(output_data, f, indent=2)

    if skipped:
        print(f"Skipped {skipped} non-dialogue JSON entries")
    print(f"Converted {len(output_data)} dialogues to {output_file}")

# Build the combined training-set file from the train split
convert_json_files(
    input_dir="dstc8-schema-guided-dialogue-master/train/",
    output_file="SGD Dataset/TrainSet_Full.json",
)

# Build the combined test-set file from the test split
convert_json_files(
    input_dir="dstc8-schema-guided-dialogue-master/test/",
    output_file="SGD Dataset/TestSet_Full.json",
)

Found 128 JSON files in dstc8-schema-guided-dialogue-master/train/
Converted 16168 dialogues to SGD Dataset/TrainSet_Full.json
Found 35 JSON files in dstc8-schema-guided-dialogue-master/test/
Converted 4222 dialogues to SGD Dataset/TestSet_Full.json
