In [None]:
import os
import json
import re

def get_models(dir_path):
    """Collect every model under *dir_path* together with its experiments.

    Each entry of ``dir_path`` becomes one model record.  When the model has
    a ``meta`` sub-directory, every ``*.json`` file in it becomes an
    experiment whose parsed content is stored under ``"meta"``.  The matching
    results file is looked up in the sibling ``generations`` directory: any
    ``_dp-<digits>.<digits>`` fragment and the ``.json`` suffix are dropped
    from the meta file name and ``.jsonl`` is appended.

    Args:
        dir_path: Directory whose entries are treated as models.  Its path
            must contain ``'basement/'``; stored paths are relative to the
            first occurrence of that marker.

    Returns:
        A list of model dicts (``name``, ``type``, ``path``,
        ``experiments``) sorted by name.  Empty when ``dir_path`` is not an
        existing directory.

    Raises:
        IndexError: If a path does not contain ``'basement/'``.
        json.JSONDecodeError: If a meta file is not valid JSON.
    """
    # isdir rather than exists: a stray regular file must yield [] instead
    # of making os.listdir raise NotADirectoryError.
    if not os.path.isdir(dir_path):
        return []

    dp_fragment = re.compile(r'_dp-\d+\.\d+')
    json_suffix = '.json'

    models = []
    # os.listdir returns entries in arbitrary order; sort for stable output.
    for model_name in sorted(os.listdir(dir_path)):
        model_path = os.path.join(dir_path, model_name)
        meta_dir = os.path.join(model_path, 'meta')
        results_dir = os.path.join(model_path, 'generations')

        model = {
            "name": model_name,
            "type": "model",
            # maxsplit=1 keeps the whole remainder even when a later path
            # component itself ends in "basement" (e.g. "my_basement/").
            "path": model_path.split('basement/', 1)[1],
            "experiments": [],
        }

        if os.path.isdir(meta_dir):
            meta_files = sorted(
                f for f in os.listdir(meta_dir) if f.endswith(json_suffix)
            )
            for meta_file in meta_files:
                # The regex can never consume the trailing ".json", so
                # slicing removes exactly the suffix (str.replace would also
                # strip ".json" occurring in the middle of the name).
                results_base_name = dp_fragment.sub('', meta_file)[:-len(json_suffix)]
                meta_file_path = os.path.join(meta_dir, meta_file)
                results_file_path = os.path.join(
                    results_dir, results_base_name + '.jsonl'
                )

                # Context manager so the handle is closed deterministically.
                with open(meta_file_path, 'r', encoding='utf-8') as meta_fh:
                    meta_content = json.load(meta_fh)

                if os.path.exists(results_file_path):
                    results = {
                        "name": results_base_name,
                        "type": "results",
                        "path": results_file_path.split('basement/', 1)[1],
                    }
                else:
                    # No generations file for this experiment.
                    results = None

                model["experiments"].append({
                    "name": meta_file,
                    "type": "meta",
                    "meta": meta_content,
                    "path": meta_file_path.split('basement/', 1)[1],
                    "results": results,
                })
        models.append(model)
    return models

def get_top_ks(dir_path):
    """List the top-k directories under *dir_path* with their models.

    Args:
        dir_path: Directory whose entries are treated as top-k settings.  Its
            path must contain ``'basement/'``; stored paths are relative to
            the first occurrence of that marker.

    Returns:
        A list of dicts (``name``, ``type``, ``path``, ``models``) sorted by
        name, where ``models`` is whatever ``get_models`` returns for the
        entry.  Empty when ``dir_path`` is not an existing directory.

    Raises:
        IndexError: If an entry's path does not contain ``'basement/'``.
    """
    # isdir rather than exists: a stray regular file must yield [] instead
    # of making os.listdir raise NotADirectoryError.
    if not os.path.isdir(dir_path):
        return []

    top_ks = []
    # os.listdir returns entries in arbitrary order; sort for stable output.
    for top_k in sorted(os.listdir(dir_path)):
        top_k_path = os.path.join(dir_path, top_k)
        models = get_models(top_k_path)
        top_ks.append({
            "name": top_k,
            "type": "top_k",
            # maxsplit=1 keeps the whole remainder even when a later path
            # component itself ends in "basement" (e.g. "my_basement/").
            "path": top_k_path.split('basement/', 1)[1],
            "models": models,
        })
    return top_ks

def get_splits(dir_path):
    """List the split directories under *dir_path* with their setups.

    Args:
        dir_path: Directory whose entries are treated as splits.  Its path
            must contain ``'basement/'``; stored paths are relative to the
            first occurrence of that marker.

    Returns:
        A list of dicts (``name``, ``type``, ``path``, ``setups``) sorted by
        name, where ``setups`` is whatever ``get_setups`` returns for the
        entry.  Empty when ``dir_path`` is not an existing directory.

    Raises:
        IndexError: If an entry's path does not contain ``'basement/'``.
    """
    # isdir rather than exists: a stray regular file must yield [] instead
    # of making os.listdir raise NotADirectoryError.
    if not os.path.isdir(dir_path):
        return []

    splits = []
    # os.listdir returns entries in arbitrary order; sort for stable output.
    for split in sorted(os.listdir(dir_path)):
        split_path = os.path.join(dir_path, split)
        setups = get_setups(split_path)
        splits.append({
            "name": split,
            "type": "split",
            # maxsplit=1 keeps the whole remainder even when a later path
            # component itself ends in "basement" (e.g. "my_basement/").
            "path": split_path.split('basement/', 1)[1],
            "setups": setups,
        })
    return splits

def get_setups(dir_path):
    """List the setups under *dir_path* with their top-k directories.

    An entry whose name contains ``"dense"`` (case-insensitive) is treated as
    a container: each of its sub-directories becomes its own setup named
    ``"<entry>/<sub-directory>"`` and flagged ``"dense": True``.  Every other
    entry becomes a single setup flagged ``"dense": False``.

    Args:
        dir_path: Directory whose entries are treated as setups.  For
            non-dense entries the path must contain ``'basement/'``; stored
            paths are relative to the first occurrence of that marker.

    Returns:
        A list of setup dicts, ordered by entry name (dense sub-setups by
        sub-directory name).  Dense setups carry ``name``, ``type``,
        ``dense`` and ``top_ks``; non-dense setups additionally carry
        ``path``.  Empty when ``dir_path`` is not an existing directory.

    Raises:
        IndexError: If a non-dense entry's path does not contain
            ``'basement/'``.
    """
    # Same missing/non-directory guard as the sibling get_* helpers.
    if not os.path.isdir(dir_path):
        return []

    setups = []
    # os.listdir returns entries in arbitrary order; sort for stable output.
    for setup in sorted(os.listdir(dir_path)):
        setup_path = os.path.join(dir_path, setup)

        if 'dense' in setup.lower():
            # An empty dense directory, or a regular file that merely has
            # "dense" in its name, contributes no setups instead of raising.
            if os.path.isdir(setup_path):
                dense_names = sorted(os.listdir(setup_path))
            else:
                dense_names = []
            for dense_setup in dense_names:
                dense_setup_path = os.path.join(setup_path, dense_setup)
                # NOTE(review): dense setups have no "path" key, unlike the
                # non-dense ones; schema kept unchanged for consumers.
                setups.append({
                    "name": f"{setup}/{dense_setup}",
                    "type": "top_k",
                    "dense": True,
                    "top_ks": get_top_ks(dense_setup_path),
                })
        else:
            top_ks = get_top_ks(setup_path)
            # NOTE(review): setups are tagged "top_k", the same tag the
            # top-k records use; kept unchanged for consumers.
            setups.append({
                "name": setup,
                "type": "top_k",
                # maxsplit=1 keeps the whole remainder even when a later
                # path component itself ends in "basement".
                "path": setup_path.split('basement/', 1)[1],
                "dense": False,
                "top_ks": top_ks,
            })
    return setups

def parse_data_dir(root_dir):
    """Build the dataset hierarchy found directly under *root_dir*.

    Entries whose name contains ``"git"`` or ``"experiments"`` are skipped,
    as are entries that are not directories.

    Args:
        root_dir: Root data directory.  Its path must contain
            ``'basement/'`` once an entry is joined to it; stored paths are
            relative to the first occurrence of that marker.

    Returns:
        A list of dataset dicts (``name``, ``type``, ``path``, ``splits``)
        sorted by name, where ``splits`` is whatever ``get_splits`` returns
        for the entry.

    Raises:
        FileNotFoundError: If ``root_dir`` does not exist.
        IndexError: If a dataset path does not contain ``'basement/'``.
    """
    datasets = []
    # os.listdir returns entries in arbitrary order; sort for stable output.
    for dataset in sorted(os.listdir(root_dir)):
        # NOTE(review): this is a substring match, so any name containing
        # "git" or "experiments" is skipped, not only ".git" and the
        # generated experiments.json.
        if 'git' in dataset or 'experiments' in dataset:
            continue
        dataset_path = os.path.join(root_dir, dataset)
        # Stray regular files (README, .DS_Store, ...) are not datasets and
        # would otherwise end up in os.listdir further down the tree.
        if not os.path.isdir(dataset_path):
            continue
        splits = get_splits(dataset_path)
        datasets.append({
            "name": dataset,
            "type": "dataset",
            # maxsplit=1 keeps the whole remainder even when a later path
            # component itself ends in "basement" (e.g. "my_basement/").
            "path": dataset_path.split('basement/', 1)[1],
            "splits": splits,
        })
    return datasets

if __name__ == "__main__":
    # Walk the data root and build the nested dataset description.
    data_dir = "./basement"
    datasets = parse_data_dir(data_dir)

    # Persist the hierarchy next to the data, pretty-printed with a
    # two-space indent.
    with open('./basement/experiments.json', 'w') as out_file:
        out_file.write(json.dumps(datasets, indent=2))

    print("Experiment structure saved to experiments.json")