In [None]:
import os
import json
import glob
import pandas as pd
from datasets import Dataset, DatasetDict, Features, Value, ClassLabel, load_dataset
from huggingface_hub import HfApi, create_repo

def load_json_file(file_path: str):
    """Load the contents of a ``.json`` or ``.jsonl`` file.

    Args:
        file_path: Path to the file. The extension selects the parser.

    Returns:
        For ``.json``: whatever object the document decodes to.
        For ``.jsonl``: a list with one decoded object per non-blank line,
        in file order.

    Raises:
        ValueError: If the path ends in neither ``.json`` nor ``.jsonl``.
        json.JSONDecodeError: If the file (or a non-blank line of it) is
            not valid JSON.
    """
    if file_path.endswith('.json'):
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    elif file_path.endswith('.jsonl'):
        data = []
        with open(file_path, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                # Blank lines (e.g. a stray trailing newline) carry no
                # record; json.loads('') would raise, so skip them.
                if not line:
                    continue
                data.append(json.loads(line))
        return data
    else:
        raise ValueError(f"Unsupported file format: {file_path}")

# Map each lowercase key (used below as the directory holding the split
# files) to the display name used as the configuration name on upload.
# Every key is simply the lowercase form of its display name.
dataset_map = {
    display_name.lower(): display_name
    for display_name in (
        'MedQA',
        'PubMedQA',
        'MedMCQA',
        'MedBullets',
        'MMLU',
        'MMLU-Pro',
        'AfrimedQA',
        'MedExQA',
        'MedXpertQA-R',
        'MedXpertQA-U',
    )
}


# Collected subsets, keyed by display name; each value is a DatasetDict
# holding that subset's 'test_hard' and 'test' splits.
datasets = {}

# NOTE: hard_files is never populated or read in this cell; it is kept only
# so the cell defines the same names as before.
hard_files = []
for dataset_key, dataset_name in dataset_map.items():
    dataset_dict = DatasetDict()
    # Both splits are loaded the same way from "<dataset_key>/<split>.jsonl";
    # 'test_hard' is handled first, then 'test'.
    for split in ('test_hard', 'test'):
        file_path = f"{dataset_key}/{split}.jsonl"
        data = load_json_file(file_path)
        dataset = Dataset.from_pandas(pd.DataFrame(data))
        dataset_dict[split] = dataset
    datasets[dataset_name] = dataset_dict

repo_name = "medagents-benchmark"
# Upload every subset to the same Hub repository, each one under its own
# configuration name, and report each upload as it completes.
for dataset_name in datasets:
    dataset_dict = datasets[dataset_name]
    dataset_dict.push_to_hub(repo_name, config_name=dataset_name, private=False)
    print(f"Uploaded {dataset_name} configuration")

Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 204.91ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  2.30it/s]
Creating parquet from Arrow format: 100%|██████████| 2/2 [00:00<00:00, 60.72ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:01<00:00,  1.13s/it]


Uploaded MedQA configuration


Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 572.99ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  3.23it/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 199.14ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  4.66it/s]


Uploaded PubMedQA configuration


Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1105.22ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  3.34it/s]
Creating parquet from Arrow format: 100%|██████████| 3/3 [00:00<00:00, 424.57ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  1.55it/s]


Uploaded MedMCQA configuration


Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 356.93ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  2.55it/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 141.64ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  3.98it/s]


Uploaded MedBullets configuration


Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 751.67ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  3.52it/s]
Creating parquet from Arrow format: 100%|██████████| 2/2 [00:00<00:00, 468.45ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  2.74it/s]


Uploaded MMLU configuration


Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 240.55ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  3.21it/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 186.95ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  3.19it/s]


Uploaded MMLU-Pro configuration


Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 628.83ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  3.21it/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 619.45ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  3.67it/s]


Uploaded AfrimedQA configuration


Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 380.33ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:02<00:00,  2.34s/it]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 275.58ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:05<00:00,  5.26s/it]


Uploaded MedExQA configuration


Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 512.94ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  2.99it/s]
Creating parquet from Arrow format: 100%|██████████| 2/2 [00:00<00:00, 51.59ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  2.69it/s]


Uploaded MedXpertQA-R configuration


Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 579.40ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  3.76it/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 120.54ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  2.39it/s]


Uploaded MedXpertQA-U configuration
