In [1]:
import os
import json
import glob
import pandas as pd
from datasets import Dataset, DatasetDict, Features, Value, ClassLabel, load_dataset
from huggingface_hub import HfApi, create_repo

# Function to load JSON/JSONL files
def load_json_file(file_path):
    """Load a ``.json`` or ``.jsonl`` file from disk.

    Args:
        file_path: Path to the file (``str`` or ``os.PathLike``). The format
            is chosen from the file extension.

    Returns:
        For ``.json``: whatever ``json.load`` returns for the document.
        For ``.jsonl``: a list with one decoded object per non-blank line.

    Raises:
        ValueError: If the extension is neither ``.json`` nor ``.jsonl``.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    path = os.fspath(file_path)

    if path.endswith('.json'):
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)

    if path.endswith('.jsonl'):
        with open(path, 'r', encoding='utf-8') as f:
            # Blank / whitespace-only lines (e.g. a trailing empty line) carry
            # no record; skip them instead of letting json.loads('') raise.
            return [json.loads(line) for line in map(str.strip, f) if line]

    raise ValueError(f"Unsupported file format: {file_path}")

# Map each benchmark's directory name to its display name.
# Every directory name is simply the lower-cased display name, so the mapping
# is derived from the display names; tuple order fixes the iteration order.
dataset_map = {
    display_name.lower(): display_name
    for display_name in (
        'MedQA',
        'PubMedQA',
        'MedMCQA',
        'MedBullets',
        'MMLU',
        'MMLU-Pro',
        'AfrimedQA',
        'MedExQA',
        'MedXpertQA-R',
        'MedXpertQA-U',
    )
}


# Build one DatasetDict per benchmark, keyed by the benchmark's display name.
datasets = {}

# Splits to load for every benchmark, in the order they are added to each
# DatasetDict. Each split is read from "<dataset_key>/<split>.jsonl", relative
# to the current working directory.
split_names = ('test_hard', 'test')

for dataset_key, dataset_name in dataset_map.items():
    dataset_dict = DatasetDict()
    for split in split_names:
        file_path = f"{dataset_key}/{split}.jsonl"
        data = load_json_file(file_path)
        # Go through a DataFrame so column types are inferred from all records.
        dataset_dict[split] = Dataset.from_pandas(pd.DataFrame(data))
    datasets[dataset_name] = dataset_dict

# Target Hub repository; every benchmark is uploaded into this one repo.
repo_name = "medagents-benchmark"

# Upload each benchmark as its own named configuration (subset) of the repo,
# using the benchmark's display name as the config name.
for config_name, split_dict in datasets.items():
    split_dict.push_to_hub(repo_id=repo_name, config_name=config_name, private=False)
    print(f"Uploaded {config_name} configuration")

  from .autonotebook import tqdm as notebook_tqdm
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 71.66ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  6.51it/s]
Creating parquet from Arrow format: 100%|██████████| 2/2 [00:00<00:00, 79.48ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  1.77it/s]


Uploaded MedQA configuration


Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 582.06ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  9.47it/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 131.98ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00, 12.10it/s]


Uploaded PubMedQA configuration


Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1063.46ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  4.63it/s]
Creating parquet from Arrow format: 100%|██████████| 3/3 [00:00<00:00, 445.97ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00, 10.61it/s]


Uploaded MedMCQA configuration


Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 362.05ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  4.13it/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 53.00ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  9.61it/s]
No files have been modified since last commit. Skipping to prevent empty commit.


Uploaded MedBullets configuration


Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 751.13ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00, 11.98it/s]
Creating parquet from Arrow format: 100%|██████████| 2/2 [00:00<00:00, 272.82ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00, 12.85it/s]
No files have been modified since last commit. Skipping to prevent empty commit.


Uploaded MMLU configuration


Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 666.19ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00, 11.17it/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 177.62ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00, 12.97it/s]
No files have been modified since last commit. Skipping to prevent empty commit.


Uploaded MMLU-Pro configuration


Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 590.41ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  8.18it/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 627.51ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00, 15.24it/s]
No files have been modified since last commit. Skipping to prevent empty commit.


Uploaded AfrimedQA configuration


Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 842.91ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00, 13.67it/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 383.22ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  5.29it/s]
No files have been modified since last commit. Skipping to prevent empty commit.


Uploaded MedExQA configuration


Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 465.72ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00, 15.18it/s]
Creating parquet from Arrow format: 100%|██████████| 2/2 [00:00<00:00, 34.99ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  6.30it/s]


Uploaded MedXpertQA-R configuration


Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 298.63ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00, 11.44it/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 96.12ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  9.82it/s]


Uploaded MedXpertQA-U configuration


In [3]:
# Flatten the test_hard split of every benchmark into one list of records of
# the form {"question": ..., "answer": ..., <option key>: <option text>, ...}
# and write them to a single JSONL file.
output_path = "full_hard_dataset.jsonl"

dataset_data = []
for dataset_name in dataset_map.values():
    for item in datasets[dataset_name]['test_hard']:
        # Format the data in the required format
        formatted_item = {
            "question": item["question"],
            "answer": item["answer_idx"],
        }

        # Add options if they exist. A row always exposes every column of its
        # split, so the value (not just the key) must be checked: a missing
        # options entry shows up as None rather than as an absent key.
        options = item.get("options") or {}
        # NOTE(review): options are presumably stored as a struct column with
        # one shared set of keys per split, so rows with fewer choices would
        # carry None for the unused keys -- drop those instead of emitting
        # null options. Confirm against the source files.
        formatted_item.update(
            {key: value for key, value in options.items() if value is not None}
        )

        dataset_data.append(formatted_item)

# Save to a JSONL file
with open(output_path, "w", encoding="utf-8") as f:
    for item in dataset_data:
        f.write(json.dumps(item) + "\n")

print(f"Saved {len(dataset_data)} items to {output_path}")

Saved 894 items to full_hard_dataset.jsonl
