In [None]:
from pyhealth.datasets import MIMIC3Dataset

# Location of the synthetic MIMIC-III files (hosted in a Google Cloud Storage bucket).
root = "https://storage.googleapis.com/pyhealth/Synthetic_MIMIC-III"

# Build the dataset from the three tables this notebook requests.
# NOTE(review): "noteevents" presumably supplies the clinical note text and the
# two *_icd tables the ICD codes used by the coding task below; confirm against
# the task's input schema.
dataset = MIMIC3Dataset(
    dataset_name="mimic3",
    root=root,
    tables=["diagnoses_icd", "procedures_icd", "noteevents"],
)

In [None]:
from pyhealth.tasks import MIMIC3ICD9Coding

# Instantiate the ICD-9 coding task and apply it to the dataset to obtain samples.
mimic3_coding = MIMIC3ICD9Coding()
samples = dataset.set_task(mimic3_coding)

# Print sample information
print(f"Total samples generated: {len(samples)}")
if len(samples) > 0:
    # Look the first sample up once instead of re-indexing `samples` on every line.
    first_sample = samples[0]
    icd_codes = first_sample["icd_codes"]

    print("First sample:")
    print(f"  - Text length: {len(first_sample['text'])} characters")
    print(f"  - Number of ICD codes: {len(icd_codes)}")
    if len(icd_codes) > 0:
        # Slicing clamps to the available length, so `[:5]` shows at most five
        # codes and needs no explicit length check.
        # (Assumes `icd_codes` is a sliceable sequence, as the original slice did.)
        print(f"  - Sample ICD codes: {icd_codes[:5]}")


In [None]:
from pyhealth.datasets import split_by_sample


# Split the task samples into train / validation / test subsets using a
# 70% / 10% / 20% ratio.
# A fixed seed keeps the split identical across "Restart Kernel -> Run All"
# executions, so results from different runs are comparable.
train_dataset, val_dataset, test_dataset = split_by_sample(
    dataset=samples,
    ratios=[0.7, 0.1, 0.2],
    seed=42,
)

In [None]:
from pyhealth.datasets import get_dataloader


# Single source of truth for the batch size shared by all three loaders;
# change it here rather than in each call.
BATCH_SIZE = 32

# Only the training loader shuffles; validation and test keep a fixed order.
train_dataloader = get_dataloader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = get_dataloader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_dataloader = get_dataloader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
from pyhealth.models import TransformersModel

# Wrap the pretrained "google-bert/bert-base-uncased" checkpoint in PyHealth's
# TransformersModel, handing it the task samples produced by `set_task`.
# NOTE(review): the model presumably reads its input/output schema from
# `samples`; confirm against the TransformersModel documentation.
model = TransformersModel(dataset=samples, model_name="google-bert/bert-base-uncased")
model.mode