## Description
Prototype an `EHRFoundationalModelMIMIC4(BaseTask)` class, focusing first on the pre-processor for clinical discharge notes and radiology notes.

### Setup

In [1]:
# Change directory to package root.
# The checkout location differs per machine, so it can be overridden by setting
# the PYHEALTH_ROOT environment variable before starting the kernel; when the
# variable is unset the original path is used.
import os
PROJECT_ROOT = os.environ.get('PYHEALTH_ROOT', '/Users/wpang/Desktop/PyHealth')
os.chdir(PROJECT_ROOT)

# Other General Packages
from datetime import datetime
from typing import Any, Dict, List, Optional

In [2]:
# PyHealth Packages
from pyhealth.datasets import MIMIC4Dataset
from pyhealth.tasks import MultimodalMortalityPredictionMIMIC4

In [3]:
# Paths
# Each location is the project root joined with a fixed relative path.
EHR_ROOT = os.path.join(
    PROJECT_ROOT, "srv/local/data/physionet.org/files/mimiciv/2.2"
)
NOTE_ROOT = os.path.join(
    PROJECT_ROOT, "srv/local/data/physionet.org/files/mimic-iv-note/2.2"
)
CXR_ROOT = os.path.join(
    PROJECT_ROOT, "srv/local/data/physionet.org/files/mimic-cxr-jpg/2.0.0"
)
# Directory that delete_cache() empties and that is passed as cache_dir.
CACHE_DIR = os.path.join(
    PROJECT_ROOT, "srv/local/data/wp/pyhealth_cache"
)

### Utilities

In [4]:
import shutil
def delete_cache(cache_directory):
    """Remove everything inside ``cache_directory`` but keep the directory itself.

    Regular files and symbolic links are unlinked; sub-directories are removed
    recursively. If ``cache_directory`` does not exist there is nothing to
    delete, so the function reports that and returns instead of raising.

    Args:
        cache_directory: Path of the directory to empty.

    Returns:
        None. A status line is printed in both cases.
    """
    # A fresh checkout has no cache yet; os.listdir would raise
    # FileNotFoundError here and stop the notebook before any real work.
    if not os.path.exists(cache_directory):
        print(f"No cache found, nothing to delete at: {cache_directory}")
        return

    for item in os.listdir(cache_directory):
        item_path = os.path.join(cache_directory, item)
        # Links are checked before directories so that a symlink pointing at a
        # directory is unlinked rather than having its target tree removed.
        if os.path.isfile(item_path) or os.path.islink(item_path):
            os.unlink(item_path)
        elif os.path.isdir(item_path):
            shutil.rmtree(item_path)

    print(f"Cache deleted successfully from: {cache_directory}")

# Empty CACHE_DIR before the dataset is constructed; the same directory is
# passed to MIMIC4Dataset as cache_dir further down.
delete_cache(CACHE_DIR)

Cache deleted successfully from: /Users/wpang/Desktop/PyHealth/srv/local/data/wp/pyhealth_cache


### Code

In [5]:
# Build the dataset from the EHR, note and CXR roots, listing the tables
# requested from each source explicitly.
dataset = MIMIC4Dataset(
    ehr_root=EHR_ROOT,
    note_root=NOTE_ROOT,
    cxr_root=CXR_ROOT,
    ehr_tables=[
        "diagnoses_icd",
        "procedures_icd",
        "prescriptions",
        "labevents",
    ],
    note_tables=["discharge", "radiology"],
    cxr_tables=["metadata", "negbio"],
    cache_dir=CACHE_DIR,
    num_workers=16,
)

Memory usage Starting MIMIC4Dataset init: 446.6 MB
Initializing mimic4 dataset from /Users/wpang/Desktop/PyHealth/srv/local/data/physionet.org/files/mimiciv/2.2|/Users/wpang/Desktop/PyHealth/srv/local/data/physionet.org/files/mimic-iv-note/2.2|/Users/wpang/Desktop/PyHealth/srv/local/data/physionet.org/files/mimic-cxr-jpg/2.0.0 (dev mode: False)
Initializing MIMIC4EHRDataset with tables: ['diagnoses_icd', 'procedures_icd', 'prescriptions', 'labevents'] (dev mode: False)
Using default EHR config: /Users/wpang/Desktop/PyHealth/pyhealth/datasets/configs/mimic4_ehr.yaml
Memory usage Before initializing mimic4_ehr: 446.6 MB
Initializing mimic4_ehr dataset from /Users/wpang/Desktop/PyHealth/srv/local/data/physionet.org/files/mimiciv/2.2 (dev mode: False)
Memory usage After initializing mimic4_ehr: 446.9 MB
Memory usage After EHR dataset initialization: 446.9 MB
Initializing MIMIC4NoteDataset with tables: ['discharge', 'radiology'] (dev mode: False)
Using default note config: /Users/wpang/Desk



Memory usage Before initializing mimic4_cxr: 694.2 MB
Initializing mimic4_cxr dataset from /Users/wpang/Desktop/PyHealth/srv/local/data/physionet.org/files/mimic-cxr-jpg/2.0.0 (dev mode: False)
Memory usage After initializing mimic4_cxr: 693.2 MB
Memory usage After CXR dataset initialization: 693.2 MB
Memory usage Completed MIMIC4Dataset init: 693.2 MB


In [None]:
# Apply multimodal task
# The task object is passed to dataset.set_task and the result is kept as
# `samples`.
# NOTE(review): the captured log reports 16 workers scanning the full tables,
# so this cell is presumably slow on the full dataset - confirm before re-running.
task = MultimodalMortalityPredictionMIMIC4()
samples = dataset.set_task(task)

Setting task MultimodalMortalityPredictionMIMIC4 for mimic4 base dataset...
Applying task transformations on data with 16 workers...
Combining data from ehr dataset
Scanning table: diagnoses_icd from /Users/wpang/Desktop/PyHealth/srv/local/data/physionet.org/files/mimiciv/2.2/hosp/diagnoses_icd.csv.gz
Joining with table: /Users/wpang/Desktop/PyHealth/srv/local/data/physionet.org/files/mimiciv/2.2/hosp/admissions.csv.gz
Scanning table: procedures_icd from /Users/wpang/Desktop/PyHealth/srv/local/data/physionet.org/files/mimiciv/2.2/hosp/procedures_icd.csv.gz
Joining with table: /Users/wpang/Desktop/PyHealth/srv/local/data/physionet.org/files/mimiciv/2.2/hosp/admissions.csv.gz
Scanning table: prescriptions from /Users/wpang/Desktop/PyHealth/srv/local/data/physionet.org/files/mimiciv/2.2/hosp/prescriptions.csv.gz
Scanning table: labevents from /Users/wpang/Desktop/PyHealth/srv/local/data/physionet.org/files/mimiciv/2.2/hosp/labevents.csv.gz
Joining with table: /Users/wpang/Desktop/PyHealth

In [None]:
# Completion marker: when the notebook is run top to bottom, this prints only
# after every cell above has finished.
print("Done!")