*Created On: Feb 10, 2026*
## Description
Prototype an `EHRFoundationalModelMIMIC4(BaseTask)` task class, focusing first on the pre-processor for clinical discharge notes and radiology notes.

In [None]:
# Change directory to package root.
# The checkout location is machine-specific, so it can be overridden by setting
# the PYHEALTH_PROJECT_ROOT environment variable before starting the kernel;
# when the variable is unset the original hard-coded path is used.
import os
PROJECT_ROOT = os.environ.get('PYHEALTH_PROJECT_ROOT', '/Users/wpang/Desktop/PyHealth')
os.chdir(PROJECT_ROOT)

# Other General Packages
from datetime import datetime
from typing import Any, Dict, List, Optional

In [None]:
# PyHealth Packages
from pyhealth.datasets import MIMIC4Dataset
from pyhealth.tasks import MultimodalMortalityPredictionMIMIC4

# Will's Contribution Utilities
# TODO(review): this line was the unfinished statement `from will_contribution`,
# which is a SyntaxError and prevents the whole cell from running. It is
# commented out until the names to import are known; restore it with an
# explicit `import ...` list.
# from will_contribution import ...

In [None]:
import polars as pl

# Raise polars' table-display limits for this session. The per-option notes
# below follow the polars `Config` reference.
pl.Config.set_tbl_rows(1000)  # print up to 1000 rows per table
pl.Config.set_tbl_cols(100)  # print up to 100 columns per table
pl.Config.set_fmt_str_lengths(1000)  # show up to 1000 characters per string value

In [None]:
# Data and cache locations, each resolved underneath PROJECT_ROOT.
EHR_ROOT = os.path.join(
    PROJECT_ROOT, "srv/local/data/physionet.org/files/mimiciv/2.2"
)
NOTE_ROOT = os.path.join(
    PROJECT_ROOT, "srv/local/data/physionet.org/files/mimic-iv-note/2.2"
)
CXR_ROOT = os.path.join(
    PROJECT_ROOT, "srv/local/data/physionet.org/files/mimic-cxr-jpg/2.0.0"
)
CACHE_DIR = os.path.join(PROJECT_ROOT, "srv/local/data/wp/pyhealth_cache")

In [None]:
class EHRFoundationalModelMIMIC4:
    """Prototype task that gathers a patient's clinical notes into one sample.

    Calling an instance with a patient collects the text of every
    ``discharge`` and ``radiology`` event and returns a single-sample list,
    or an empty list when the patient has no usable note of either type.

    Attributes:
        input_schema: Maps the ``discharge`` and ``radiology`` sample fields
            to the string ``"raw"``.
        output_schema: Maps ``mortality`` to the string ``"binary"``.
    """

    def __init__(self):
        """Initialize the EHR Foundational Model task."""
        self.input_schema: Dict[str, str] = {
            "discharge": "raw",
            "radiology": "raw",
        }
        # NOTE(review): __call__ does not emit a "mortality" key yet, so the
        # samples it returns do not carry the label declared here — TODO add
        # the label (or drop it from the schema) before using this for training.
        self.output_schema: Dict[str, str] = {"mortality": "binary"}

    def _clean_text(self, text: Optional[str]) -> Optional[str]:
        """Return ``text`` unchanged if it has content, otherwise ``None``.

        Non-string values (e.g. ``None`` or a float NaN from a missing cell)
        and strings that are empty or whitespace-only count as having no
        content. The text itself is never modified.
        """
        if isinstance(text, str) and text.strip():
            return text
        return None

    def _collect_notes(self, patient: Any, event_type: str) -> List[str]:
        """Return the usable note texts of ``event_type`` for ``patient``.

        Events that lack a ``text`` attribute are skipped rather than raising,
        so a malformed event does not discard the rest of the patient's notes.
        """
        cleaned = (
            self._clean_text(getattr(event, "text", None))
            for event in patient.get_events(event_type=event_type)
        )
        return [text for text in cleaned if text is not None]

    def __call__(self, patient: Any) -> List[Dict[str, Any]]:
        """Build the note sample for one patient.

        Args:
            patient: Object exposing ``patient_id`` and
                ``get_events(event_type=...)``, whose events carry the note
                body in a ``text`` attribute.

        Returns:
            A one-element list holding a dict with ``patient_id``,
            ``discharge`` (list of note texts) and ``radiology`` (list of
            note texts), or an empty list when the patient has no usable
            note of either type.
        """
        all_discharge_notes = self._collect_notes(patient, "discharge")
        all_radiology_notes = self._collect_notes(patient, "radiology")

        # Require at least one note of either type
        if not all_discharge_notes and not all_radiology_notes:
            return []

        return [
            {
                "patient_id": patient.patient_id,
                "discharge": all_discharge_notes,
                "radiology": all_radiology_notes,
            }
        ]