# MIMIC mortality (unsupervised)

This notebook reproduces the unsupervised SUAVE mortality analysis with Optuna-based hyperparameter tuning.

In [1]:

import sys
import json
from pathlib import Path
import time
from typing import Dict, List, Mapping, Optional, Tuple
from IPython.display import Markdown, display

import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

import importlib.util

# The notebook resolves everything relative to the current working directory:
# the local helper module ``mimic_mortality_utils`` is imported from it and the
# data/output folders are created beneath it in a later cell.
EXAMPLES_DIR = Path().resolve()
if str(EXAMPLES_DIR) not in sys.path:
    sys.path.insert(0, str(EXAMPLES_DIR))

# ``Path().resolve()`` is the working directory, so an ``exists()`` test on it
# can never fail. Probe for the helper module instead so a wrong launch
# directory is reported with an actionable message before the import below.
if importlib.util.find_spec("mimic_mortality_utils") is None:
    raise RuntimeError(
        "Cannot locate 'mimic_mortality_utils'. Launch this notebook from the "
        "directory that contains it (the repository's 'examples' folder)."
    )

from mimic_mortality_utils import (
    RANDOM_STATE,
    TARGET_COLUMNS,
    CALIBRATION_SIZE,
    VALIDATION_SIZE,
    Schema,
    define_schema,
    
    compute_auc,
    define_schema,
    format_float,
    kolmogorov_smirnov_statistic,
    load_dataset,
    mutual_information_feature,
    prepare_features,
    rbf_mmd,
    schema_markdown_table,
    split_train_validation_calibration,
    to_numeric_frame,
)

from suave import SUAVE
from suave.evaluate import (
    evaluate_tstr,
    evaluate_trtr,
    simple_membership_inference,
)

try:
    import optuna
except ImportError as exc:  # pragma: no cover - optuna provided via requirements
    raise RuntimeError(
        "Optuna is required for the mortality analysis. Install it via 'pip install optuna'."
    ) from exc


In [2]:

# Configuration
# Settings consumed by the later cells: the Optuna trial budget and timeout,
# the prefix used to name each study, and the folder that receives outputs.
# ``optuna_storage`` is a placeholder here; a later cell fills it in once the
# output directory is known.
analysis_config = dict(
    optuna_trials=60,
    optuna_timeout=48 * 3600,
    optuna_study_prefix="unsupervised",
    optuna_storage=None,
    output_dir_name="analysis_outputs_unsupervised",
)


In [None]:

# Input and output locations, both anchored at the notebook's working directory.
DATA_DIR = EXAMPLES_DIR.joinpath("data", "sepsis_mortality_dataset").resolve()
OUTPUT_DIR = EXAMPLES_DIR / analysis_config["output_dir_name"]
OUTPUT_DIR.mkdir(exist_ok=True)

# Point Optuna at a SQLite database stored next to the other outputs.
analysis_config["optuna_storage"] = "sqlite:///{}/{}_optuna.db".format(
    OUTPUT_DIR, analysis_config["optuna_study_prefix"]
)

# Development split, held-out MIMIC test split and the eICU external cohort.
train_df = load_dataset(DATA_DIR.joinpath("mimic-mortality-train.tsv"))
test_df = load_dataset(DATA_DIR.joinpath("mimic-mortality-test.tsv"))
external_df = load_dataset(DATA_DIR.joinpath("eicu-mortality-external_val.tsv"))

# Every training column is a feature except the targets and 'PaO2'.
FEATURE_COLUMNS = [
    name for name in train_df.columns if name not in (*TARGET_COLUMNS, "PaO2")
]
schema = define_schema(train_df, FEATURE_COLUMNS)

# manual schema correction: override the inferred type of three columns
schema.update({"BMI": {"type": "real"}})
schema.update({"Respiratory_Support": {"type": "ordinal", "n_classes": 5}})
schema.update({"LYM%": {"type": "real"}})

# Render the final schema as a Markdown table in the cell output.
schema_table = schema_markdown_table(schema)
display(Markdown(schema_table))

[schema] Column 'age' flagged for review: Integer feature near categorical threshold.
[schema] Column 'PaO2' flagged for review: Continuous feature near categorical ratio boundary.
[schema] Column 'PaO2/FiO2' flagged for review: Positive skew close to threshold.


| Column | Type | n_classes | y_dim |
| --- | --- | --- | --- |
| age | real |  |  |
| sex | cat | 2 |  |
| BMI | real |  |  |
| temperature | real |  |  |
| heart_rate | real |  |  |
| respir_rate | real |  |  |
| SBP | real |  |  |
| DBP | real |  |  |
| MAP | real |  |  |
| SOFA_cns | ordinal | 5 |  |
| CRRT | cat | 2 |  |
| Respiratory_Support | ordinal | 5 |  |
| WBC | pos |  |  |
| Hb | real |  |  |
| NE% | real |  |  |
| LYM% | real |  |  |
| PLT | pos |  |  |
| ALT | pos |  |  |
| AST | pos |  |  |
| STB | pos |  |  |
| BUN | pos |  |  |
| Scr | pos |  |  |
| Glu | pos |  |  |
| K+ | real |  |  |
| Na+ | real |  |  |
| Fg | pos |  |  |
| PT | pos |  |  |
| APTT | pos |  |  |
| PH | real |  |  |
| PaO2 | real |  |  |
| PaO2/FiO2 | pos |  |  |
| PaCO2 | pos |  |  |
| HCO3- | real |  |  |
| Lac | pos |  |  |

In [4]:

def make_latent_classifier() -> Pipeline:
    """Build the classifier that is trained on SUAVE latent representations.

    Returns:
        An unfitted two-step ``Pipeline``: a ``StandardScaler`` registered as
        ``"scaler"`` followed by a ``LogisticRegression(max_iter=1000)``
        registered as ``"classifier"``.
    """

    scaler_step = ("scaler", StandardScaler())
    classifier_step = ("classifier", LogisticRegression(max_iter=1000))
    return Pipeline([scaler_step, classifier_step])


def make_logistic_pipeline() -> Pipeline:
    """Create a fresh baseline classifier for the TSTR/TRTR evaluations.

    Returns:
        An unfitted two-step ``Pipeline``: a ``StandardScaler`` registered as
        ``"scaler"`` followed by a ``LogisticRegression(max_iter=200)``
        registered as ``"classifier"``.
    """

    scaler_step = ("scaler", StandardScaler())
    classifier_step = ("classifier", LogisticRegression(max_iter=200))
    return Pipeline([scaler_step, classifier_step])


def run_optuna_search(
    X_train: pd.DataFrame,
    y_train: pd.Series,
    X_validation: pd.DataFrame,
    y_validation: pd.Series,
    schema: Schema,
    *,
    random_state: int,
    n_trials: Optional[int],
    timeout: Optional[int],
    study_name: Optional[str] = None,
    storage: Optional[str] = None,
) -> tuple["optuna.study.Study", Dict[str, object]]:
    """Perform Optuna hyperparameter optimisation for unsupervised :class:`SUAVE`.

    Every trial fits a SUAVE model on ``X_train``, trains the latent logistic
    classifier on the encoded training rows, samples ``len(X_train)`` synthetic
    rows and labels them by drawing Bernoulli outcomes from the latent
    classifier's predicted probabilities. The trial value is
    ``TSTR AUROC - TRTR AUROC`` evaluated on the validation split, and the
    study maximises that value.

    Args:
        X_train: Feature frame used to fit SUAVE and the classifiers.
        y_train: Labels aligned with ``X_train``.
        X_validation: Feature frame on which every AUROC is measured.
        y_validation: Labels aligned with ``X_validation``.
        schema: Column schema handed to :class:`SUAVE`.
        random_state: Seed for the SUAVE models, the TPE sampler and the
            synthetic-label draws.
        n_trials: Trial budget; a non-positive value is converted to ``None``
            before being passed to ``study.optimize``.
        timeout: Time budget passed to ``study.optimize``; a non-positive
            value is converted to ``None``.
        study_name: Optional study name.
        storage: Optional Optuna storage URL. An existing study is reloaded
            only when both ``storage`` and ``study_name`` are given.

    Returns:
        A ``(study, best_attributes)`` tuple. ``best_attributes`` holds the
        best trial's ``trial_number``, ``value``, ``params`` and the
        ``validation_auc``, ``fit_seconds``, ``tstr_auc``, ``trtr_auc`` and
        ``delta_auc`` user attributes.

    Raises:
        RuntimeError: If the study holds no completed trial (for example when
            every trial was pruned).
    """

    hidden_dimension_options: Dict[str, Tuple[int, int]] = {
        "compact": (128, 64),
        "balanced": (256, 128),
        "widened": (384, 192),
        "extended": (512, 256),
    }

    if n_trials is not None and n_trials <= 0:
        n_trials = None
    if timeout is not None and timeout <= 0:
        timeout = None

    rng = np.random.default_rng(random_state)

    # The labels do not change between trials, so decide once whether both
    # splits contain at least two classes instead of re-checking after each
    # (expensive) model fit.
    labels_are_diverse = bool(
        np.unique(y_train).size >= 2 and np.unique(y_validation).size >= 2
    )

    def objective(trial: "optuna.trial.Trial") -> float:
        """Fit one SUAVE configuration and return its TSTR-minus-TRTR AUROC."""

        latent_dim = trial.suggest_categorical("latent_dim", [8, 16, 32, 64, 128])
        hidden_key = trial.suggest_categorical("hidden_dims", list(hidden_dimension_options.keys()))
        dropout = trial.suggest_float("dropout", 0.0, 0.5)
        learning_rate = trial.suggest_float("learning_rate", 5e-5, 2e-2, log=True)
        batch_size = trial.suggest_categorical("batch_size", [64, 128, 256, 512, 1024])
        beta = trial.suggest_float("beta", 0.25, 4.0)
        kl_warmup_epochs = trial.suggest_int("kl_warmup_epochs", 2, 25)
        warmup_epochs = trial.suggest_int("warmup_epochs", 10, 60)
        n_components = trial.suggest_int("n_components", 1, 8)
        tau_start = trial.suggest_float("tau_start", 0.5, 5.0)
        tau_min = trial.suggest_float("tau_min", 1e-4, 0.5, log=True)
        tau_decay = trial.suggest_float("tau_decay", 1e-4, 0.1, log=True)

        # Prune before training: no AUROC can be computed without both classes.
        if not labels_are_diverse:
            raise optuna.exceptions.TrialPruned("Insufficient class diversity")

        model = SUAVE(
            schema=schema,
            behaviour="unsupervised",
            latent_dim=latent_dim,
            hidden_dims=hidden_dimension_options[hidden_key],
            dropout=dropout,
            learning_rate=learning_rate,
            batch_size=batch_size,
            beta=beta,
            n_components=n_components,
            tau_start=tau_start,
            tau_min=tau_min,
            tau_decay=tau_decay,
            random_state=random_state,
        )

        start_time = time.perf_counter()
        model.fit(
            X_train,
            warmup_epochs=warmup_epochs,
            kl_warmup_epochs=kl_warmup_epochs,
        )
        fit_seconds = time.perf_counter() - start_time

        latent_classifier = make_latent_classifier()
        train_latents = model.encode(X_train)
        val_latents = model.encode(X_validation)

        if train_latents.size == 0 or val_latents.size == 0:
            raise optuna.exceptions.TrialPruned("Empty latent representations")

        latent_classifier.fit(train_latents, np.asarray(y_train))
        val_probs = latent_classifier.predict_proba(val_latents)
        val_auc = compute_auc(val_probs, y_validation)
        if not np.isfinite(val_auc):
            raise optuna.exceptions.TrialPruned("Non-finite validation AUC")

        # Numeric views for the TSTR/TRTR baselines; missing values are filled
        # with the training means (0.0 where a column has no mean).
        numeric_train = to_numeric_frame(X_train)
        numeric_val = to_numeric_frame(X_validation)
        train_means = numeric_train.mean(axis=0)
        train_means = train_means.fillna(0.0)
        numeric_train = numeric_train.fillna(train_means)
        numeric_val = numeric_val.fillna(train_means)

        try:
            synthetic_features = model.sample(len(X_train))
        except Exception as exc:
            raise optuna.exceptions.TrialPruned(f"Sampling failed: {exc}") from exc
        if not isinstance(synthetic_features, pd.DataFrame):
            synthetic_features = pd.DataFrame(synthetic_features, columns=X_train.columns)
        synthetic_features = synthetic_features.reindex(columns=X_train.columns)
        numeric_synth = to_numeric_frame(synthetic_features).fillna(train_means)

        # Label the synthetic rows by sampling from the latent classifier's
        # predicted probability of the last class.
        synthetic_latents = model.encode(synthetic_features)
        synth_probs = latent_classifier.predict_proba(synthetic_latents)
        if synth_probs.ndim == 1:
            positive_probs = synth_probs
        else:
            positive_probs = synth_probs[:, -1]
        if not np.all(np.isfinite(positive_probs)):
            raise optuna.exceptions.TrialPruned("Non-finite synthetic probabilities")
        rng_local = np.random.default_rng(rng.integers(0, 1_000_000))
        synthetic_labels = rng_local.binomial(1, np.clip(positive_probs, 1e-4, 1 - 1e-4))
        if np.unique(synthetic_labels).size < 2:
            raise optuna.exceptions.TrialPruned("Synthetic labels lacked class diversity")

        try:
            tstr_metrics = evaluate_tstr(
                (numeric_synth.to_numpy(), synthetic_labels),
                (numeric_val.to_numpy(), y_validation.to_numpy()),
                make_logistic_pipeline,
            )
            trtr_metrics = evaluate_trtr(
                (numeric_train.to_numpy(), y_train.to_numpy()),
                (numeric_val.to_numpy(), y_validation.to_numpy()),
                make_logistic_pipeline,
            )
        except ValueError as exc:
            raise optuna.exceptions.TrialPruned(f"Classification failed: {exc}") from exc

        tstr_auc = tstr_metrics.get("auroc")
        trtr_auc = trtr_metrics.get("auroc")
        # ``.get`` yields None when the key is absent; treat that like a
        # non-finite score instead of letting ``np.isfinite(None)`` raise.
        if (
            tstr_auc is None
            or trtr_auc is None
            or not (np.isfinite(tstr_auc) and np.isfinite(trtr_auc))
        ):
            raise optuna.exceptions.TrialPruned("Non-finite TSTR/TRTR AUC")

        delta_auc = float(tstr_auc - trtr_auc)

        # Store plain Python floats so every attribute is JSON-serialisable
        # for storage-backed studies.
        trial.set_user_attr("validation_auc", float(val_auc))
        trial.set_user_attr("fit_seconds", fit_seconds)
        trial.set_user_attr(
            "train_auc",
            float(compute_auc(latent_classifier.predict_proba(train_latents), y_train)),
        )
        trial.set_user_attr("tstr_auc", float(tstr_auc))
        trial.set_user_attr("trtr_auc", float(trtr_auc))
        trial.set_user_attr("delta_auc", delta_auc)
        return delta_auc

    # Draw the sampler seed as a built-in int rather than a NumPy scalar.
    sampler = optuna.samplers.TPESampler(seed=int(rng.integers(0, 1_000_000)))
    study = optuna.create_study(
        direction="maximize",
        sampler=sampler,
        study_name=study_name,
        storage=storage,
        load_if_exists=bool(storage and study_name),
    )
    study.optimize(objective, n_trials=n_trials, timeout=timeout)

    # ``Study.best_trial`` raises ValueError (it does not return None) when no
    # trial has completed, so translate that into the documented RuntimeError.
    try:
        best_trial = study.best_trial
    except ValueError as exc:
        raise RuntimeError("Optuna search did not produce a best trial") from exc
    if best_trial is None:
        raise RuntimeError("Optuna search did not produce a best trial")

    best_user_attrs = best_trial.user_attrs
    best_attributes: Dict[str, object] = {
        "trial_number": best_trial.number,
        "value": best_trial.value,
        "params": dict(best_trial.params),
        "validation_auc": best_user_attrs.get("validation_auc"),
        "fit_seconds": best_user_attrs.get("fit_seconds"),
        "tstr_auc": best_user_attrs.get("tstr_auc"),
        "trtr_auc": best_user_attrs.get("trtr_auc"),
        "delta_auc": best_user_attrs.get("delta_auc", best_trial.value),
    }
    return study, best_attributes


In [None]:

# Fitted artefacts, keyed by target column name.
suave_models: Dict[str, SUAVE] = {}
latent_models: Dict[str, Pipeline] = {}

# Row-wise accumulators filled by the per-target loop below.
metrics_records: List[Dict[str, object]] = []
membership_records: List[Dict[str, object]] = []
optuna_reports: Dict[str, Dict[str, object]] = {}

# Outputs of the optional TSTR/TRTR and distribution analysis; they stay
# ``None`` when that analysis is skipped.
tstr_results: Optional[pd.DataFrame] = None
tstr_path: Optional[Path] = None
distribution_df: Optional[pd.DataFrame] = None
distribution_path: Optional[Path] = None

# Lookup from the tuned ``hidden_dims`` label to the actual layer widths;
# mirrors the table used inside ``run_optuna_search``.
hidden_dimension_options: Dict[str, Tuple[int, int]] = {
    "compact": (128, 64),
    "balanced": (256, 128),
    "widened": (384, 192),
    "extended": (512, 256),
}

# For each available target: tune SUAVE, refit with the best parameters,
# score the latent classifier on every dataset, run membership inference and
# export the trial log.
for target in TARGET_COLUMNS:
    if target not in train_df.columns:
        continue
    print(f"Training unsupervised model for {target}…")
    X_full = prepare_features(train_df, FEATURE_COLUMNS)
    y_full = train_df[target]

    (
        X_train_model,
        X_validation,
        X_calibration,
        y_train_model,
        y_validation,
        y_calibration,
    ) = split_train_validation_calibration(
        X_full,
        y_full,
        calibration_size=CALIBRATION_SIZE,
        validation_size=VALIDATION_SIZE,
        random_state=RANDOM_STATE,
    )

    # The calibration split is folded back into the training data here.
    X_train_model = pd.concat([X_train_model, X_calibration], ignore_index=True)
    y_train_model = pd.concat([y_train_model, y_calibration], ignore_index=True)

    # Hyperparameter search; the study is only named when a prefix is set.
    study_name = None
    if analysis_config["optuna_study_prefix"]:
        study_name = f"{analysis_config['optuna_study_prefix']}_{target}"
    study, best_info = run_optuna_search(
        X_train_model,
        y_train_model,
        X_validation,
        y_validation,
        schema,
        random_state=RANDOM_STATE,
        n_trials=analysis_config["optuna_trials"],
        timeout=analysis_config["optuna_timeout"],
        study_name=study_name,
        storage=analysis_config["optuna_storage"],
    )

    # Refit a model with the winning parameters (with fallbacks for any
    # parameter missing from the best trial).
    best_params = dict(best_info.get("params", {}))
    hidden_key = str(best_params.get("hidden_dims", "balanced"))
    hidden_dims = hidden_dimension_options.get(
        hidden_key, hidden_dimension_options["balanced"]
    )
    model = SUAVE(
        schema=schema,
        behaviour="unsupervised",
        latent_dim=int(best_params.get("latent_dim", 32)),
        hidden_dims=hidden_dims,
        dropout=float(best_params.get("dropout", 0.1)),
        learning_rate=float(best_params.get("learning_rate", 1e-3)),
        batch_size=int(best_params.get("batch_size", 256)),
        beta=float(best_params.get("beta", 1.5)),
        n_components=int(best_params.get("n_components", 1)),
        tau_start=float(best_params.get("tau_start", 1.0)),
        tau_min=float(best_params.get("tau_min", 0.1)),
        tau_decay=float(best_params.get("tau_decay", 0.01)),
        random_state=RANDOM_STATE,
    )
    model.fit(
        X_train_model,
        warmup_epochs=int(best_params.get("warmup_epochs", 30)),
        kl_warmup_epochs=int(best_params.get("kl_warmup_epochs", 10)),
    )
    suave_models[target] = model

    latent_classifier = make_latent_classifier()
    train_latents = model.encode(X_train_model)

    # Datasets on which the latent classifier is scored; the external cohort
    # is included only when it carries this target column.
    evaluation_datasets: Dict[str, Tuple[pd.DataFrame, pd.Series]] = {
        "Train": (X_train_model, y_train_model),
        "Validation": (X_validation, y_validation),
        "MIMIC test": (
            prepare_features(test_df, FEATURE_COLUMNS),
            test_df[target],
        ),
    }
    if target in external_df.columns:
        evaluation_datasets["eICU external"] = (
            prepare_features(external_df, FEATURE_COLUMNS),
            external_df[target],
        )

    latent_classifier.fit(train_latents, np.asarray(y_train_model))
    latent_models[target] = latent_classifier

    for dataset_name, (features, labels) in evaluation_datasets.items():
        latents = model.encode(features)
        probs = latent_classifier.predict_proba(latents)
        auc = compute_auc(probs, labels)
        metrics_records.append(dict(target=target, dataset=dataset_name, auc=auc))

    # Membership inference: training rows versus MIMIC test rows.
    train_probs = latent_classifier.predict_proba(train_latents)
    test_latents = model.encode(evaluation_datasets["MIMIC test"][0])
    test_probs = latent_classifier.predict_proba(test_latents)
    membership = simple_membership_inference(
        train_probs,
        np.asarray(y_train_model),
        test_probs,
        np.asarray(evaluation_datasets["MIMIC test"][1]),
    )
    membership_records.append({"target": target, **membership})

    # Flatten every trial into one row: number, value, sampled parameters and
    # whichever of the logged user attributes are present.
    logged_attributes = (
        "validation_auc",
        "fit_seconds",
        "train_auc",
        "tstr_auc",
        "trtr_auc",
        "delta_auc",
    )
    trial_rows: List[Dict[str, object]] = []
    for trial in study.trials:
        record: Dict[str, object] = {
            "trial_number": trial.number,
            "value": trial.value,
        }
        record.update(trial.params)
        for attribute_name in logged_attributes:
            attribute_value = trial.user_attrs.get(attribute_name)
            if attribute_value is not None:
                record[attribute_name] = attribute_value
        trial_rows.append(record)
    trials_df = pd.DataFrame(trial_rows)
    trials_path = OUTPUT_DIR / f"optuna_trials_{target}.csv"
    if trials_df.empty:
        # No trials at all: still leave a header-only CSV behind.
        trials_path.write_text("trial_number,value\n")
    else:
        trials_df.to_csv(trials_path, index=False)

    optuna_reports[target] = {
        "best": best_info,
        "best_params": best_params,
        "metrics": {
            row["dataset"]: row["auc"]
            for row in metrics_records
            if row["target"] == target
        },
        "trials_csv": trials_path,
    }

# Write the per-dataset AUC table to CSV.
metrics_path = OUTPUT_DIR / "evaluation_metrics_unsupervised.csv"
metrics_df = pd.DataFrame(metrics_records)
metrics_df.to_csv(metrics_path, index=False)

# Write the membership-inference results to CSV.
membership_path = OUTPUT_DIR / "membership_inference_unsupervised.csv"
membership_df = pd.DataFrame(membership_records)
membership_df.to_csv(membership_path, index=False)

# TSTR/TRTR comparison and per-feature distribution metrics for the primary
# target, evaluated on the MIMIC test split.
primary_target = "in_hospital_mortality"
if primary_target in suave_models and primary_target in latent_models:
    print("Generating synthetic data for TSTR/TRTR comparisons…")
    model = suave_models[primary_target]
    latent_classifier = latent_models[primary_target]

    # Real training data as a numeric frame; gaps are filled with the column
    # means (0.0 where a column has no mean).
    X_train_full = prepare_features(train_df, FEATURE_COLUMNS)
    y_train_full = train_df[primary_target]
    numeric_train = to_numeric_frame(X_train_full)
    train_means = numeric_train.mean(axis=0)
    train_means = train_means.fillna(0.0)
    numeric_train = numeric_train.fillna(train_means)

    # Synthetic data of the same size, imputed with the real training means.
    synthetic_features = model.sample(len(X_train_full))
    synthetic_features = synthetic_features[FEATURE_COLUMNS]
    numeric_synthetic = to_numeric_frame(synthetic_features)
    numeric_synthetic = numeric_synthetic.fillna(train_means)

    # Label the synthetic rows by sampling from the latent classifier's
    # predicted probabilities. The probabilities are clipped to
    # [1e-4, 1 - 1e-4], matching the labelling procedure used inside the
    # Optuna objective, so the TSTR score reported here is produced the same
    # way as the score that was optimised.
    synthetic_latents = model.encode(synthetic_features)
    synthetic_probs = latent_classifier.predict_proba(synthetic_latents)[:, 1]
    rng = np.random.default_rng(RANDOM_STATE)
    synthetic_labels = rng.binomial(1, np.clip(synthetic_probs, 1e-4, 1 - 1e-4))

    numeric_test = to_numeric_frame(prepare_features(test_df, FEATURE_COLUMNS))
    numeric_test = numeric_test.fillna(train_means)
    y_test = test_df[primary_target]

    # Train on synthetic vs. train on real; both are tested on real test data.
    tstr_metrics = evaluate_tstr(
        (numeric_synthetic.to_numpy(), synthetic_labels),
        (numeric_test.to_numpy(), y_test.to_numpy()),
        make_logistic_pipeline,
    )
    trtr_metrics = evaluate_trtr(
        (numeric_train.to_numpy(), y_train_full.to_numpy()),
        (numeric_test.to_numpy(), y_test.to_numpy()),
        make_logistic_pipeline,
    )
    tstr_results = pd.DataFrame(
        [
            {"setting": "TSTR", **tstr_metrics},
            {"setting": "TRTR", **trtr_metrics},
        ]
    )
    tstr_path = OUTPUT_DIR / "tstr_trtr_comparison_unsupervised.csv"
    tstr_results.to_csv(tstr_path, index=False)

    # Per-feature comparison of real and synthetic values.
    distribution_rows: List[Dict[str, object]] = []
    for column in FEATURE_COLUMNS:
        real_values = numeric_train[column].to_numpy()
        synthetic_values = numeric_synthetic[column].to_numpy()
        distribution_rows.append(
            {
                "feature": column,
                "ks": kolmogorov_smirnov_statistic(real_values, synthetic_values),
                "mmd": rbf_mmd(
                    real_values, synthetic_values, random_state=RANDOM_STATE
                ),
                "mutual_information": mutual_information_feature(
                    real_values, synthetic_values
                ),
            }
        )
    distribution_df = pd.DataFrame(distribution_rows)
    distribution_path = OUTPUT_DIR / "distribution_shift_metrics_unsupervised.csv"
    distribution_df.to_csv(distribution_path, index=False)
else:
    print("Primary target model not available; skipping TSTR/TRTR and distribution analysis.")


# Assemble the Markdown report as a list of lines; it is joined with newlines
# and written to disk at the end of this block.
summary_lines: List[str] = [
    "# Unsupervised mortality modelling report",
    "",
    "## Schema",
    schema_table,
    "",
    "## Model selection and performance",
]

if len(optuna_reports) == 0:
    summary_lines.append("No models were trained by optuna.")

# One subsection per tuned target: best trial, parameters and AUC table.
for target, report in optuna_reports.items():
    best = report["best"]
    best_params = report["best_params"]
    metrics_map: Mapping[str, float] = report["metrics"]

    best_value = best.get("value")
    if isinstance(best_value, (int, float)):
        value_text = f"{best_value:.4f}"
    else:
        value_text = "n/a"

    summary_lines += [
        f"### {target}",
        f"Best Optuna trial #{best.get('trial_number')} with delta AUC (TSTR-TRTR) {value_text}",
        "Best parameters:",
        "```json",
        json.dumps(best_params, indent=2, ensure_ascii=False),
        "```",
    ]

    # The TSTR/TRTR line is only emitted when both scores were recorded.
    if not (best.get("tstr_auc") is None or best.get("trtr_auc") is None):
        summary_lines.append(
            f"TSTR AUC: {format_float(best.get('tstr_auc'))}"
            f" | TRTR AUC: {format_float(best.get('trtr_auc'))}"
            f" | Delta: {format_float(best.get('delta_auc'))}"
        )

    summary_lines += ["| Dataset | AUC |", "| --- | --- |"]
    for dataset_name in ("Train", "Validation", "MIMIC test", "eICU external"):
        if dataset_name in metrics_map:
            summary_lines.append(
                f"| {dataset_name} | {format_float(metrics_map.get(dataset_name))} |"
            )

    summary_lines += [
        f"Optuna trials logged at: {report['trials_csv'].relative_to(OUTPUT_DIR)}",
        "",
    ]

# TSTR vs TRTR table, present only when that comparison was run.
if tstr_results is not None:
    summary_lines += [
        "## TSTR vs TRTR",
        "| Setting | Accuracy | AUC | AUPRC | Brier | ECE |",
        "| --- | --- | --- | --- | --- | --- |",
    ]
    for _, row in tstr_results.iterrows():
        summary_lines.append(
            f"| {row['setting']}"
            f" | {row.get('accuracy', np.nan):.3f}"
            f" | {row.get('auroc', np.nan):.3f}"
            f" | {row.get('auprc', np.nan):.3f}"
            f" | {row.get('brier', np.nan):.3f}"
            f" | {row.get('ece', np.nan):.3f} |"
        )
    summary_lines.append("")

summary_lines.append("## Distribution shift and privacy")
if distribution_df is None or distribution_path is None:
    summary_lines.append("Distribution metrics were not computed.")
else:
    # Only the ten features with the largest KS statistic are listed inline.
    distribution_top = distribution_df.sort_values("ks", ascending=False).head(10)
    summary_lines += [
        "Top 10 features by KS statistic:",
        "| Feature | KS | MMD | Mutual information |",
        "| --- | --- | --- | --- |",
    ]
    for _, row in distribution_top.iterrows():
        summary_lines.append(
            f"| {row['feature']}"
            f" | {row.get('ks', np.nan):.3f}"
            f" | {row.get('mmd', np.nan):.3f}"
            f" | {row.get('mutual_information', np.nan):.3f} |"
        )
    summary_lines.append(
        f"Full distribution metrics: {distribution_path.relative_to(OUTPUT_DIR)}"
    )

if membership_records:
    summary_lines += [
        "Membership inference results:",
        "| Target | attack_auc | attack_accuracy | attack_threshold |",
        "| --- | --- | --- | --- |",
    ]
    for _, row in pd.DataFrame(membership_records).iterrows():
        summary_lines.append(
            f"| {row['target']}"
            f" | {row.get('attack_auc', np.nan):.3f}"
            f" | {row.get('attack_best_accuracy', np.nan):.3f}"
            f" | {row.get('attack_best_threshold', np.nan):.3f} |"
        )
    summary_lines.append(
        f"Membership metrics saved to: {membership_path.relative_to(OUTPUT_DIR)}"
    )
else:
    summary_lines.append("No membership inference metrics were recorded.")

summary_path = OUTPUT_DIR / "summary_unsupervised.md"
summary_path.write_text("\n".join(summary_lines), encoding="utf-8")

# Tell the reader where each artefact was written.
print("Analysis complete.")
print("Metric table saved to {}".format(metrics_path))
print("Membership inference results saved to {}".format(membership_path))
# The TSTR and distribution files exist only if the optional analysis ran.
if not (tstr_path is None or distribution_path is None):
    print("TSTR/TRTR comparison saved to {}".format(tstr_path))
    print("Distribution metrics saved to {}".format(distribution_path))
print("Summary written to {}".format(summary_path))


Training unsupervised model for in_hospital_mortality…


[I 2025-09-24 02:47:27,201] A new study created in RDB with name: unsupervised_in_hospital_mortality


unsupervised training:   0%|          | 0/10 [00:00<?, ?it/s]

[I 2025-09-24 03:56:31,134] Trial 0 finished with value: -0.07268738941326314 and parameters: {'latent_dim': 16, 'hidden_dims': 'balanced', 'dropout': 0.3073187550897951, 'learning_rate': 7.76479325061648e-05, 'batch_size': 64, 'beta': 0.598017013951521, 'kl_warmup_epochs': 21, 'warmup_epochs': 10, 'n_components': 6, 'tau_start': 3.3624624396717526, 'tau_min': 0.07754282945320617, 'tau_decay': 0.009430419398084474}. Best is trial 0 with value: -0.07268738941326314.


unsupervised training:   0%|          | 0/23 [00:00<?, ?it/s]

[I 2025-09-24 04:50:12,785] Trial 1 finished with value: -0.08207753751009306 and parameters: {'latent_dim': 8, 'hidden_dims': 'extended', 'dropout': 0.2954000136593146, 'learning_rate': 9.210612304120623e-05, 'batch_size': 64, 'beta': 3.7199507852167963, 'kl_warmup_epochs': 17, 'warmup_epochs': 23, 'n_components': 2, 'tau_start': 1.4631627527394042, 'tau_min': 0.022114835295407302, 'tau_decay': 0.0003968887618755092}. Best is trial 0 with value: -0.07268738941326314.


unsupervised training:   0%|          | 0/49 [00:00<?, ?it/s]

[I 2025-09-24 06:44:55,747] Trial 2 finished with value: -0.049042232191065405 and parameters: {'latent_dim': 16, 'hidden_dims': 'widened', 'dropout': 0.029473827718230483, 'learning_rate': 6.280394207239094e-05, 'batch_size': 256, 'beta': 1.175990034714907, 'kl_warmup_epochs': 24, 'warmup_epochs': 49, 'n_components': 8, 'tau_start': 2.8391624774065893, 'tau_min': 0.233311728645624, 'tau_decay': 0.00015790850649580868}. Best is trial 2 with value: -0.049042232191065405.


unsupervised training:   0%|          | 0/34 [00:00<?, ?it/s]

[I 2025-09-24 08:41:47,378] Trial 3 finished with value: -0.08710126674189689 and parameters: {'latent_dim': 32, 'hidden_dims': 'balanced', 'dropout': 0.4644112109291761, 'learning_rate': 0.0002918409267973941, 'batch_size': 64, 'beta': 0.5624102281820105, 'kl_warmup_epochs': 10, 'warmup_epochs': 34, 'n_components': 3, 'tau_start': 0.7453198818068492, 'tau_min': 0.0023115847195476404, 'tau_decay': 0.0006041899177905281}. Best is trial 2 with value: -0.049042232191065405.


unsupervised training:   0%|          | 0/12 [00:00<?, ?it/s]

[I 2025-09-24 09:04:45,256] Trial 4 finished with value: -0.06548973170927441 and parameters: {'latent_dim': 16, 'hidden_dims': 'widened', 'dropout': 0.07137994067533732, 'learning_rate': 0.005687053183697995, 'batch_size': 256, 'beta': 1.6238798849596598, 'kl_warmup_epochs': 12, 'warmup_epochs': 12, 'n_components': 6, 'tau_start': 3.482032694556354, 'tau_min': 0.4510598420705658, 'tau_decay': 0.002651040210912842}. Best is trial 2 with value: -0.049042232191065405.


unsupervised training:   0%|          | 0/54 [00:00<?, ?it/s]

[I 2025-09-24 14:20:12,433] Trial 5 finished with value: -0.062005550014276434 and parameters: {'latent_dim': 64, 'hidden_dims': 'widened', 'dropout': 0.35293108773289117, 'learning_rate': 0.0032238768546772336, 'batch_size': 64, 'beta': 0.4507739704372005, 'kl_warmup_epochs': 7, 'warmup_epochs': 54, 'n_components': 4, 'tau_start': 0.9710073490010638, 'tau_min': 0.0016533663704475132, 'tau_decay': 0.00023139773584066902}. Best is trial 2 with value: -0.049042232191065405.


unsupervised training:   0%|          | 0/38 [00:00<?, ?it/s]

[I 2025-09-24 16:58:29,981] Trial 6 finished with value: -0.14548069096203697 and parameters: {'latent_dim': 16, 'hidden_dims': 'widened', 'dropout': 0.34719916162616377, 'learning_rate': 0.004207514635002578, 'batch_size': 128, 'beta': 3.1936390984000766, 'kl_warmup_epochs': 16, 'warmup_epochs': 38, 'n_components': 7, 'tau_start': 3.069992258491713, 'tau_min': 0.21571773419968288, 'tau_decay': 0.009797980181413292}. Best is trial 2 with value: -0.049042232191065405.


unsupervised training:   0%|          | 0/58 [00:00<?, ?it/s]

[I 2025-09-24 17:15:11,108] Trial 7 finished with value: -0.08638717873546353 and parameters: {'latent_dim': 16, 'hidden_dims': 'balanced', 'dropout': 0.19407243042530153, 'learning_rate': 0.00012804948455125067, 'batch_size': 256, 'beta': 2.3735964783611148, 'kl_warmup_epochs': 12, 'warmup_epochs': 58, 'n_components': 1, 'tau_start': 4.3645974851527, 'tau_min': 0.0016807235314031475, 'tau_decay': 0.0006818159912915988}. Best is trial 2 with value: -0.049042232191065405.


unsupervised training:   0%|          | 0/46 [00:00<?, ?it/s]

[I 2025-09-24 17:29:42,899] Trial 8 finished with value: -0.10803037822097084 and parameters: {'latent_dim': 64, 'hidden_dims': 'compact', 'dropout': 0.29110855805737207, 'learning_rate': 6.52918524123672e-05, 'batch_size': 1024, 'beta': 3.548580039397641, 'kl_warmup_epochs': 22, 'warmup_epochs': 46, 'n_components': 4, 'tau_start': 1.015022489179212, 'tau_min': 0.1143646687818292, 'tau_decay': 0.0003151618686450806}. Best is trial 2 with value: -0.049042232191065405.


unsupervised training:   0%|          | 0/22 [00:00<?, ?it/s]

[I 2025-09-24 18:12:45,995] Trial 9 finished with value: -0.0858854609388946 and parameters: {'latent_dim': 64, 'hidden_dims': 'extended', 'dropout': 0.40649343735466964, 'learning_rate': 0.011967478597327153, 'batch_size': 256, 'beta': 1.525855625086484, 'kl_warmup_epochs': 12, 'warmup_epochs': 22, 'n_components': 8, 'tau_start': 0.7630931629266371, 'tau_min': 0.0008913376611485299, 'tau_decay': 0.006131864396774987}. Best is trial 2 with value: -0.049042232191065405.


unsupervised training:   0%|          | 0/47 [00:00<?, ?it/s]

[I 2025-09-24 18:57:51,393] Trial 10 finished with value: -0.0825350342603921 and parameters: {'latent_dim': 128, 'hidden_dims': 'compact', 'dropout': 0.011226007565454499, 'learning_rate': 0.0007142866479559837, 'batch_size': 512, 'beta': 2.4839594647902845, 'kl_warmup_epochs': 24, 'warmup_epochs': 47, 'n_components': 8, 'tau_start': 1.8793352170608522, 'tau_min': 0.000133533945696758, 'tau_decay': 0.07383755202970171}. Best is trial 2 with value: -0.049042232191065405.


unsupervised training:   0%|          | 0/59 [00:00<?, ?it/s]

[I 2025-09-24 21:21:00,110] Trial 11 finished with value: -0.04648123307930796 and parameters: {'latent_dim': 64, 'hidden_dims': 'widened', 'dropout': 0.16662011180471692, 'learning_rate': 0.0014761954832455237, 'batch_size': 128, 'beta': 0.27981502939571085, 'kl_warmup_epochs': 2, 'warmup_epochs': 59, 'n_components': 5, 'tau_start': 2.1792786230930896, 'tau_min': 0.013498723063298131, 'tau_decay': 0.00010315904499883012}. Best is trial 11 with value: -0.04648123307930796.


unsupervised training:   0%|          | 0/59 [00:00<?, ?it/s]

[I 2025-09-24 23:05:12,442] Trial 12 finished with value: -0.07673279623869966 and parameters: {'latent_dim': 8, 'hidden_dims': 'widened', 'dropout': 0.15692070457795898, 'learning_rate': 0.0011075221822962743, 'batch_size': 128, 'beta': 1.2378316913248892, 'kl_warmup_epochs': 2, 'warmup_epochs': 59, 'n_components': 6, 'tau_start': 2.4121506054292654, 'tau_min': 0.009617458804075987, 'tau_decay': 0.0001215142757825073}. Best is trial 11 with value: -0.04648123307930796.


unsupervised training:   0%|          | 0/48 [00:00<?, ?it/s]

[I 2025-09-25 00:28:16,966] Trial 13 finished with value: -0.07657611179080492 and parameters: {'latent_dim': 128, 'hidden_dims': 'widened', 'dropout': 0.1218911801151272, 'learning_rate': 0.001085011908614363, 'batch_size': 128, 'beta': 1.1996822258144348, 'kl_warmup_epochs': 2, 'warmup_epochs': 48, 'n_components': 5, 'tau_start': 2.3113096352150073, 'tau_min': 0.030017249714861763, 'tau_decay': 0.00011497124075659331}. Best is trial 11 with value: -0.04648123307930796.


unsupervised training:   0%|          | 0/52 [00:00<?, ?it/s]

[I 2025-09-25 00:48:42,473] Trial 14 finished with value: -0.03352446652221108 and parameters: {'latent_dim': 32, 'hidden_dims': 'widened', 'dropout': 0.008083066233534143, 'learning_rate': 0.00028772317711856226, 'batch_size': 1024, 'beta': 0.29997630958455473, 'kl_warmup_epochs': 6, 'warmup_epochs': 52, 'n_components': 7, 'tau_start': 4.1576994348454654, 'tau_min': 0.041589222696606594, 'tau_decay': 0.001638430985212495}. Best is trial 14 with value: -0.03352446652221108.


unsupervised training:   0%|          | 0/39 [00:00<?, ?it/s]

[I 2025-09-25 01:06:06,152] Trial 15 finished with value: -0.033961217596482296 and parameters: {'latent_dim': 32, 'hidden_dims': 'widened', 'dropout': 0.19492181378610426, 'learning_rate': 0.0003665183049191354, 'batch_size': 1024, 'beta': 0.2811185200117105, 'kl_warmup_epochs': 5, 'warmup_epochs': 39, 'n_components': 5, 'tau_start': 4.6627170228842685, 'tau_min': 0.00855668297879985, 'tau_decay': 0.0016155839808031883}. Best is trial 14 with value: -0.03352446652221108.


unsupervised training:   0%|          | 0/37 [00:00<?, ?it/s]