In [None]:
# ═══════════════════════════════════════════════════════════════════════════════
# STEP 0: CONFIGURATION & ENVIRONMENT SETUP
# Purpose: Initialize paths, configurations, helpers, and Q1 journal styling
# Author: zainzampawala786-sudo
# Date: 2025-10-19 12:42:28 UTC
# ═══════════════════════════════════════════════════════════════════════════════

import json
import pickle
import platform
import subprocess
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional, Sequence

import matplotlib as mpl
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.ticker import MaxNLocator
from sklearn.calibration import calibration_curve
from sklearn.metrics import roc_auc_score, roc_curve

# Startup banner. The timestamp is taken from a timezone-aware clock so the
# "UTC" label is accurate (datetime.utcnow() is deprecated since Python 3.12).
print("\n" + "="*100)
print("AMI MORTALITY PREDICTION PIPELINE - STEP 0: INITIALIZATION")
print("="*100)
print(f"Current Date: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC")
print("User: zainzampawala786-sudo")
print("="*100 + "\n")

# ═══════════════════════════════════════════════════════════════════════════════
# GLOBAL CONFIGURATION
# ═══════════════════════════════════════════════════════════════════════════════

# Pipeline-wide settings. In this file "figure_dpi" is read by set_q1_style()
# and save_figure(), and "target_col" is read by the analysis steps; the other
# keys are declared here for later steps.
CONFIG = {
    # Target and basic settings
    "target_col": "one_year_mortality",
    "random_state": 42,
    
    # Data processing
    "missing_threshold": 10.0,  # Drop features with >10% missing
    # NOTE(review): presumably columns exempt from the missing_threshold drop —
    # confirm in the step that consumes this list.
    "protected_features": ["lactate_max", "lactate_min", "creatinine_max"],
    
    # Visualization
    "figure_dpi": 300,
    
    # Model parameters (will be used in later steps)
    "cv_folds": 5,
    "test_size": 0.2,
    "n_jobs": -1,  # Use all CPU cores
    
    # Feature selection (will be used in later steps)
    "boruta_max_iter": 100,
    "rfe_step": 1,
}

# ═══════════════════════════════════════════════════════════════════════════════
# FILE PATHS - EDIT THESE FOR YOUR SYSTEM IF NEEDED
# ═══════════════════════════════════════════════════════════════════════════════

# Absolute, machine-specific input workbooks (raw strings keep the Windows
# backslashes literal). Both are read with pd.read_excel in Step 1.
INTERNAL_PATH = Path(r"C:\Users\zainz\Desktop\Second Analysis\ZZTongji Dataset AMI Internal Validation One_Year.xlsx")
EXTERNAL_PATH = Path(r"C:\Users\zainz\Desktop\Second Analysis\ZZMimic Dataset AMI External Validation One_Year.xlsx")
# Root folder from which every output directory below is derived.
RESULTS_ROOT = Path(r"C:\Users\zainz\Desktop\Second Analysis\ZAINY")

# Named output folders. init_dirs() creates them; the save/load helpers and
# the run log look them up by key.
DIRS = {
    "root": RESULTS_ROOT,
    "data": RESULTS_ROOT / "data",
    "figures": RESULTS_ROOT / "figures",
    "tables": RESULTS_ROOT / "tables",
    "models": RESULTS_ROOT / "models",
    "logs": RESULTS_ROOT / "logs",
}

def init_dirs():
    """Make sure every folder registered in DIRS exists, list them, and return DIRS."""
    for folder in DIRS.values():
        Path(folder).mkdir(parents=True, exist_ok=True)

    print("✅ Created output directories:")
    for label, location in DIRS.items():
        print(f"   • {label:10s}: {location}")

    return DIRS

# ═══════════════════════════════════════════════════════════════════════════════
# PROFESSIONAL Q1 PALETTE (Paul Tol - Colorblind-safe)
# ═══════════════════════════════════════════════════════════════════════════════

# Colour tables used by the plotting helpers:
#   "primary"  – default seaborn palette in set_q1_style(); its first entry is
#                the default curve colour in plot_roc() / plot_calibration().
#   "models"   – one colour per model key.
#   "cohorts"  – one colour per cohort / split.
#   "neutral"  – reference-line ("chance") and spine colours read by the plots.
PALETTE = {
    "primary": ["#0077BB", "#EE7733", "#009988", "#CC3311", "#33BBEE", "#EE3377", "#BBBBBB"],
    "models": {
        "xgb": "#117733",      # Green (XGBoost)
        "lgbm": "#332288",     # Indigo (LightGBM)
        "cat": "#AA4499",      # Purple (CatBoost)
        "rf": "#44AA99",       # Cyan (Random Forest)
        "log_reg": "#DDCC77",  # Sand (Logistic Regression)
        "en_lr": "#88CCEE",    # Light Blue (Elastic Net)
        "svc": "#CC6677",      # Rose (SVM)
    },
    "cohorts": {
        "internal": "#0077BB",  # Blue
        "external": "#EE7733",  # Orange
        "train": "#009988",     # Teal
        "test": "#CC3311",      # Red
    },
    "neutral": {
        "chance": "#888888",    # Gray (reference line)
        "grid": "#CCCCCC",      # Light gray
        "spine": "#333333",     # Dark gray
    }
}

# Semantic colours. The "tier1"–"tier3" entries are read by
# tier_legend_patches(); the other keys are not referenced in the code shown
# in this part of the file.
COLORS = {
    "tier1":      "#2E7D32",   # dark green (≥ 80 %)
    "tier2":      "#66BB6A",   # medium green (70–79 %)
    "tier3":      "#FFA726",   # orange (60–69 %)
    "unstable":   "#E0E0E0",   # light gray (< 60 %)
    "rejected":   "#BDBDBD",
    "selected":   "#1976D2",   # main line/selection blue
    "ci_ribbon":  "#BBDEFB",   # light blue fill
    "shadow":     "#D32F2F",   # Boruta shadow
    "internal_auc": "#0173B2",
    "external_auc": "#DE8F05",
    "death":       "#D55E00",
    "survive":     "#029E73",
}

# Two figure-size tables with overlapping key names ("single", "double", "wide")
# but different values. NOTE(review): presumably (width, height) in inches for
# matplotlib figsize — confirm which table later steps are meant to use.
FIG_SIZES = {"single": (5.5, 5.5), "double": (11.0, 5.5), "wide": (7.0, 3.5)}
FIGSIZE_Q1 = {
    "single":  (3.5, 2.6),
    "double":  (7.2, 4.8),
    "square":  (6.5, 6.5),
    "wide":    (7.2, 3.6),
    "tall":    (7.2, 9.0),
    "panel":   (8.0, 6.0)
}
# Font family and font sizes applied by set_q1_style() and the plot helpers.
TYPO = {"font": "Arial", "title": 11, "axis": 10, "tick": 9, "legend": 8}

# ═══════════════════════════════════════════════════════════════════════════════
# HELPER FUNCTIONS: File I/O
# ═══════════════════════════════════════════════════════════════════════════════

def create_table(df, name, caption=""):
    """Save a DataFrame as ``<name>.csv`` plus a ``<name>_caption.txt`` sidecar.

    Both files are written to ``DIRS["tables"]``, which is created if missing.

    Args:
        df: DataFrame to export (written without the index).
        name: File stem shared by the CSV and the caption file.
        caption: Caption text stored next to the table.

    Returns:
        Path of the CSV file.
    """
    tables_dir = Path(DIRS["tables"])
    tables_dir.mkdir(parents=True, exist_ok=True)

    out_path = tables_dir / f"{name}.csv"
    df.to_csv(out_path, index=False)

    # Explicit UTF-8: the platform default (e.g. cp1252 on Windows) cannot
    # encode characters such as "≥" and would raise UnicodeEncodeError.
    caption_path = tables_dir / f"{name}_caption.txt"
    caption_path.write_text(caption, encoding="utf-8")

    print(f"   ✅ Table saved: {out_path.name}")
    return out_path

def save_csv(df, name):
    """Save a DataFrame as ``DIRS["data"]/<name>.csv`` (no index) and return the path.

    The data directory is created on demand, matching create_table() and
    save_figure(), so the helper also works before init_dirs() has run.
    """
    data_dir = Path(DIRS["data"])
    data_dir.mkdir(parents=True, exist_ok=True)

    out_path = data_dir / f"{name}.csv"
    df.to_csv(out_path, index=False)
    print(f"   ✅ CSV saved: {out_path.name}")
    return out_path

def save_pickle(obj, name):
    """Pickle ``obj`` to ``DIRS["data"]/<name>.pkl`` and return the path.

    The data directory is created on demand, matching create_table() and
    save_figure(), so the helper also works before init_dirs() has run.
    """
    data_dir = Path(DIRS["data"])
    data_dir.mkdir(parents=True, exist_ok=True)

    out_path = data_dir / f"{name}.pkl"
    with open(out_path, "wb") as f:
        pickle.dump(obj, f)
    print(f"   ✅ Pickle saved: {out_path.name}")
    return out_path

def load_pickle(name):
    """Read back the object stored in ``DIRS["data"]/<name>.pkl``.

    SECURITY: unpickling can execute arbitrary code, so only load files that
    this pipeline wrote itself.
    """
    in_path = DIRS["data"] / f"{name}.pkl"
    with in_path.open("rb") as handle:
        restored = pickle.load(handle)
    print(f"   ✅ Pickle loaded: {in_path.name}")
    return restored

# ═══════════════════════════════════════════════════════════════════════════════
# HELPER FUNCTIONS: Logging
# ═══════════════════════════════════════════════════════════════════════════════

def _json_safe(x):
    """Recursively convert numpy/pandas/scientific values into JSON-safe Python types.

    Conversion rules, applied in this order:
      * ``None``, ``str`` and ``bool`` pass through; numpy bools/integers become
        Python ``bool``/``int``.
      * floats (Python or numpy) become ``float``; NaN becomes ``None`` because
        ``NaN`` is not valid JSON. Infinities are left unchanged.
      * dicts, lists, tuples and sets are converted element by element; dict
        keys that are not ``str``/``int``/``float``/``bool``/``None`` are
        stringified so ``json.dump`` accepts them.
      * scalar missing markers recognised by ``pd.isna`` (e.g. ``pd.NaT``,
        ``pd.NA``) become ``None``.
      * objects exposing ``tolist()`` or ``to_dict()`` (arrays, Series,
        DataFrames) are converted and then sanitised recursively.
      * anything else is returned as ``str(x)``.

    Args:
        x: Arbitrary value to sanitise.

    Returns:
        A structure built only from dict, list, str, int, float, bool and None.
    """
    if x is None or isinstance(x, (str, bool)):
        return x
    if isinstance(x, np.bool_):
        return bool(x)
    if isinstance(x, np.integer):
        return int(x)
    if isinstance(x, int):
        return x
    if isinstance(x, (float, np.floating)):
        value = float(x)
        return None if np.isnan(value) else value

    # Containers are handled before the missing-value probe: pd.isna() on a
    # one-element list returns a one-element array whose truth value would
    # otherwise collapse the whole list to None.
    if isinstance(x, dict):
        return {
            (k if k is None or isinstance(k, (str, int, float, bool)) else str(k)): _json_safe(v)
            for k, v in x.items()
        }
    if isinstance(x, (list, tuple, set)):
        return [_json_safe(v) for v in x]

    # Scalar missing markers only; array-like answers from pd.isna are ignored.
    try:
        missing = pd.isna(x)
    except Exception:
        missing = False
    if isinstance(missing, (bool, np.bool_)) and missing:
        return None

    # Recurse so NaN / numpy scalars inside arrays and frames are cleaned too.
    if hasattr(x, "tolist"):
        return _json_safe(x.tolist())
    if hasattr(x, "to_dict"):
        return _json_safe(x.to_dict())
    return str(x)

def append_runlog(step: str, details: dict):
    """Append one entry to ``logs/run_log.json`` and print a confirmation summary.

    The log file holds a JSON list. Each entry stores the step label, a UTC
    timestamp and ``details`` converted by ``_json_safe``. If an existing file
    cannot be read or parsed, or does not contain a list, it is moved aside to
    ``run_log.corrupt-<timestamp>.json`` (best effort) before a fresh log is
    started, so earlier history is not silently overwritten.

    Args:
        step: Step label stored in the entry and shown in the banner.
        details: Mapping of values to record; nested dicts are printed indented.

    Returns:
        Path of the run-log file.
    """
    log_path = DIRS["logs"] / "run_log.json"
    log_path.parent.mkdir(parents=True, exist_ok=True)
    # Timezone-aware clock; datetime.utcnow() is deprecated since Python 3.12.
    now_utc = datetime.now(timezone.utc)

    # Read existing or create fresh
    log = []
    unusable = False
    if log_path.exists() and log_path.stat().st_size > 0:
        try:
            loaded = json.loads(log_path.read_text(encoding="utf-8"))
        except (OSError, ValueError) as e:  # JSON and decoding errors are ValueErrors
            print(f"⚠️  run_log.json load failed ({type(e).__name__}); starting new log.")
            unusable = True
        else:
            if isinstance(loaded, list):
                log = loaded
            else:
                print("⚠️  run_log.json corrupted; reinitializing log list.")
                unusable = True

    if unusable:
        backup = log_path.with_name(f"run_log.corrupt-{now_utc.strftime('%Y%m%dT%H%M%S')}.json")
        try:
            log_path.replace(backup)
            print(f"   ↪ previous file kept as {backup.name}")
        except OSError:
            pass  # the backup is best effort; logging must still proceed

    # Create new entry
    entry = {
        "step": step,
        "utc": now_utc.strftime("%Y-%m-%d %H:%M:%S"),
        "details": _json_safe(details)
    }
    log.append(entry)

    # Write back to disk
    with open(log_path, "w", encoding="utf-8") as f:
        json.dump(log, f, ensure_ascii=False, indent=2)

    # Print confirmation summary
    print("="*90)
    print(f"📘 RUN LOG UPDATED — Step {step}")
    print("="*90)
    print(f"UTC Time : {entry['utc']}")
    print(f"Entries  : {len(log)} total\n")

    # Pretty summary of details (one level of nesting is expanded)
    for key, val in details.items():
        if isinstance(val, dict):
            print(f"   {key}:")
            for subk, subv in val.items():
                print(f"      • {subk:<18} = {subv}")
        else:
            print(f"   {key:<22} = {val}")
    print("="*90 + "\n")

    return log_path

def log_step(step, message):
    """Record a single free-text message for ``step`` through append_runlog.

    Returns None; the log path returned by append_runlog is not passed on.
    """
    payload = {"message": message}
    append_runlog(step, payload)

# ═══════════════════════════════════════════════════════════════════════════════
# HELPER FUNCTIONS: Plotting
# ═══════════════════════════════════════════════════════════════════════════════

def set_q1_style(palette: Optional[Sequence[str]] = None, context: str = "paper"):
    """Apply the shared publication plotting style to seaborn and matplotlib.

    Args:
        palette: Colour cycle for seaborn; defaults to ``PALETTE["primary"]``.
        context: seaborn plotting context passed to ``sns.set_theme``.
    """
    chosen_palette = PALETTE["primary"] if palette is None else palette
    sns.set_theme(style="whitegrid", palette=chosen_palette, context=context)

    dpi = CONFIG["figure_dpi"]

    fonts = {
        "font.family": "sans-serif",
        "font.sans-serif": [TYPO["font"], "Helvetica", "DejaVu Sans"],
        "font.size": 8,
        "axes.titlesize": TYPO["title"],
        "axes.titleweight": "bold",
        "axes.labelsize": TYPO["axis"],
        "axes.labelweight": "bold",
        "xtick.labelsize": TYPO["tick"],
        "ytick.labelsize": TYPO["tick"],
        "legend.fontsize": TYPO["legend"],
    }
    resolution = {
        "figure.dpi": dpi,
        "savefig.dpi": dpi,
        "savefig.bbox": "tight",
        "savefig.pad_inches": 0.05,
    }
    axes_and_grid = {
        "axes.grid": True,
        "grid.linestyle": ":",
        "grid.linewidth": 0.4,
        "grid.alpha": 0.3,
        "axes.axisbelow": True,
        "axes.linewidth": 1.2,
        "axes.edgecolor": "#000000",
        "axes.spines.top": False,
        "axes.spines.right": False,
        "lines.linewidth": 1.8,
    }
    legend = {
        "legend.frameon": True,
        "legend.framealpha": 1.0,
        "legend.facecolor": "#FFFFFF",
        "legend.edgecolor": "#E0E0E0",
    }
    ticks = {
        "xtick.direction": "out",
        "ytick.direction": "out",
    }
    # Keep text as editable text in vector exports (Illustrator compatible)
    vector_text = {
        "pdf.fonttype": 42,
        "ps.fonttype": 42,
        "svg.fonttype": "none",
    }
    background = {
        "axes.facecolor": "white",
        "figure.facecolor": "white",
    }

    mpl.rcParams.update({
        **fonts,
        **resolution,
        **axes_and_grid,
        **legend,
        **ticks,
        **vector_text,
        **background,
    })

def save_figure(fig: plt.Figure, name: str, outdir: Optional[Path] = None, formats: Sequence[str] = ("pdf", "png")):
    """Save ``fig`` once per requested format and return the written paths.

    Args:
        fig: Figure to export.
        name: File stem; each file is named ``<name>.<format>``.
        outdir: Target folder; defaults to ``DIRS["figures"]``. Created if missing.
        formats: File extensions to write. A single string such as ``"png"`` is
            accepted and treated as one format.

    Returns:
        List of paths, one per format, in the order given.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(formats, str):
        formats = (formats,)

    outdir = Path(outdir or DIRS["figures"])
    outdir.mkdir(parents=True, exist_ok=True)
    saved = []
    for fmt in formats:
        p = outdir / f"{name}.{fmt}"
        fig.savefig(p, bbox_inches="tight", dpi=CONFIG["figure_dpi"])
        saved.append(p)
    print(f"   ✅ Figure saved: {name} ({', '.join(formats)})")
    return saved

def plot_roc(ax, y_true, y_score, label: Optional[str] = None, color: Optional[str] = None, 
             lw: float = 1.8, alpha: float = 0.85, show_ci: bool = False):
    """Draw one ROC curve on ``ax`` and return its AUC.

    The function can be called repeatedly on the same Axes (one call per
    model); the dashed chance diagonal and its "Chance" legend entry are added
    only on the first call.

    Args:
        ax: Matplotlib Axes to draw on.
        y_true: Binary ground-truth labels.
        y_score: Predicted scores or probabilities.
        label: Legend prefix; the legend text is ``"<label> (AUC=0.xxx)"``.
        color: Curve colour; defaults to the first ``PALETTE["primary"]`` entry.
        lw: Curve line width.
        alpha: Curve opacity.
        show_ci: Accepted for interface compatibility; not used by this function.

    Returns:
        The area under the ROC curve.
    """
    fpr, tpr, _ = roc_curve(y_true, y_score)
    auc = roc_auc_score(y_true, y_score)
    
    if color is None:
        color = PALETTE["primary"][0]
    
    ax.plot(fpr, tpr, 
           label=f"{label} (AUC={auc:.3f})" if label else f"AUC={auc:.3f}", 
           color=color, lw=lw, alpha=alpha, zorder=2)
    
    # Tag the diagonal with a gid so repeated calls neither stack
    # semi-transparent lines nor duplicate the legend entry.
    chance_gid = "q1_roc_chance"
    if not any(line.get_gid() == chance_gid for line in ax.get_lines()):
        ax.plot([0, 1], [0, 1], 
               linestyle="--", color=PALETTE["neutral"]["chance"], 
               lw=1.0, alpha=0.5, label="Chance", zorder=1, gid=chance_gid)
    
    ax.set_xlim(-0.02, 1.02)
    ax.set_ylim(-0.02, 1.02)
    ax.set_xlabel("1 - Specificity", fontsize=TYPO["axis"])
    ax.set_ylabel("Sensitivity", fontsize=TYPO["axis"])
    
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    for spine in ["left", "bottom"]:
        ax.spines[spine].set_linewidth(0.8)
        ax.spines[spine].set_color(PALETTE["neutral"]["spine"])
    
    ax.set_aspect('equal', adjustable='box')
    
    return auc

def plot_calibration(ax, y_true, y_prob, n_bins: int = 10, label: Optional[str] = None, 
                    color: Optional[str] = None, marker_size: int = 7):
    """Draw one calibration curve (quantile bins) on ``ax``.

    The function can be called repeatedly on the same Axes; the dashed
    "Perfect calibration" diagonal is added only on the first call.

    Args:
        ax: Matplotlib Axes to draw on.
        y_true: Binary ground-truth labels.
        y_prob: Predicted probabilities.
        n_bins: Number of quantile bins passed to ``calibration_curve``.
        label: Legend text for the curve.
        color: Curve colour; defaults to the first ``PALETTE["primary"]`` entry.
        marker_size: Marker size of the bin points.

    Returns:
        ``(prob_true, prob_pred)`` as returned by ``calibration_curve``.
    """
    prob_true, prob_pred = calibration_curve(y_true, y_prob, n_bins=n_bins, strategy="quantile")
    
    if color is None:
        color = PALETTE["primary"][0]
    
    ax.plot(prob_pred, prob_true, 
           marker='o', markersize=marker_size, 
           label=label, color=color, 
           linewidth=1.8, alpha=0.85,
           markeredgecolor='white', markeredgewidth=0.5)
    
    # Tag the diagonal with a gid so repeated calls neither stack
    # semi-transparent lines nor duplicate the legend entry.
    reference_gid = "q1_calibration_reference"
    if not any(line.get_gid() == reference_gid for line in ax.get_lines()):
        ax.plot([0, 1], [0, 1], 
               linestyle="--", color=PALETTE["neutral"]["chance"], 
               linewidth=1.0, alpha=0.5, label="Perfect calibration",
               gid=reference_gid)
    
    ax.set_xlabel("Predicted Probability", fontsize=TYPO["axis"])
    ax.set_ylabel("Observed Proportion", fontsize=TYPO["axis"])
    ax.set_xlim(-0.02, 1.02)
    ax.set_ylim(-0.02, 1.02)
    
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    for spine in ["left", "bottom"]:
        ax.spines[spine].set_linewidth(0.8)
        ax.spines[spine].set_color(PALETTE["neutral"]["spine"])
    
    ax.set_aspect('equal', adjustable='box')
    
    return prob_true, prob_pred

def apply_q1_axes(ax, title=None, xlabel=None, ylabel=None):
    """Style one Axes: optional bold title/labels, dotted major grid, no top/right spines.

    Returns the same Axes so calls can be chained.
    """
    bold = {"fontweight": "bold"}

    if title:
        ax.set_title(title, loc="left", fontsize=11, pad=10, **bold)
    for text, set_label in ((xlabel, ax.set_xlabel), (ylabel, ax.set_ylabel)):
        if text:
            set_label(text, fontsize=10, **bold)

    ax.grid(True, which="major", axis="both", alpha=0.3, linestyle=":")
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.tick_params(axis="both", which="major", width=1.2, length=5)
    return ax

def finalize_figure(fig, filename, title=None, folder=None):
    """Optionally add a bold suptitle, export via save_figure, then close the figure."""
    target_dir = DIRS["figures"] if folder is None else folder
    if title:
        fig.suptitle(title, fontsize=13, fontweight="bold", y=0.97)
    save_figure(fig, filename, outdir=target_dir)
    plt.close(fig)

def tier_legend_patches(tier_counts=None):
    """Build the three legend patches for Tier 1–3.

    When ``tier_counts`` is given, its items are paired with the patches in
    order and each paired patch is relabelled ``"<key> (n=<value>)"``.
    """
    tier_specs = (
        ("tier1", "Tier 1 (≥ 80 %)"),
        ("tier2", "Tier 2 (70–79 %)"),
        ("tier3", "Tier 3 (60–69 %)"),
    )
    patches = [mpatches.Patch(color=COLORS[key], label=text) for key, text in tier_specs]

    if tier_counts:
        for patch, (tier, n) in zip(patches, tier_counts.items()):
            patch.set_label(f"{tier} (n={n})")
    return patches

# ═══════════════════════════════════════════════════════════════════════════════
# ENVIRONMENT RECORDING
# ═══════════════════════════════════════════════════════════════════════════════

def _pkgs_via_importlib():
    """Return ``{distribution name: version}`` for the installed distributions.

    Uses ``importlib.metadata`` and falls back to the ``importlib_metadata``
    backport. Returns ``{}`` when neither module is available or the
    distributions cannot be enumerated. A distribution whose metadata cannot
    be read is skipped on its own, so it does not discard the others.
    """
    try:
        from importlib import metadata as imm
    except ImportError:
        try:
            import importlib_metadata as imm
        except ImportError:
            return {}

    try:
        dists = list(imm.distributions())
    except Exception:
        return {}

    found = {}
    for dist in dists:
        try:
            meta = dist.metadata
            name = (meta.get("Name") or meta.get("name")) if meta is not None else None
            name = name or getattr(dist, "name", None)
            version = dist.version
        except Exception:
            # Broken or partial install folder: skip just this entry.
            continue
        if name:
            found[name] = version
    return found

def _pkgs_via_pip():
    """Return ``{package: version}`` parsed from the ``name==version`` lines of ``pip freeze``.

    pip is invoked as ``<this interpreter> -m pip`` so the listing describes
    the environment running this code rather than whichever ``pip`` is first
    on PATH. Returns ``{}`` if pip cannot be run or exits with an error.
    """
    cmd = [sys.executable, "-m", "pip", "freeze"] if sys.executable else ["pip", "freeze"]
    try:
        res = subprocess.run(cmd, capture_output=True, text=True, check=True)
    except (OSError, ValueError, subprocess.SubprocessError):
        return {}

    pins = {}
    for line in res.stdout.strip().splitlines():
        pkg, sep, ver = line.partition("==")
        if sep:  # skip lines that are not exact pins (editable / URL installs)
            pins[pkg] = ver
    return pins

def record_environment(outdir: Optional[Path] = None):
    """Write the Python version and installed package versions to ``env_versions.json``.

    Packages are collected with ``_pkgs_via_importlib()``; if that yields
    nothing, ``_pkgs_via_pip()`` is used instead. ``capture_method`` names the
    collector whose result was stored ("pip_freeze" whenever the fallback ran).

    Args:
        outdir: Target folder; defaults to ``DIRS["tables"]``. Created if missing.

    Returns:
        Path of the JSON file.
    """
    outdir = Path(outdir or DIRS["tables"])
    outdir.mkdir(parents=True, exist_ok=True)

    pkgs = _pkgs_via_importlib()
    method = "importlib.metadata"
    if not pkgs:
        # Track the method explicitly; it cannot be inferred from the result,
        # since a successful fallback also yields a non-empty dict.
        pkgs = _pkgs_via_pip()
        method = "pip_freeze"

    env = {
        "python_version": platform.python_version(),
        "packages": pkgs,
        "capture_method": method,
    }

    p = outdir / "env_versions.json"
    with open(p, "w", encoding="utf-8") as f:
        json.dump(env, f, indent=2)
    print(f"   ✅ Environment recorded: {p.name}")
    return p

# ═══════════════════════════════════════════════════════════════════════════════
# INITIALIZE EVERYTHING
# ═══════════════════════════════════════════════════════════════════════════════

# Create the output folders, apply the plotting style and snapshot the environment.
init_dirs()
set_q1_style()
record_environment()

print("\n" + "="*100)
print("✅ STEP 0 COMPLETE — ENVIRONMENT INITIALIZED")
print("="*100)
print("User: zainzampawala786-sudo")
# Timezone-aware clock; datetime.utcnow() is deprecated since Python 3.12.
print(f"Date: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC")
print(f"Results Directory: {RESULTS_ROOT}")
print(f"Target Variable: {CONFIG['target_col']}")
print(f"Visualization: Paul Tol (Colorblind-safe) @ {CONFIG['figure_dpi']} DPI")
print("="*100 + "\n")

# Log Step 0 completion
log_step("0", "Environment initialized successfully - all paths, configurations, and helpers loaded")

In [None]:
# ═══════════════════════════════════════════════════════════════════════════════
# STEP 1: DATA LOADING & INITIAL VALIDATION
# TRIPOD: 4a (source), 5a (participants), 5b (sample size)
# ═══════════════════════════════════════════════════════════════════════════════

print("\n" + "="*100)
print("STEP 1: DATA LOADING & INITIAL VALIDATION")
print("="*100)
# The line is labelled UTC, so read a UTC clock (plain datetime.now() is local time).
print(f"UTC: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}\n")

TARGET = CONFIG["target_col"]

# ── 1.1 Load data
print("📂 Loading Excel files ...")
df_internal = pd.read_excel(INTERNAL_PATH)
df_external = pd.read_excel(EXTERNAL_PATH)
print(f"   ✅ Internal (Tongji):  {df_internal.shape[0]:,} × {df_internal.shape[1]:,}")
print(f"   ✅ External (MIMIC-IV): {df_external.shape[0]:,} × {df_external.shape[1]:,}")

# ── 1.2 Validate target (binary 0/1)
def _assert_binary(df, name):
    """Check that ``df`` has the TARGET column and that its non-missing values are exactly {0, 1}.

    Raises:
        KeyError: TARGET is not a column of ``df``.
        ValueError: the set of non-missing target values differs from {0, 1}.
    """
    if TARGET not in df.columns:
        raise KeyError(f"[{name}] target '{TARGET}' not found")

    observed = pd.Series(df[TARGET]).dropna()
    vals = set(observed.unique())
    if vals != {0, 1}:
        raise ValueError(f"[{name}] target not binary 0/1. Found: {vals}")

for _cohort_df, _cohort_name in ((df_internal, "Internal"), (df_external, "External")):
    _assert_binary(_cohort_df, _cohort_name)
print(f"   ✅ Target '{TARGET}' verified (binary 0/1)\n")

# ── 1.3 Mortality rates
# Cohort size, deaths, survivors and crude mortality (%) — internal cohort
int_n = len(df_internal)
int_d = int(df_internal[TARGET].eq(1).sum())
int_s = int(df_internal[TARGET].eq(0).sum())
int_rate = int_d/int_n*100

# Same quantities — external cohort
ext_n = len(df_external)
ext_d = int(df_external[TARGET].eq(1).sum())
ext_s = int(df_external[TARGET].eq(0).sum())
ext_rate = ext_d/ext_n*100

print("📊 Mortality:")
for _tag, _d, _n, _rate, _s in (
    ("Internal", int_d, int_n, int_rate, int_s),
    ("External", ext_d, ext_n, ext_rate, ext_s),
):
    print(f"   {_tag}: {_d}/{_n} died ({_rate:.1f}%), {_s} survived ({100-_rate:.1f}%)")

# ── 1.4 Feature alignment
_int_cols = set(df_internal.columns)
_ext_cols = set(df_external.columns)
common = sorted(_int_cols & _ext_cols)
int_only = sorted(_int_cols - _ext_cols)
ext_only = sorted(_ext_cols - _int_cols)

print(f"\n🔗 Feature alignment: {len(common)} common, {len(int_only)} internal-only, {len(ext_only)} external-only")

# ── 1.5 Missingness overview (high-level)
int_miss = df_internal.isna().sum().sum()
int_total = df_internal.size
ext_miss = df_external.isna().sum().sum()
ext_total = df_external.size

print("📉 Total missingness:")
for _tag, _miss, _total in (
    ("Internal", int_miss, int_total),
    ("External", ext_miss, ext_total),
):
    print(f"   {_tag}: {_miss:,}/{_total:,} cells ({_miss/_total*100:.2f}%)")

# ── 1.6 Save summary table
# One row per characteristic, one column per cohort.
_summary_rows = [
    ("Sample size (n)", int_n, ext_n),
    ("Features (p)", df_internal.shape[1], df_external.shape[1]),
    ("Deaths, n (%)", f"{int_d} ({int_rate:.1f}%)", f"{ext_d} ({ext_rate:.1f}%)"),
    ("Survivors, n (%)", f"{int_s} ({100-int_rate:.1f}%)", f"{ext_s} ({100-ext_rate:.1f}%)"),
    (
        "Missing cells, n (%)",
        f"{int_miss:,} ({int_miss/int_total*100:.2f}%)",
        f"{ext_miss:,} ({ext_miss/ext_total*100:.2f}%)",
    ),
]
summary_df = pd.DataFrame(
    _summary_rows,
    columns=["Characteristic", "Internal (Tongji)", "External (MIMIC-IV)"],
)

create_table(summary_df, "step1_data_summary", caption="Cohort overview")

# ── 1.7 Persist raw data (FULL dataset - no drops yet)
for _frame, _stem in (
    (df_internal, "step1_df_internal_raw"),
    (df_external, "step1_df_external_raw"),
):
    save_pickle(_frame, _stem)

# ── 1.8 Create hand-off
RAW_DATA = dict(
    df_internal=df_internal,
    df_external=df_external,
    n_internal=int_n,
    n_external=ext_n,
    deaths_internal=int_d,
    deaths_external=ext_d,
    mortality_rate_internal=int_rate,
    mortality_rate_external=ext_rate,
    common_features=common,
)

# ── 1.9 Log
append_runlog("1", {
    "internal": {"n": int_n, "p": df_internal.shape[1], "deaths": int_d, "mortality_pct": round(int_rate,1)},
    "external": {"n": ext_n, "p": df_external.shape[1], "deaths": ext_d, "mortality_pct": round(ext_rate,1)},
    "alignment": {"common": len(common), "internal_only": len(int_only), "external_only": len(ext_only)},
})

# Report the column counts actually loaded instead of a hard-coded figure,
# which goes stale as soon as the input workbooks change.
print(
    f"\n💾 Stored: RAW_DATA (FULL dataset - {df_internal.shape[1]} internal / "
    f"{df_external.shape[1]} external columns, NO exclusions yet)"
)
print("\n" + "="*100)
print("✅ STEP 1 COMPLETE — DATA LOADED & VALIDATED")
print("="*100)

In [None]:
# ═══════════════════════════════════════════════════════════════════════════════
# STEP 2: BASELINE CHARACTERISTICS TABLE (TABLE 1) - Publication Ready
# TRIPOD: 5a (Participants), 14a (Baseline characteristics)
# ═══════════════════════════════════════════════════════════════════════════════

from scipy import stats

print("\n" + "="*100)
print("STEP 2: BASELINE CHARACTERISTICS TABLE (TABLE 1) - PUBLICATION READY")
print("="*100)
# The line is labelled UTC, so read a UTC clock (plain datetime.now() is local time).
print(f"UTC: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}\n")

# Work on copies so this step cannot modify the frames stored in RAW_DATA.
df_int = RAW_DATA["df_internal"].copy()
df_ext = RAW_DATA["df_external"].copy()
TARGET = CONFIG["target_col"]

# ── 2.1 Helper functions
def compute_continuous_stats(df, var, target_col):
    """Summarise a continuous variable overall and by outcome group.

    Args:
        df: Cohort DataFrame.
        var: Name of the continuous column.
        target_col: Name of the binary outcome column (0 = survived, 1 = died).

    Returns:
        ``(overall, survived, died, p_value)``. The first three are
        ``"median [Q1-Q3]"`` strings with one decimal, or ``"N/A"`` for an
        empty group; all three are ``"N/A"`` when fewer than three non-missing
        values exist. ``p_value`` comes from a two-sided Mann-Whitney U test of
        survived vs. died and is 1.0 when the test cannot be computed.
    """
    def _median_iqr(values):
        # One-decimal "median [Q1-Q3]" summary; "N/A" for an empty group.
        if len(values) == 0:
            return "N/A"
        return f"{values.median():.1f} [{values.quantile(0.25):.1f}-{values.quantile(0.75):.1f}]"

    overall = df[var].dropna()
    survived = df.loc[df[target_col] == 0, var].dropna()
    died = df.loc[df[target_col] == 1, var].dropna()

    if len(overall) < 3:
        return "N/A", "N/A", "N/A", 1.0

    p_val = 1.0
    if len(survived) > 0 and len(died) > 0:
        try:
            _, p_val = stats.mannwhitneyu(survived, died, alternative='two-sided')
        except (ValueError, TypeError):
            # A bare `except:` would also swallow KeyboardInterrupt/SystemExit.
            # Only the test's own failures fall back to a neutral p-value.
            p_val = 1.0

    return _median_iqr(overall), _median_iqr(survived), _median_iqr(died), float(p_val)

def compute_categorical_stats(df, var, target_col):
    """Summarise a binary (0/1-coded) variable overall and by outcome group.

    Args:
        df: Cohort DataFrame.
        var: Name of the categorical column; rows equal to 1 are counted.
        target_col: Name of the binary outcome column (0 = survived, 1 = died).

    Returns:
        ``(overall, survived, died, p_value)``. The first three are
        ``"n (pct%)"`` strings, where the percentage is relative to the
        non-missing rows of that group (0 for an empty group). ``p_value``
        comes from a chi-square test on the outcome × variable contingency
        table and is 1.0 when the test cannot be computed.
    """
    def _count_pct(values):
        # "n (pct%)" of entries equal to 1 among the non-missing values.
        n_pos = int((values == 1).sum())
        pct = n_pos / len(values) * 100 if len(values) > 0 else 0
        return f"{n_pos} ({pct:.1f}%)"

    overall = df[var].dropna()
    survived = df.loc[df[target_col] == 0, var].dropna()
    died = df.loc[df[target_col] == 1, var].dropna()

    # Chi-square test
    try:
        contingency = pd.crosstab(df[target_col], df[var])
        _, p_val, _, _ = stats.chi2_contingency(contingency)
    except (ValueError, TypeError):
        # A bare `except:` would also swallow KeyboardInterrupt/SystemExit.
        # Only the test's own failures (e.g. an empty table) fall back.
        p_val = 1.0

    return _count_pct(overall), _count_pct(survived), _count_pct(died), float(p_val)

# ── 2.2 Define key variables
# Layout of Table 1. Each key is a section heading (emitted as a "**heading**"
# row); its value lists the column names summarised as continuous
# (median [IQR]) or categorical (count / percent of rows equal to 1).
# Names that are not columns of a cohort are skipped when the table is built.
table1_vars = {
    'Demographics': {
        'continuous': ['age', 'ICU_LOS'],
        'categorical': ['gender', 'STEMI', 'NSTEMI']
    },
    'Vital Signs': {
        'continuous': ['sbp', 'dbp', 'resp_rate'],
        'categorical': []
    },
    'Hematology': {
        'continuous': ['hemoglobin_min', 'platelet_count_min', 'wbc_count_max', 
                       'neutrophils_abs_max', 'lymphocytes_abs_min'],
        'categorical': []
    },
    'Renal Function': {
        'continuous': ['creatinine_max', 'eGFR_CKD_EPI_21'],
        'categorical': ['Renal_Insufficiency']
    },
    'Liver Function': {
        'continuous': ['ALT_max', 'AST_max', 'Total_Bilirubin_max'],
        'categorical': ['liver_insufficiency']
    },
    'Metabolic': {
        'continuous': ['glucose_max', 'potassium_max', 'sodium_min', 'lactate_max'],
        'categorical': []
    },
    'Comorbidities': {
        'continuous': [],
        'categorical': ['hx_hypertension', 'hx_cerebrovascular_disease', 
                        'hx_chronic_pulmonary_disease', 'hx_malignancy']
    },
    'Interventions': {
        'continuous': [],
        'categorical': ['pci_status', 'iabp_use', 'ecmo_use', 'underwent_CRRT', 
                        'invasive_ventilation', 'cardiogenic_shock']
    },
    'Arrhythmias': {
        'continuous': [],
        'categorical': ['atrial_fibrillation', 'ventricular_tachycardia', 'AV_block']
    },
    'Medications': {
        'continuous': [],
        'categorical': ['acei_use', 'arb_use', 'beta_blocker_use', 
                        'clopidogrel_use', 'ticagrelor_use']
    },
}

# ── 2.3 Build INTERNAL Table 1
def _format_p(p):
    """Format a p-value to three decimals ("<0.001" below that), with "*" when p < 0.05."""
    p_str = f"{p:.3f}" if p >= 0.001 else "<0.001"
    return p_str + ("*" if p < 0.05 else "")


def _table1_rows(df):
    """Build the Table 1 rows for one cohort from the table1_vars layout.

    Each section contributes a heading row, followed by one row per listed
    variable that is present in ``df``.
    """
    rows = []
    for category, var_dict in table1_vars.items():
        rows.append(['', f"**{category}**", '', '', '', ''])

        for var in var_dict['continuous']:
            if var in df.columns:
                overall, surv, died, p = compute_continuous_stats(df, var, TARGET)
                rows.append(['', f"  {var}", overall, surv, died, _format_p(p)])

        for var in var_dict['categorical']:
            if var in df.columns:
                overall, surv, died, p = compute_categorical_stats(df, var, TARGET)
                rows.append(['', f"  {var}, n (%)", overall, surv, died, _format_p(p)])
    return rows


def _table1_columns(n_total, n_survived, n_died):
    """Column headers of Table 1 with the group sizes embedded."""
    return [
        '', 'Variable',
        f'Overall (N={n_total})',
        f'Survived (n={n_survived})',
        f'Died (n={n_died})',
        'P-value',
    ]


# ── 2.3 Build INTERNAL Table 1
n_total_int = len(df_int)
n_survived_int = int((df_int[TARGET]==0).sum())
n_died_int = int((df_int[TARGET]==1).sum())

print("🏥 INTERNAL COHORT (Tongji)")
print(f"   Total: {n_total_int} | Survived: {n_survived_int} | Died: {n_died_int}\n")

table1_int = _table1_rows(df_int)
table1_int_df = pd.DataFrame(
    table1_int, columns=_table1_columns(n_total_int, n_survived_int, n_died_int)
)

print("📊 INTERNAL COHORT - Table 1 (sample):")
print(table1_int_df.head(25).to_string(index=False))

# ── 2.4 Build EXTERNAL Table 1
n_total_ext = len(df_ext)
n_survived_ext = int((df_ext[TARGET]==0).sum())
n_died_ext = int((df_ext[TARGET]==1).sum())

print(f"\n{'='*100}")
print("🏥 EXTERNAL COHORT (MIMIC-IV)")
print(f"   Total: {n_total_ext} | Survived: {n_survived_ext} | Died: {n_died_ext}\n")

table1_ext = _table1_rows(df_ext)
table1_ext_df = pd.DataFrame(
    table1_ext, columns=_table1_columns(n_total_ext, n_survived_ext, n_died_ext)
)

print("📊 EXTERNAL COHORT - Table 1 (sample):")
print(table1_ext_df.head(25).to_string(index=False))

# ── 2.5 Save tables
# Captions share the same methods sentence; only the cohort description differs.
_caption_methods = (
    " Continuous variables: Median [IQR], Mann-Whitney U test."
    " Categorical variables: n (%), Chi-square test. *P < 0.05."
)
_caption_internal = (
    "Table 1. Baseline characteristics stratified by one-year mortality "
    "(Internal cohort, Tongji Hospital)." + _caption_methods
)
_caption_external = (
    "Table 1. Baseline characteristics stratified by one-year mortality "
    "(External cohort, MIMIC-IV)." + _caption_methods
)

create_table(table1_int_df, "step2_table1_internal_publication", caption=_caption_internal)

create_table(table1_ext_df, "step2_table1_external_publication", caption=_caption_external)

# ── 2.6 Log
_step2_details = {
    "analysis": "Publication-ready Table 1 (Overall + Survived + Died)",
    "statistical_tests": "Mann-Whitney U (continuous), Chi-square (categorical)",
    "internal": {"total": n_total_int, "survived": n_survived_int, "died": n_died_int},
    "external": {"total": n_total_ext, "survived": n_survived_ext, "died": n_died_ext},
}
append_runlog("2", _step2_details)

# Hand-off object holding both finished tables
TABLE1_DATA = dict(internal_table=table1_int_df, external_table=table1_ext_df)

print("\n💾 Stored: TABLE1_DATA (publication-ready format)")
print("\n" + "="*100)
print("✅ STEP 2 COMPLETE — PUBLICATION-READY TABLE 1 GENERATED")
print("="*100)

In [None]:
# ═══════════════════════════════════════════════════════════════════════════════
# STEP 3: MISSINGNESS HEATMAP VISUALIZATION (Q1 Journal Quality)
# TRIPOD: 5c (Handling of missing data - visualization)
# ═══════════════════════════════════════════════════════════════════════════════

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

print("\n" + "="*100)
print("STEP 3: MISSINGNESS HEATMAP VISUALIZATION (Q1 JOURNAL QUALITY)")
print("="*100)
# The line is labelled "UTC", so read the UTC clock (as the STEP 0 banner does)
# rather than datetime.now(), which returns the machine's local time.
print(f"UTC: {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')}\n")

# Work on copies so nothing in this step can modify the stored raw cohorts.
df_int = RAW_DATA["df_internal"].copy()
df_ext = RAW_DATA["df_external"].copy()
TARGET = CONFIG["target_col"]

# ── 3.0 Set Q1 Journal Style (Nature/Lancet/NEJM inspired)
plt.rcParams.update({
    # Ask for the generic sans-serif family with an ordered preference list
    # (same list as the Step 4 style block) instead of naming Arial directly,
    # so machines without Arial fall back to the next entry.
    'font.family': 'sans-serif',
    'font.sans-serif': ['Arial', 'Helvetica', 'DejaVu Sans'],
    'font.size': 10,
    'axes.labelsize': 11,
    'axes.titlesize': 12,
    'axes.titleweight': 'bold',
    'xtick.labelsize': 9,
    'ytick.labelsize': 9,
    'legend.fontsize': 9,
    'figure.titlesize': 13,
    'figure.titleweight': 'bold',
    'axes.spines.top': False,
    'axes.spines.right': False,
    'axes.linewidth': 1.2,
    'grid.alpha': 0.3,
    'grid.linestyle': '--',
})

# Q1 Color Palette (colorblind-friendly)
# Named colours shared by the Step 3 figures; also stored in MISSINGNESS_DATA below.
COLORS = {
    'primary': '#2C3E50',      # Dark blue-gray
    'secondary': '#E74C3C',    # Red
    'accent1': '#3498DB',      # Blue
    'accent2': '#F39C12',      # Orange
    'success': '#27AE60',      # Green
    'missing': '#E74C3C',      # Red for missing
    'present': '#ECF0F1',      # Light gray for present
    'grid': '#BDC3C7',         # Gray
}

# ── 3.1 Calculate missingness
# Percentage of missing values per column, most-missing first.
miss_int = df_int.isna().mean().mul(100).sort_values(ascending=False)
miss_ext = df_ext.isna().mean().mul(100).sort_values(ascending=False)

# Number of columns with at least one missing value in each cohort.
_n_affected_int = (miss_int > 0).sum()
_n_affected_ext = (miss_ext > 0).sum()

print("📊 Missingness Summary:")
print(f"   Internal: {_n_affected_int}/{len(miss_int)} features with missing data")
print(f"   External: {_n_affected_ext}/{len(miss_ext)} features with missing data\n")

# Same ten-row listing for both cohorts; only the heading differs.
for _heading, _ranked in (
    ("🔝 Top 10 most missing features (Internal):", miss_int),
    ("\n🔝 Top 10 most missing features (External):", miss_ext),
):
    print(_heading)
    for feat, pct in _ranked.iloc[:10].items():
        print(f"   {feat:30s}: {pct:5.1f}%")

# ── 3.2 / 3.3 FIGURE 1: Missingness Heatmaps (A = internal, B = external)
from matplotlib.colors import ListedColormap

# Two-colour map shared by both panels: 0 (present) -> light gray, 1 (missing) -> red.
# Built unconditionally: it used to be created only inside the internal branch,
# so the external heatmap failed with NameError whenever the internal cohort
# had no missing values.
cmap = ListedColormap([COLORS['present'], COLORS['missing']])


def _plot_missingness_heatmap(df, features, title, file_stem):
    """Draw, save and show the binary missingness heatmap of one cohort.

    Parameters
    ----------
    df : pandas.DataFrame
        Cohort data, one row per patient.
    features : list of str
        Columns of ``df`` to display, one heatmap row each.
    title : str
        Left-aligned panel title.
    file_stem : str
        Output name passed to ``save_figure``.

    Returns
    -------
    pandas.DataFrame
        The plotted 0/1 matrix (1 = missing), patients x features.
    """
    fig, ax = plt.subplots(figsize=(14, 10), dpi=300)

    # Binary matrix (1=missing, 0=present); transposed so features run down the y-axis.
    miss_matrix = df[features].isna().astype(int)

    sns.heatmap(miss_matrix.T,
                cmap=cmap,
                # Pin the colour range so the two colorbar ticks always sit in
                # the middle of the "present" and "missing" halves.
                vmin=0, vmax=1,
                cbar_kws={'label': 'Data Status', 'ticks': [0.25, 0.75],
                          'shrink': 0.8, 'aspect': 30},
                yticklabels=features,
                xticklabels=False,
                linewidths=0,
                ax=ax)

    # Replace the numeric colorbar ticks with category names.
    cbar = ax.collections[0].colorbar
    cbar.ax.set_yticklabels(['Present', 'Missing'])
    cbar.ax.tick_params(labelsize=9)

    ax.set_title(title, fontsize=13, fontweight='bold', pad=15, loc='left')
    ax.set_xlabel(f'Patients (N = {len(df):,})', fontsize=11, fontweight='bold')
    ax.set_ylabel('Clinical Features', fontsize=11, fontweight='bold')

    # Boxed note in the lower-right corner with the number of rows shown.
    ax.text(0.98, 0.02, f'{len(features)} features with missing data',
            transform=ax.transAxes, ha='right', va='bottom',
            bbox=dict(boxstyle='round', facecolor='white', alpha=0.8, edgecolor=COLORS['primary']),
            fontsize=9)

    plt.tight_layout()
    save_figure(fig, file_stem)
    plt.show()
    return miss_matrix


# ── 3.2 FIGURE 1: Missingness Heatmap - INTERNAL
features_with_missing = miss_int[miss_int > 0].index.tolist()

if features_with_missing:
    miss_matrix = _plot_missingness_heatmap(
        df_int,
        features_with_missing,
        'A. Missingness Pattern: Internal Cohort (Tongji Hospital)',
        'step3_fig1_missingness_heatmap_internal',
    )
    print(f"\n✅ Figure 1A saved: Internal heatmap ({len(features_with_missing)} features)")
else:
    print("\n⚠️  No missing data in internal cohort")

# ── 3.3 FIGURE 1B: Missingness Heatmap - EXTERNAL
features_with_missing_ext = miss_ext[miss_ext > 0].index.tolist()

if features_with_missing_ext:
    miss_matrix_ext = _plot_missingness_heatmap(
        df_ext,
        features_with_missing_ext,
        'B. Missingness Pattern: External Cohort (MIMIC-IV)',
        'step3_fig1_missingness_heatmap_external',
    )
    print(f"✅ Figure 1B saved: External heatmap ({len(features_with_missing_ext)} features)")
else:
    print("⚠️  No missing data in external cohort")

# ── 3.4 FIGURE 2: Missingness Comparison (Top 20)
fig, ax = plt.subplots(figsize=(10, 8), dpi=300)

# Side-by-side missingness of both cohorts; 'Max' is the larger of the two per
# feature and is used only to rank the features.
all_missing = pd.DataFrame({
    'Internal': miss_int,
    'External': miss_ext
})
all_missing['Max'] = all_missing.max(axis=1)
top20 = all_missing.nlargest(20, 'Max')[['Internal', 'External']]

# Horizontal bar plot: one pair of bars per feature, offset by half a bar width.
x = np.arange(len(top20))
width = 0.35

bars1 = ax.barh(x - width/2, top20['Internal'], width,
                label='Internal (Tongji)', color=COLORS['accent1'],
                edgecolor='white', linewidth=1.5, alpha=0.9)
bars2 = ax.barh(x + width/2, top20['External'], width,
                label='External (MIMIC-IV)', color=COLORS['accent2'],
                edgecolor='white', linewidth=1.5, alpha=0.9)

# Exclusion threshold line, read from CONFIG so the figure follows the
# pipeline setting instead of a hard-coded 10 (the label is "10%" for 10.0).
missing_threshold = CONFIG["missing_threshold"]
ax.axvline(x=missing_threshold, color=COLORS['secondary'], linestyle='--', linewidth=2,
           label=f'{missing_threshold:g}% exclusion threshold', zorder=0)

# Styling
ax.set_yticks(x)
ax.set_yticklabels(top20.index, fontsize=9)
ax.set_xlabel('Missing Data (%)', fontsize=11, fontweight='bold')
ax.set_ylabel('Clinical Features', fontsize=11, fontweight='bold')
ax.set_title('Figure 2. Top 20 Features with Highest Missing Data',
             fontsize=13, fontweight='bold', pad=15, loc='left')
ax.legend(loc='lower right', frameon=True, fancybox=True, shadow=True, fontsize=9)
ax.grid(axis='x', alpha=0.3, linestyle='--', linewidth=0.8)

# Upper x-limit: 10% headroom above the largest bar, but never below the
# threshold line. This keeps the line named in the legend visible and avoids
# a degenerate (0, 0) range when nothing is missing.
x_upper = np.nanmax([top20.max().max(), missing_threshold]) * 1.1
ax.set_xlim(0, x_upper)

# Add value labels on bars
for bars in [bars1, bars2]:
    for bar in bars:
        width_val = bar.get_width()
        if width_val > 2:  # Only show if >2%
            ax.text(width_val + 1, bar.get_y() + bar.get_height()/2,
                   f'{width_val:.1f}%', ha='left', va='center', fontsize=7, color=COLORS['primary'])

plt.tight_layout()
save_figure(fig, 'step3_fig2_missingness_comparison_top20')
plt.show()

print(f"\n✅ Figure 2 saved: Comparison plot (top 20 features)")

# ── 3.5 Save detailed report
# One row per internal-cohort column. Every column of the report is reindexed
# by the SAME labels: the external percentages used to be reindexed by
# df_ext.columns, which pairs values with the wrong feature (or raises on a
# length mismatch) as soon as the two cohorts differ in column order or count.
# Features absent from the external cohort now appear as NaN.
report_features = df_int.columns
miss_report = pd.DataFrame({
    'Feature': report_features,
    'Internal_%_Missing': miss_int.reindex(report_features).values,
    'External_%_Missing': miss_ext.reindex(report_features).values,
    'Max_%_Missing': all_missing.reindex(report_features)['Max'].values
}).sort_values('Max_%_Missing', ascending=False)

save_csv(miss_report, 'step3_missingness_detailed_report')

# ── 3.6 Log
# The comparison plot is always drawn; each heatmap only when its cohort has
# at least one feature with missing values.
figures_generated = 1 + int(bool(features_with_missing)) + int(bool(features_with_missing_ext))

# NOTE(review): "ALL 88 features" in the description below is a hard-coded
# count; confirm it still matches the data.
append_runlog("3", {
    "analysis": "Missingness visualization (Q1 journal quality, ALL 88 features)",
    "style": "Nature/Lancet/NEJM inspired",
    "internal_features_with_missing": len(features_with_missing),
    "external_features_with_missing": len(features_with_missing_ext),
    "figures_generated": figures_generated,
})

# Results of this step kept under one name.
MISSINGNESS_DATA = {
    "internal_missing_pct": miss_int,
    "external_missing_pct": miss_ext,
    "missingness_report": miss_report,
    "plot_colors": COLORS,
}

print("\n💾 Stored: MISSINGNESS_DATA + Q1 plot styling (will be used throughout)")
print("\n" + "="*100)
print("✅ STEP 3 COMPLETE — Q1 JOURNAL-QUALITY FIGURES GENERATED")
print("="*100)

In [None]:
# ═══════════════════════════════════════════════════════════════════════════════
# STEP 4: DISTRIBUTION PLOTS - ENHANCED QUALITY
# TRIPOD: 7a (Descriptive statistics), Visual inspection of data
# ═══════════════════════════════════════════════════════════════════════════════

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy import stats
from matplotlib.patches import Rectangle

print("\n" + "="*100)
print("STEP 4: DISTRIBUTION PLOTS - ENHANCED VISUALIZATION")
print("="*100)
# The line is labelled "UTC", so read the UTC clock (as the STEP 0 banner does)
# rather than datetime.now(), which returns the machine's local time.
print(f"UTC: {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')}\n")

# Work on copies so nothing in this step can modify the stored raw cohorts.
df_int = RAW_DATA["df_internal"].copy()
df_ext = RAW_DATA["df_external"].copy()
TARGET = CONFIG["target_col"]

# ── 4.0 Enhanced Style
# rcParams grouped by what they control, then applied in a single update.
_font_rc = {
    'font.family': 'sans-serif',
    'font.sans-serif': ['Arial', 'Helvetica', 'DejaVu Sans'],
    'font.size': 11,
}
_text_size_rc = {
    'axes.labelsize': 12,
    'axes.titlesize': 13,
    'axes.titleweight': 'bold',
    'axes.labelweight': 'bold',
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,
    'legend.fontsize': 10,
    'legend.title_fontsize': 11,
    'figure.titlesize': 15,
    'figure.titleweight': 'bold',
}
_frame_rc = {
    'axes.spines.top': False,
    'axes.spines.right': False,
    'axes.linewidth': 1.5,
    'xtick.major.width': 1.5,
    'ytick.major.width': 1.5,
    'xtick.major.size': 5,
    'ytick.major.size': 5,
}
_grid_rc = {
    'grid.alpha': 0.25,
    'grid.linestyle': '--',
    'grid.linewidth': 0.8,
}
plt.rcParams.update({**_font_rc, **_text_size_rc, **_frame_rc, **_grid_rc})

# Enhanced color palette
COLORS_ENHANCED = dict(
    survived='#3498DB',        # Professional blue
    died='#E74C3C',            # Clinical red
    survived_light='#5DADE2',  # Light blue
    died_light='#EC7063',      # Light red
    primary='#2C3E50',         # Dark slate
    secondary='#95A5A6',       # Gray
    grid='#BDC3C7',            # Light gray
    sig='#27AE60',             # Green for significance
    ns='#95A5A6',              # Gray for non-significant
)

# ── 4.1 Select key variables by clinical category
# Category label -> columns plotted under that label, in panel order.
key_vars_organized = dict([
    ('Demographics', ['age', 'ICU_LOS']),
    ('Hemodynamics', ['sbp', 'resp_rate']),
    ('Hematology', ['hemoglobin_min', 'platelet_count_min', 'wbc_count_max']),
    ('Renal', ['creatinine_max', 'eGFR_CKD_EPI_21']),
    ('Metabolic', ['glucose_max', 'lactate_max', 'potassium_max']),
])

# Flat list of every selected variable, categories concatenated in order.
all_vars = []
for _category_vars in key_vars_organized.values():
    all_vars.extend(_category_vars)

_n_vars = len(all_vars)
_n_categories = len(key_vars_organized)
print(f"📊 Visualizing {_n_vars} key variables across {_n_categories} categories\n")

# ── 4.2 / 4.3 FIGURES 3 & 4: Distributions by outcome (internal, then external)
# Both figures share one implementation; only the cohort, the title and the
# output name differ.

def _format_p_value(p_val):
    """Return (annotation text, significance marker, colour) for a p-value."""
    if p_val < 0.001:
        return "P < 0.001", "***", COLORS_ENHANCED['sig']
    p_text = f"P = {p_val:.3f}"
    if p_val < 0.01:
        return p_text, "**", COLORS_ENHANCED['sig']
    if p_val < 0.05:
        return p_text, "*", COLORS_ENHANCED['sig']
    return p_text, "ns", COLORS_ENHANCED['ns']


def _draw_placeholder(ax, message):
    """Blank the axes and show a centred gray note instead of a plot."""
    ax.text(0.5, 0.5, message,
            transform=ax.transAxes, ha='center', va='center',
            fontsize=10, color='gray', style='italic')
    ax.axis('off')


def _draw_outcome_panel(ax, survived, died, var, category):
    """Violin + box plot of one variable for survivors (x=1) vs. deaths (x=2).

    ``survived`` and ``died`` are the non-missing values of ``var`` in each
    outcome group; both must be non-empty. The panel is annotated with the
    two-sided Mann-Whitney U p-value, group medians and group sizes.
    """
    # Violins, one per group, coloured by outcome.
    for position, values, color_key in ((1, survived, 'survived'), (2, died, 'died')):
        parts = ax.violinplot([values], positions=[position],
                              widths=0.7, showmeans=False,
                              showmedians=False, showextrema=False)
        for body in parts['bodies']:
            body.set_facecolor(COLORS_ENHANCED[color_key])
            body.set_edgecolor(COLORS_ENHANCED[color_key])
            body.set_alpha(0.7)
            body.set_linewidth(1.5)

    # Narrow white box plots drawn over the violins (outliers hidden).
    ax.boxplot([survived, died], positions=[1, 2],
               widths=0.25, patch_artist=True,
               showfliers=False,
               boxprops=dict(facecolor='white', edgecolor=COLORS_ENHANCED['primary'],
                             linewidth=2, alpha=0.9),
               whiskerprops=dict(color=COLORS_ENHANCED['primary'], linewidth=1.5),
               capprops=dict(color=COLORS_ENHANCED['primary'], linewidth=1.5),
               medianprops=dict(color='black', linewidth=2.5))

    # Statistical test
    _, p_val = stats.mannwhitneyu(survived, died, alternative='two-sided')
    p_text, sig_marker, sig_color = _format_p_value(p_val)

    # All annotation offsets are fractions of the observed data range.
    y_max = max(survived.max(), died.max())
    y_min = min(survived.min(), died.min())
    y_range = y_max - y_min
    if y_range == 0:
        # Constant variable: use a nominal span so the offsets and the axis
        # limits do not collapse onto a single value.
        y_range = abs(y_max) if y_max != 0 else 1.0
    sig_y = y_max + y_range * 0.05

    # Bracket between the two groups, drawn only for significant results.
    if p_val < 0.05:
        ax.plot([1, 2], [sig_y, sig_y], color=sig_color, linewidth=2)
        ax.plot([1, 1], [sig_y - y_range*0.02, sig_y], color=sig_color, linewidth=2)
        ax.plot([2, 2], [sig_y - y_range*0.02, sig_y], color=sig_color, linewidth=2)

    # P-value annotation above the bracket position.
    ax.text(1.5, sig_y + y_range*0.03, f"{p_text}\n{sig_marker}",
            ha='center', va='bottom', fontsize=9, fontweight='bold',
            color=sig_color)

    # Median and group size below each violin.
    for position, values, color_key in ((1, survived, 'survived'), (2, died, 'died')):
        ax.text(position, y_min - y_range*0.12,
                f'Median: {values.median():.1f}\nn = {len(values)}',
                ha='center', va='top', fontsize=8,
                color=COLORS_ENHANCED[color_key], fontweight='bold')

    # Styling; the y-limits leave room for the annotations above and below.
    ax.set_xlim(0.4, 2.6)
    ax.set_ylim(y_min - y_range*0.22, y_max + y_range*0.15)
    ax.set_xticks([1, 2])
    ax.set_xticklabels(['Survived', 'Died'], fontsize=11, fontweight='bold')
    ax.set_ylabel(var.replace('_', ' ').title(),
                  fontsize=11, fontweight='bold')
    ax.grid(axis='y', alpha=0.25, linestyle='--', linewidth=0.8)

    # Category label in the top-left corner.
    ax.text(0.02, 0.98, category, transform=ax.transAxes,
            fontsize=8, va='top', ha='left', style='italic',
            color=COLORS_ENHANCED['secondary'],
            bbox=dict(boxstyle='round,pad=0.3', facecolor='white',
                      edgecolor=COLORS_ENHANCED['grid'], alpha=0.7))


def _plot_cohort_distributions(df, suptitle, file_stem, legend_handles):
    """Draw, save and show the 4x3 grid of distribution panels for one cohort.

    Parameters
    ----------
    df : pandas.DataFrame
        Cohort data containing the ``TARGET`` column (0 = survived, 1 = died).
    suptitle : str
        Figure title.
    file_stem : str
        Output name passed to ``save_figure``.
    legend_handles : list
        Handles for the figure-level legend.

    Returns
    -------
    matplotlib.figure.Figure
    """
    n_rows, n_cols = 4, 3
    fig = plt.figure(figsize=(18, 14), dpi=300)
    gs = fig.add_gridspec(n_rows, n_cols, hspace=0.35, wspace=0.3,
                          left=0.08, right=0.95, top=0.94, bottom=0.06)

    # Flatten to (category, variable) pairs; at most one panel per grid cell.
    panels = [(category, var)
              for category, vars_list in key_vars_organized.items()
              for var in vars_list][:n_rows * n_cols]

    for plot_idx, (category, var) in enumerate(panels):
        ax = fig.add_subplot(gs[plot_idx // n_cols, plot_idx % n_cols])

        if var not in df.columns:
            _draw_placeholder(ax, f'{var}\nNot Available')
            continue

        survived = df[df[TARGET]==0][var].dropna()
        died = df[df[TARGET]==1][var].dropna()
        if len(survived) == 0 or len(died) == 0:
            _draw_placeholder(ax, 'Insufficient Data')
            continue

        _draw_outcome_panel(ax, survived, died, var, category)

    fig.suptitle(suptitle, fontsize=15, fontweight='bold', y=0.98)
    fig.legend(handles=legend_handles, loc='upper right',
               bbox_to_anchor=(0.98, 0.97), frameon=True,
               fancybox=True, shadow=True, fontsize=11)

    save_figure(fig, file_stem)
    plt.show()
    return fig


# Figure-level legend shared by both figures.
legend_elements = [
    plt.Line2D([0], [0], color=COLORS_ENHANCED['survived'], lw=8, label='Survived'),
    plt.Line2D([0], [0], color=COLORS_ENHANCED['died'], lw=8, label='Died'),
]

# ── 4.2 FIGURE 3: Distribution - INTERNAL COHORT
# The cohort size in the title is taken from the data rather than hard-coded.
fig = _plot_cohort_distributions(
    df_int,
    'Figure 3. Distribution of Key Clinical Variables by One-Year Mortality Status\n'
    f'Internal Cohort (Tongji Hospital, n={len(df_int)})',
    'step4_fig3_distributions_internal',
    legend_elements,
)

print(f"✅ Figure 3 saved: Internal cohort distributions")

# ── 4.3 FIGURE 4: Distribution - EXTERNAL COHORT
fig = _plot_cohort_distributions(
    df_ext,
    'Figure 4. Distribution of Key Clinical Variables by One-Year Mortality Status\n'
    f'External Cohort (MIMIC-IV, n={len(df_ext)})',
    'step4_fig4_distributions_external',
    legend_elements,
)

print(f"✅ Figure 4 saved: External cohort distributions")

# ── 4.4 Log
# Record what was plotted and how significance was assessed.
_step4_log_entry = dict(
    analysis="Distribution plots with significance testing",
    variables_visualized=len(all_vars),
    statistical_test="Mann-Whitney U test",
    significance_levels="*** p<0.001, ** p<0.01, * p<0.05, ns p≥0.05",
)
append_runlog("4", _step4_log_entry)

# Variable list and palette of this step kept under one name.
DISTRIBUTION_DATA = dict(
    key_continuous_vars=all_vars,
    colors_enhanced=COLORS_ENHANCED,
)

_rule = "=" * 100
print("\n💾 Stored: DISTRIBUTION_DATA")
print("\n" + _rule)
print("✅ STEP 4 COMPLETE — DISTRIBUTION FIGURES GENERATED")
print(_rule)

In [None]:
# ═══════════════════════════════════════════════════════════════════════════════
# STEP 5: UNIVARIATE ANALYSIS - FEATURE-OUTCOME ASSOCIATIONS
# TRIPOD: 10a (Univariate associations), 14a (Model specification)
# ═══════════════════════════════════════════════════════════════════════════════

import pandas as pd
import numpy as np
from scipy import stats
from sklearn.metrics import roc_auc_score

print("\n" + "="*100)
print("STEP 5: UNIVARIATE ANALYSIS - FEATURE-OUTCOME ASSOCIATIONS")
print("="*100)
# The line is labelled "UTC", so read the UTC clock (as the STEP 0 banner does)
# rather than datetime.now(), which returns the machine's local time.
print(f"UTC: {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"User: zainzampawala786-sudo\n")

# Work on copies so nothing in this step can modify the stored raw cohorts.
df_int = RAW_DATA["df_internal"].copy()
df_ext = RAW_DATA["df_external"].copy()
TARGET = CONFIG["target_col"]

# ── 5.1 Helper functions
def compute_univariate_continuous(df, feature, target):
    """Univariate association between one continuous feature and a 0/1 outcome.

    Rows where either ``feature`` or ``target`` is missing are dropped first.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing both columns.
    feature : str
        Name of the continuous column.
    target : str
        Name of the outcome column (0 = survived, 1 = died).

    Returns
    -------
    dict
        Keys: ``feature``, ``type``, ``n`` (complete rows), ``test``,
        ``statistic`` and ``p_value`` (two-sided Mann-Whitney U), ``auc``
        (ROC AUC of the raw feature against the outcome),
        ``median_survived``, ``median_died`` and ``effect_size``.
        ``effect_size`` is the difference in medians (died - survived)
        divided by the pooled standard deviation, i.e. a median-based
        stand-in for Cohen's d, not Cohen's d itself.

        Anything that cannot be computed is NaN: every statistic when fewer
        than 10 complete rows remain, and the test, AUC and effect size when
        one outcome group is empty.
    """
    data = df[[feature, target]].dropna()

    # Start from an all-NaN record and fill in what can actually be computed,
    # so every return path yields the same keys in the same order.
    result = {
        'feature': feature,
        'type': 'continuous',
        'n': len(data),
        'test': 'Mann-Whitney U',
        'statistic': np.nan,
        'p_value': np.nan,
        'auc': np.nan,
        'median_survived': np.nan,
        'median_died': np.nan,
        'effect_size': np.nan,
    }

    if len(data) < 10:
        return result

    survived = data.loc[data[target] == 0, feature]
    died = data.loc[data[target] == 1, feature]
    result['median_survived'] = survived.median()
    result['median_died'] = died.median()

    # With an empty outcome group there is nothing to compare: leave the test,
    # the AUC and the effect size as NaN instead of reporting an effect of 0.
    if survived.empty or died.empty:
        return result

    # Mann-Whitney U test; on failure the statistic and p-value stay NaN.
    try:
        result['statistic'], result['p_value'] = stats.mannwhitneyu(
            survived, died, alternative='two-sided')
    except (ValueError, TypeError):
        pass

    # AUC (discriminative ability of the raw feature); NaN when the score
    # cannot be computed for these labels.
    try:
        result['auc'] = roc_auc_score(data[target], data[feature])
    except (ValueError, TypeError):
        pass

    # Effect size: median difference scaled by the pooled standard deviation.
    pooled_std = np.sqrt((survived.std()**2 + died.std()**2) / 2)
    if np.isnan(pooled_std):
        # Undefined spread (e.g. a single-row group): keep the effect size NaN.
        pass
    elif pooled_std > 0:
        result['effect_size'] = (died.median() - survived.median()) / pooled_std
    else:
        # Both groups constant: reported as 0, as before.
        result['effect_size'] = 0

    return result

def compute_univariate_categorical(df, feature, target):
    """Univariate association between one categorical feature and a 0/1 outcome.

    Rows where either ``feature`` or ``target`` is missing are dropped first.
    The percentages and the odds ratio treat the feature as a 0/1 indicator
    (they count rows equal to 1 and rows equal to 0); the chi-square test
    uses every level present.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing both columns.
    feature : str
        Name of the categorical column.
    target : str
        Name of the outcome column (0 = survived, 1 = died).

    Returns
    -------
    dict
        Keys: ``feature``, ``type``, ``n`` (complete rows), ``test``,
        ``statistic`` and ``p_value`` (chi-square on the outcome x feature
        table), ``auc``, ``pct_survived`` / ``pct_died`` (percentage of each
        outcome group with feature == 1; 0 for an empty group) and
        ``odds_ratio`` (NaN when a zero cell makes it undefined).

        Every statistic is NaN when fewer than 10 complete rows remain; the
        test, AUC and odds ratio are NaN when one outcome group is empty.
    """
    data = df[[feature, target]].dropna()

    # Start from an all-NaN record and fill in what can actually be computed,
    # so every return path yields the same keys in the same order.
    result = {
        'feature': feature,
        'type': 'categorical',
        'n': len(data),
        'test': 'Chi-square',
        'statistic': np.nan,
        'p_value': np.nan,
        'auc': np.nan,
        'pct_survived': np.nan,
        'pct_died': np.nan,
        'odds_ratio': np.nan,
    }

    if len(data) < 10:
        return result

    survived = data.loc[data[target] == 0, feature]
    died = data.loc[data[target] == 1, feature]

    # Percentages of each outcome group with the feature present (== 1).
    result['pct_survived'] = (survived==1).sum() / len(survived) * 100 if len(survived) > 0 else 0
    result['pct_died'] = (died==1).sum() / len(died) * 100 if len(died) > 0 else 0

    # With an empty outcome group the contingency table has a single row and
    # no association can be tested; leave test, AUC and odds ratio as NaN
    # rather than reporting the p-value of a degenerate table.
    if survived.empty or died.empty:
        return result

    # Chi-square test; on failure the statistic and p-value stay NaN.
    try:
        contingency = pd.crosstab(data[target], data[feature])
        result['statistic'], result['p_value'], _, _ = stats.chi2_contingency(contingency)
    except (ValueError, TypeError):
        pass

    # AUC; NaN when the score cannot be computed for these labels.
    try:
        result['auc'] = roc_auc_score(data[target], data[feature])
    except (ValueError, TypeError):
        pass

    # Odds ratio from the 2x2 cell counts.
    a = (died==1).sum()  # died with feature
    b = (died==0).sum()  # died without feature
    c = (survived==1).sum()  # survived with feature
    d = (survived==0).sum()  # survived without feature
    if (b * c) > 0:
        result['odds_ratio'] = (a * d) / (b * c)

    return result

# ── 5.2 Identify continuous vs categorical features
# Candidate features: every numeric column of the internal cohort except the outcome.
_numeric_features = df_int.select_dtypes(include=[np.number]).columns.tolist()
_numeric_features.remove(TARGET)

# Identify binary/categorical (unique values <= 10)
# Flag each candidate once, then split the list in two, keeping column order.
_is_low_cardinality = [df_int[col].nunique() <= 10 for col in _numeric_features]
categorical_features = [
    col for col, is_low in zip(_numeric_features, _is_low_cardinality) if is_low
]
continuous_features = [
    col for col, is_low in zip(_numeric_features, _is_low_cardinality) if not is_low
]

_n_continuous = len(continuous_features)
_n_categorical = len(categorical_features)
print(f"📊 Feature Classification:")
print(f"   Continuous: {_n_continuous} features")
print(f"   Categorical: {_n_categorical} features")
print(f"   Total: {_n_continuous + _n_categorical} features\n")

# ── 5.3 Univariate analysis - INTERNAL cohort
print("🔍 Running univariate analysis: INTERNAL cohort...")

# Continuous features first, then categorical ones, each with its own analyzer.
results_int = []
for _analyzer, _feature_group in (
    (compute_univariate_continuous, continuous_features),
    (compute_univariate_categorical, categorical_features),
):
    for feat in _feature_group:
        if feat not in df_int.columns:
            continue
        results_int.append(_analyzer(df_int, feat, TARGET))

# One row per feature, flagged at p < 0.05 and ordered by ascending p-value.
univariate_int_df = (
    pd.DataFrame(results_int)
    .assign(significant=lambda frame: frame['p_value'] < 0.05)
    .sort_values('p_value')
)

# Add effect interpretation
def interpret_auc(auc):
    """Map a univariate AUC to a qualitative label.

    Returns 'N/A' for missing values; otherwise the label of the first band
    whose exclusive upper bound exceeds ``auc``:
    <0.5 'Poor (inverse)', <0.6 'Poor', <0.7 'Fair', <0.8 'Good',
    and 'Excellent' for everything from 0.8 upwards.
    """
    if pd.isna(auc):
        return 'N/A'

    # (exclusive upper bound, label), checked in ascending order
    bands = (
        (0.5, 'Poor (inverse)'),
        (0.6, 'Poor'),
        (0.7, 'Fair'),
        (0.8, 'Good'),
    )
    for upper_bound, label in bands:
        if auc < upper_bound:
            return label
    return 'Excellent'

# Attach the qualitative AUC band to every feature row
auc_labels_int = univariate_int_df['auc'].apply(interpret_auc)
univariate_int_df['auc_interpretation'] = auc_labels_int

n_analyzed_int = len(univariate_int_df)
n_significant_int = univariate_int_df['significant'].sum()
print(f"   ✅ Analyzed {n_analyzed_int} features")
print(f"   ✅ Significant (p<0.05): {n_significant_int} features\n")

# Display top 20 most significant (the table is already sorted by p-value)
print("📈 Top 20 Most Significant Features (Internal):")
display_cols = ['feature', 'type', 'n', 'p_value', 'auc', 'auc_interpretation']
top20_int = univariate_int_df.loc[:, display_cols].head(20)
print(top20_int.to_string(index=False))

# ── 5.4 Univariate analysis - EXTERNAL cohort
print(f"\n{'='*100}")
print("🔍 Running univariate analysis: EXTERNAL cohort...")

# Reuse the feature lists derived from the internal cohort. Each group is paired
# with its analysis function; features absent from the external cohort are skipped.
results_ext = []
for analyse, feature_group in (
    (compute_univariate_continuous, continuous_features),
    (compute_univariate_categorical, categorical_features),
):
    for feat in feature_group:
        if feat not in df_ext.columns:
            continue
        results_ext.append(analyse(df_ext, feat, TARGET))

# Tabulate, flag p < 0.05, order by p-value, and label the AUC band
univariate_ext_df = pd.DataFrame(results_ext)
univariate_ext_df['significant'] = univariate_ext_df['p_value'] < 0.05
univariate_ext_df = univariate_ext_df.sort_values(by='p_value')
univariate_ext_df['auc_interpretation'] = univariate_ext_df['auc'].apply(interpret_auc)

n_analyzed_ext = len(univariate_ext_df)
n_significant_ext = univariate_ext_df['significant'].sum()
print(f"   ✅ Analyzed {n_analyzed_ext} features")
print(f"   ✅ Significant (p<0.05): {n_significant_ext} features\n")

# Display top 20
print("📈 Top 20 Most Significant Features (External):")
top20_ext = univariate_ext_df.loc[:, display_cols].head(20)
print(top20_ext.to_string(index=False))

# ── 5.5 Save results
for cohort_table, file_stem in (
    (univariate_int_df, 'step5_univariate_analysis_internal'),
    (univariate_ext_df, 'step5_univariate_analysis_external'),
):
    save_csv(cohort_table, file_stem)

# ── 5.6 Summary comparison
# Names of features flagged significant in each cohort, and their overlap
common_sig_int = set(univariate_int_df.loc[univariate_int_df['significant'], 'feature'])
common_sig_ext = set(univariate_ext_df.loc[univariate_ext_df['significant'], 'feature'])
common_sig_both = common_sig_int.intersection(common_sig_ext)

print(f"\n{'='*100}")
print(f"📊 Univariate Significance Summary:")
print(f"   Internal significant: {len(common_sig_int)} features")
print(f"   External significant: {len(common_sig_ext)} features")
print(f"   Significant in BOTH cohorts: {len(common_sig_both)} features")

if common_sig_both:
    print(f"\n   Features significant in BOTH cohorts:")
    # Only the first 15 names (alphabetical) are listed
    for feat in sorted(common_sig_both)[:15]:
        print(f"      • {feat}")

# ── 5.7 Log
step5_internal_log = {
    "features_analyzed": len(univariate_int_df),
    "significant_p005": int(univariate_int_df['significant'].sum()),
    "continuous": len(continuous_features),
    "categorical": len(categorical_features),
}
step5_external_log = {
    "features_analyzed": len(univariate_ext_df),
    "significant_p005": int(univariate_ext_df['significant'].sum()),
}
append_runlog("5", {
    "analysis": "Univariate feature-outcome associations",
    "internal": step5_internal_log,
    "external": step5_external_log,
    "common_significant_both": len(common_sig_both),
})

# Bundle the Step 5 outputs under a single name
UNIVARIATE_DATA = dict(
    internal_results=univariate_int_df,
    external_results=univariate_ext_df,
    continuous_features=continuous_features,
    categorical_features=categorical_features,
    common_significant_both=list(common_sig_both),
)

banner = "=" * 100
print("\n💾 Stored: UNIVARIATE_DATA")
print("\n" + banner)
print("✅ STEP 5 COMPLETE — UNIVARIATE ANALYSIS FINISHED")
print(banner)

In [None]:
# ═══════════════════════════════════════════════════════════════════════════════
# STEP 6: FEATURE RETENTION & DROPPING
# TRIPOD: 5c (Handling of missing data - exclusions)
# ═══════════════════════════════════════════════════════════════════════════════

print("\n" + "="*100)
print("STEP 6: FEATURE RETENTION & DROPPING")
print("="*100)
# utcnow() (as in Step 0) so the value printed after "UTC:" really is UTC;
# datetime.now() returns the machine's local time.
print(f"UTC: {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"User: zainzampawala786-sudo\n")

# Work on copies of the raw cohorts so earlier steps' data stay untouched
df_int = RAW_DATA["df_internal"].copy()
df_ext = RAW_DATA["df_external"].copy()
TARGET = CONFIG["target_col"]
THRESHOLD = CONFIG["missing_threshold"]  # maximum tolerated missingness, in percent

# Per-feature missingness (percent) from the earlier missingness step
miss_int = MISSINGNESS_DATA["internal_missing_pct"]
miss_ext = MISSINGNESS_DATA["external_missing_pct"]

# ── 6.1 Define protected features (clinically critical)
# NOTE(review): CONFIG["protected_features"] (Step 0) lists a different set
# (lactate_max, lactate_min, creatinine_max). The list below is what Step 6
# actually applies — confirm which one is intended.
PROTECTED_FEATURES = [
    'lactate_min', 'lactate_max',      # Strong mortality predictor
    'spo2_min', 'spo2_max',            # Hypoxia marker
    'pco2_min', 'pco2_max',            # Respiratory failure
    'po2_min', 'po2_max',              # Oxygenation status
]

print(f"🛡️  Protected Features (keep despite >{THRESHOLD}% missing):")
for feat in PROTECTED_FEATURES:
    # A feature absent from the missingness table is reported as 0.0% missing
    int_miss = miss_int.get(feat, 0)
    ext_miss = miss_ext.get(feat, 0)
    print(f"   • {feat:25s} - Internal: {int_miss:5.1f}%, External: {ext_miss:5.1f}%")

# ── 6.2 Identify features to drop
# A feature is dropped from a cohort when its missingness in that cohort exceeds
# THRESHOLD and it is not protected; the target is never a drop candidate.
features_to_drop_int = []
features_to_drop_ext = []

print(f"\n🗑️  Features to DROP (>{THRESHOLD}% missing, NOT protected):\n")

# Internal cohort
print("   INTERNAL COHORT:")
for feat in miss_int.index:
    if feat == TARGET:
        continue
    if miss_int[feat] > THRESHOLD and feat not in PROTECTED_FEATURES:
        features_to_drop_int.append(feat)
        print(f"      • {feat:25s} - {miss_int[feat]:5.1f}% missing")

# External cohort
print(f"\n   EXTERNAL COHORT:")
for feat in miss_ext.index:
    if feat == TARGET:
        continue
    if miss_ext[feat] > THRESHOLD and feat not in PROTECTED_FEATURES:
        features_to_drop_ext.append(feat)
        print(f"      • {feat:25s} - {miss_ext[feat]:5.1f}% missing")

# ── 6.3 Additional drops (non-prognostic)
ADDITIONAL_DROPS = ['weight', 'height']  # Not clinically prognostic in AMI mortality

print(f"\n🗑️  Additional drops (non-prognostic features):")
for feat in ADDITIONAL_DROPS:
    if feat in df_int.columns:
        print(f"   • {feat:25s} - Internal: {miss_int.get(feat, 0):5.1f}% missing")
    if feat in df_ext.columns:
        print(f"   • {feat:25s} - External: {miss_ext.get(feat, 0):5.1f}% missing")

features_to_drop_int.extend(f for f in ADDITIONAL_DROPS if f in df_int.columns)
features_to_drop_ext.extend(f for f in ADDITIONAL_DROPS if f in df_ext.columns)

# Remove duplicates. sorted() rather than list(set(...)): set iteration order
# for strings changes between interpreter runs, which would make the stored
# drop lists (and anything derived from their order) non-reproducible.
features_to_drop_int = sorted(set(features_to_drop_int))
features_to_drop_ext = sorted(set(features_to_drop_ext))

# ── 6.4 Drop features
# errors='ignore' lets the drop list contain names that are not columns of the
# frame, so the reported counts are taken from the frames themselves rather
# than from len(original) - len(drop list).
df_int_clean = df_int.drop(columns=features_to_drop_int, errors='ignore')
df_ext_clean = df_ext.drop(columns=features_to_drop_ext, errors='ignore')

print(f"\n{'='*100}")
print(f"📊 Feature Retention Summary:\n")

print(f"   INTERNAL COHORT:")
print(f"      Original features:   {len(df_int.columns)}")
print(f"      Features to drop:    {len(df_int.columns) - len(df_int_clean.columns)}")
print(f"      Retained features:   {len(df_int_clean.columns)}")

print(f"\n   EXTERNAL COHORT:")
print(f"      Original features:   {len(df_ext.columns)}")
print(f"      Features to drop:    {len(df_ext.columns) - len(df_ext_clean.columns)}")
print(f"      Retained features:   {len(df_ext_clean.columns)}")

# ── 6.5 Align features (use common features only)
# Keep only columns present in both cleaned cohorts. The target is re-added
# explicitly, and the final list is sorted so both frames share one column order.
shared_columns = set(df_int_clean.columns) & set(df_ext_clean.columns)
common_features = sorted([f for f in shared_columns if f != TARGET] + [TARGET])

print(f"\n   ALIGNED FEATURES:")
print(f"      Common features:     {len(common_features)}")

df_int_aligned = df_int_clean[common_features]
df_ext_aligned = df_ext_clean[common_features]

# ── 6.6 Final missingness check
# Percent missing per column of the aligned frames, highest first
miss_int_clean = df_int_aligned.isna().mean().mul(100).sort_values(ascending=False)
miss_ext_clean = df_ext_aligned.isna().mean().mul(100).sort_values(ascending=False)

print(f"\n📉 Missingness After Cleaning:\n")
for cohort_label, cohort_missing in (("INTERNAL", miss_int_clean), ("EXTERNAL", miss_ext_clean)):
    print(f"   {cohort_label}: Max missing = {cohort_missing.max():.1f}% ({cohort_missing.idxmax()})")

# Features still above THRESHOLD (should only be protected ones)
high_miss_int = miss_int_clean[miss_int_clean > THRESHOLD]
high_miss_ext = miss_ext_clean[miss_ext_clean > THRESHOLD]

for cohort_label, high_missing in (("INTERNAL", high_miss_int), ("EXTERNAL", high_miss_ext)):
    if high_missing.empty:
        continue
    print(f"\n   Features with >{THRESHOLD}% missing ({cohort_label} - should be PROTECTED only):")
    for feat, pct in high_missing.items():
        if feat in PROTECTED_FEATURES:
            status = "✅ PROTECTED"
        else:
            status = "⚠️  UNEXPECTED"
        print(f"      • {feat:25s}: {pct:5.1f}% - {status}")

# ── 6.7 Save cleaned data
save_pickle(df_int_aligned, 'step6_df_internal_cleaned')
save_pickle(df_ext_aligned, 'step6_df_external_cleaned')

# Save dropped features list
# Build the union once, in sorted order, so every column of the summary is
# derived from the same sequence and the CSV rows are reproducible across runs.
dropped_int_lookup = set(features_to_drop_int)
dropped_ext_lookup = set(features_to_drop_ext)
all_dropped_features = sorted(dropped_int_lookup | dropped_ext_lookup)

dropped_features_summary = pd.DataFrame({
    'Feature': all_dropped_features,
    'Dropped_from_Internal': [f in dropped_int_lookup for f in all_dropped_features],
    'Dropped_from_External': [f in dropped_ext_lookup for f in all_dropped_features],
    # A feature absent from a missingness table is reported as 0
    'Internal_Missing_%': [miss_int.get(f, 0) for f in all_dropped_features],
    'External_Missing_%': [miss_ext.get(f, 0) for f in all_dropped_features],
})
save_csv(dropped_features_summary, 'step6_dropped_features_summary')

# ── 6.8 Log
append_runlog("6", {
    "analysis": "Feature retention and dropping based on missingness",
    "threshold": f"{THRESHOLD}%",
    "protected_features": len(PROTECTED_FEATURES),
    "internal": {
        "original": len(df_int.columns),
        "dropped": len(features_to_drop_int),
        "retained": len(df_int_aligned.columns),
    },
    "external": {
        "original": len(df_ext.columns),
        "dropped": len(features_to_drop_ext),
        "retained": len(df_ext_aligned.columns),
    },
    "aligned_common_features": len(common_features),
})

# Bundle the Step 6 outputs under a single name
CLEAN_DATA = {
    "df_internal": df_int_aligned,
    "df_external": df_ext_aligned,
    "features_dropped_internal": features_to_drop_int,
    "features_dropped_external": features_to_drop_ext,
    "protected_features": PROTECTED_FEATURES,
    "common_features": common_features,
    "n_features": len(common_features) - 1,  # excluding target
}

print("\n💾 Stored: CLEAN_DATA (aligned datasets ready for train-test split)")
print("\n" + "="*100)
print("✅ STEP 6 COMPLETE — FEATURE CLEANING FINISHED")
print("="*100)

In [None]:
# ═══════════════════════════════════════════════════════════════════════════════
# STEP 7: TRAIN-TEST SPLIT (STRATIFIED)
# TRIPOD: 10b (Data splitting), Prevention of data leakage
# ═══════════════════════════════════════════════════════════════════════════════

from sklearn.model_selection import train_test_split

print("\n" + "="*100)
print("STEP 7: TRAIN-TEST SPLIT (STRATIFIED)")
print("="*100)
# utcnow() (as in Step 0) so the value printed after "UTC:" really is UTC;
# datetime.now() returns the machine's local time.
print(f"UTC: {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"User: zainzampawala786-sudo\n")

# Work on copies of the aligned cohorts produced by Step 6
df_int = CLEAN_DATA["df_internal"].copy()
df_ext = CLEAN_DATA["df_external"].copy()
TARGET = CONFIG["target_col"]
TEST_SIZE = CONFIG["test_size"]
RANDOM_STATE = CONFIG["random_state"]

# ── 7.1 Split INTERNAL cohort (train + test)
print(f"📊 INTERNAL COHORT (Development Dataset):\n")
print(f"   Total samples: {len(df_int)}")
print(f"   Features: {len(df_int.columns) - 1} (excluding target)")
# round(), not int(): int() truncates binary-float artefacts
# (e.g. 0.29 * 100 == 28.999999999999996 would be shown as 28%).
print(f"   Split ratio: {round((1-TEST_SIZE)*100)}% train / {round(TEST_SIZE*100)}% test")
print(f"   Random state: {RANDOM_STATE}")
print(f"   Stratification: {TARGET}\n")

# Separate features and target
y_int = df_int[TARGET]
X_int = df_int.drop(columns=[TARGET])

# Stratified split: class proportions of the target are preserved in both parts
X_train, X_test, y_train, y_test = train_test_split(
    X_int,
    y_int,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
    stratify=y_int,
)


def _death_rate_pct(outcome):
    """Share of rows with outcome == 1, in percent."""
    return (outcome == 1).sum() / len(outcome) * 100


# Calculate mortality rates
train_mortality = _death_rate_pct(y_train)
test_mortality = _death_rate_pct(y_test)
total_mortality = _death_rate_pct(y_int)

# Same three-line report for each partition
for header, X_part, y_part, part_mortality in (
    (f"   ✅ TRAINING SET:", X_train, y_train, train_mortality),
    (f"\n   ✅ TESTING SET:", X_test, y_test, test_mortality),
):
    print(header)
    print(f"      • Samples: {len(X_part)} ({len(X_part)/len(df_int)*100:.1f}%)")
    print(f"      • Deaths: {(y_part==1).sum()} ({part_mortality:.1f}%)")
    print(f"      • Survivors: {(y_part==0).sum()} ({100-part_mortality:.1f}%)")

# Absolute gap between each partition's mortality and the overall rate
train_gap = abs(train_mortality - total_mortality)
test_gap = abs(test_mortality - total_mortality)

print(f"\n   📈 Stratification Check:")
print(f"      • Overall mortality: {total_mortality:.1f}%")
print(f"      • Train mortality:   {train_mortality:.1f}% (Δ = {train_gap:.2f}%)")
print(f"      • Test mortality:    {test_mortality:.1f}% (Δ = {test_gap:.2f}%)")

stratification_ok = train_gap < 2 and test_gap < 2
if stratification_ok:
    print(f"      ✅ Stratification SUCCESSFUL (differences < 2%)")
else:
    print(f"      ⚠️  Stratification may need adjustment")

# ── 7.2 EXTERNAL cohort (entire set used for external validation)
n_external_rows = len(df_ext)
n_external_predictors = len(df_ext.columns) - 1

print(f"\n{'='*100}")
print(f"📊 EXTERNAL COHORT (External Validation Dataset):\n")
print(f"   Total samples: {n_external_rows}")
print(f"   Features: {n_external_predictors} (excluding target)")
print(f"   Usage: Full dataset for external validation (no split)")

# The external cohort is not split: all rows form the validation set
y_external = df_ext[TARGET]
X_external = df_ext.drop(columns=[TARGET])

external_deaths = (y_external == 1).sum()
external_survivors = (y_external == 0).sum()
ext_mortality = external_deaths / len(y_external) * 100

print(f"\n   ✅ EXTERNAL VALIDATION SET:")
print(f"      • Samples: {len(X_external)}")
print(f"      • Deaths: {external_deaths} ({ext_mortality:.1f}%)")
print(f"      • Survivors: {external_survivors} ({100-ext_mortality:.1f}%)")

# ── 7.3 Feature alignment check
print(f"\n{'='*100}")
print(f"🔗 Feature Alignment Check:\n")

# Column-name sets of the three predictor matrices
train_features, test_features, ext_features = (
    set(frame.columns) for frame in (X_train, X_test, X_external)
)

for prefix, names in (
    ("   • Training features:  ", train_features),
    ("   • Testing features:   ", test_features),
    ("   • External features:  ", ext_features),
):
    print(f"{prefix}{len(names)}")

features_identical = train_features == test_features and test_features == ext_features
if features_identical:
    print(f"   ✅ All datasets have identical features")
else:
    print(f"   ⚠️  Feature mismatch detected!")
    # Show the names present in only one of each compared pair
    if train_features != test_features:
        print(f"      Train vs Test mismatch: {train_features ^ test_features}")
    if train_features != ext_features:
        print(f"      Train vs External mismatch: {train_features ^ ext_features}")

# ── 7.4 Missingness comparison
print(f"\n📉 Missingness Comparison (BEFORE Imputation):\n")


def _overall_missing_pct(frame):
    """Missing cells as a percentage of all cells (rows x columns) of ``frame``."""
    n_cells = frame.shape[0] * frame.shape[1]
    return frame.isna().sum().sum() / n_cells * 100


train_missing = _overall_missing_pct(X_train)
test_missing = _overall_missing_pct(X_test)
ext_missing = _overall_missing_pct(X_external)

for prefix, missing_pct in (
    ("   • Training set:    ", train_missing),
    ("   • Testing set:     ", test_missing),
    ("   • External set:    ", ext_missing),
):
    print(f"{prefix}{missing_pct:.2f}% missing")

# ── 7.5 Save split data
# Persist each raw (pre-imputation) object under its Step 7 file stem
for split_object, file_stem in (
    (X_train, 'step7_X_train_raw'),
    (X_test, 'step7_X_test_raw'),
    (y_train, 'step7_y_train'),
    (y_test, 'step7_y_test'),
    (X_external, 'step7_X_external_raw'),
    (y_external, 'step7_y_external'),
):
    save_pickle(split_object, file_stem)

print(f"\n💾 Saved RAW (pre-imputation) datasets:")
for saved_pair in (
    "X_train_raw.pkl, y_train.pkl",
    "X_test_raw.pkl, y_test.pkl",
    "X_external_raw.pkl, y_external.pkl",
):
    print(f"   • {saved_pair}")


def _partition_log(features, outcome, mortality_pct):
    """Run-log entry for one partition: size, death count, rounded mortality."""
    return {
        "n": len(features),
        "deaths": int((outcome == 1).sum()),
        "mortality_pct": round(mortality_pct, 1),
    }


# ── 7.6 Log
append_runlog("7", {
    "analysis": "Train-test split with stratification",
    "test_size": TEST_SIZE,
    "random_state": RANDOM_STATE,
    "internal_train": _partition_log(X_train, y_train, train_mortality),
    "internal_test": _partition_log(X_test, y_test, test_mortality),
    "external_validation": _partition_log(X_external, y_external, ext_mortality),
    "features": len(X_train.columns),
})

# Bundle the Step 7 outputs under a single name
SPLIT_DATA = dict(
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
    X_external=X_external,
    y_external=y_external,
    feature_names=list(X_train.columns),
    n_features=len(X_train.columns),
)

rule = "=" * 100
print("\n💾 Stored: SPLIT_DATA (raw splits, ready for imputation)")
print("\n" + rule)
print("⚠️  CRITICAL: All subsequent analysis (correlation, imputation, scaling)")
print("   will use ONLY training data to prevent data leakage!")
print(rule)
print("\n✅ STEP 7 COMPLETE — DATA SPLIT SUCCESSFUL")
print(rule)

In [None]:
# ═══════════════════════════════════════════════════════════════════════════════
# STEP 8: CORRELATION MATRIX & VIF ANALYSIS (TRAINING DATA ONLY)
# TRIPOD: 10a (Predictor relationships), Multicollinearity assessment
# ═══════════════════════════════════════════════════════════════════════════════

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant

print("\n" + "="*100)
print("STEP 8: CORRELATION MATRIX & VIF ANALYSIS (TRAINING DATA ONLY)")
print("="*100)
# utcnow() (as in Step 0) so the value printed after "UTC:" really is UTC;
# datetime.now() returns the machine's local time.
print(f"UTC: {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"User: zainzampawala786-sudo\n")

X_train = SPLIT_DATA["X_train"].copy()
COLORS = DISTRIBUTION_DATA["colors_enhanced"]

print(f"⚠️  IMPORTANT: Using ONLY training data (n={len(X_train)}) to prevent data leakage")
print(f"   Correlation calculated with pairwise complete observations (before imputation)\n")

# ── 8.1 Select continuous features for correlation
continuous_features = X_train.select_dtypes(include=[np.number]).columns.tolist()

# Remove binary/categorical: keep numeric columns with more than 10 distinct
# non-null values in the training set
continuous_only = [col for col in continuous_features if X_train[col].nunique() > 10]

print(f"📊 Feature Selection for Correlation Analysis:")
print(f"   • Total features in training: {len(X_train.columns)}")
print(f"   • Continuous features (>10 unique values): {len(continuous_only)}")
print(f"   • Binary/categorical features (excluded): {len(continuous_features) - len(continuous_only)}\n")

# ── 8.2 Calculate correlation matrix (pairwise complete)
# DataFrame.corr drops missing values pair by pair, so no imputation is needed here
X_train_cont = X_train[continuous_only]
corr_matrix = X_train_cont.corr(method='pearson')

print(f"✅ Correlation matrix calculated ({len(continuous_only)} × {len(continuous_only)})")
print(f"   Method: Pearson correlation with pairwise complete observations\n")

# ── 8.3 Find high correlations
high_corr_threshold = 0.8
high_corr_pairs = []

# Scan the strict upper triangle so each unordered pair is examined once.
# NaN correlations never satisfy the comparison and are therefore skipped.
corr_columns = corr_matrix.columns
corr_values = corr_matrix.to_numpy()
n_corr_features = len(corr_columns)

for i in range(n_corr_features):
    for j in range(i + 1, n_corr_features):
        r_ij = corr_values[i, j]
        if abs(r_ij) > high_corr_threshold:
            high_corr_pairs.append({
                'Feature_1': corr_columns[i],
                'Feature_2': corr_columns[j],
                'Correlation': r_ij,
            })

# Name the columns explicitly: with no qualifying pairs an unnamed empty frame
# has no 'Correlation' column, so sorting it raises KeyError and the
# "no pairs" branch below could never run.
high_corr_df = pd.DataFrame(high_corr_pairs, columns=['Feature_1', 'Feature_2', 'Correlation'])
if not high_corr_df.empty:
    # Strongest relationships first, regardless of sign
    high_corr_df = high_corr_df.sort_values('Correlation', key=abs, ascending=False)

print(f"🔍 High Correlation Pairs (|r| > {high_corr_threshold}):")
if len(high_corr_df) > 0:
    print(f"   Found {len(high_corr_df)} pairs:\n")
    for idx, row in high_corr_df.head(15).iterrows():
        print(f"   • {row['Feature_1']:25s} ↔ {row['Feature_2']:25s}  r = {row['Correlation']:6.3f}")

    save_csv(high_corr_df, 'step8_high_correlation_pairs')
    print(f"\n   ✅ Full list saved: step8_high_correlation_pairs.csv")
else:
    print(f"   ✅ No highly correlated pairs found (good!)")

# ── 8.4 FIGURE 5: Correlation Heatmap (top 30 features by variance)
print(f"\n{'='*100}")
print(f"📊 Generating correlation heatmap...\n")

# Select up to 30 features with the largest raw variance.
# NOTE(review): raw variance depends on each feature's units, so this ranking
# favours large-scale variables — confirm this is the intended selection.
feature_variance = X_train_cont.var().sort_values(ascending=False)
top_features = feature_variance.head(30).index.tolist()

corr_subset = corr_matrix.loc[top_features, top_features]

# Create figure
fig, ax = plt.subplots(figsize=(16, 14), dpi=300)

# Hide the strict upper triangle (k=1 keeps the diagonal visible)
mask = np.triu(np.ones_like(corr_subset, dtype=bool), k=1)

# Diverging colormap centred on zero, fixed to the full [-1, 1] range
sns.heatmap(corr_subset, mask=mask, annot=False,
            cmap='RdBu_r', center=0, vmin=-1, vmax=1,
            square=True, linewidths=0.5,
            cbar_kws={'label': 'Pearson Correlation Coefficient',
                      'shrink': 0.8, 'aspect': 30},
            ax=ax)

# Styling
# Feature count and sample size come from the data: the previous title
# hard-coded "Top 30" and "n=380", which is wrong for any other cohort size
# or when fewer than 30 continuous features exist.
ax.set_title(f'Figure 5. Correlation Matrix of Top {len(top_features)} Features (by Variance)\n'
             f'Training Set (n={len(X_train)}, Pairwise Complete Observations)',
             fontsize=15, fontweight='bold', pad=20)
ax.set_xlabel('Features', fontsize=12, fontweight='bold')
ax.set_ylabel('Features', fontsize=12, fontweight='bold')

# Rotate labels
plt.setp(ax.get_xticklabels(), rotation=45, ha='right', fontsize=9)
plt.setp(ax.get_yticklabels(), rotation=0, fontsize=9)

# Add note
ax.text(0.02, -0.08,
        f'Note: Only lower triangle shown. High correlation threshold: |r| > {high_corr_threshold}',
        transform=ax.transAxes, fontsize=9, style='italic', color='gray')

plt.tight_layout()
# File stem kept unchanged so existing references to the figure still resolve
save_figure(fig, 'step8_fig5_correlation_matrix_top30')
plt.show()

print(f"✅ Figure 5 saved: Correlation heatmap (top {len(top_features)} features)")

# ── 8.5 VIF Analysis (Variance Inflation Factor)
print(f"\n{'='*100}")
print(f"📊 Calculating VIF (Variance Inflation Factor)...\n")

# For VIF, we need complete cases (drop rows with any missing)
X_train_complete = X_train_cont.dropna()

if len(X_train_complete) < 50:
    print(f"   ⚠️  Insufficient complete cases ({len(X_train_complete)}) for reliable VIF")
    print(f"      VIF calculation skipped (will recalculate after imputation)")
    vif_results = None
    # Defined in this branch too so later code never reads a missing or stale value
    high_vif = pd.DataFrame(columns=['Feature', 'VIF'])
else:
    print(f"   Using {len(X_train_complete)} complete cases (out of {len(X_train)})")

    # Zero-variance columns carry no information and are left out, as before.
    vif_features = [c for c in continuous_only if X_train_complete[c].std() > 0]

    # Build ONE design matrix holding the intercept plus every candidate feature.
    # variance_inflation_factor(exog, idx) regresses column `idx` on all other
    # columns, so the feature under test must be IN the matrix. The previous
    # code removed it and fell back to index 0, i.e. it reported the VIF of the
    # constant column for every feature.
    design = add_constant(X_train_complete[vif_features], prepend=True, has_constant='add')
    design_values = design.values
    # +1 because the constant is prepended at position 0
    design_position = {col: pos + 1 for pos, col in enumerate(vif_features)}

    vif_data = []

    # Calculate VIF for each feature
    print(f"   Calculating VIF for {len(continuous_only)} features...")

    for i, col in enumerate(continuous_only):
        # As before, a VIF is only computed when at least one other predictor exists
        if col in design_position and len(vif_features) > 1:
            try:
                vif = variance_inflation_factor(design_values, design_position[col])
                vif_data.append({'Feature': col, 'VIF': vif})
            except Exception as exc:
                # Best effort: record NaN and keep going, but say why
                print(f"      ⚠️  VIF failed for {col}: {exc}")
                vif_data.append({'Feature': col, 'VIF': np.nan})

        if (i+1) % 10 == 0:
            print(f"      Progress: {i+1}/{len(continuous_only)} features")

    # Explicit columns keep sort_values working even when no VIF was computed
    vif_df = pd.DataFrame(vif_data, columns=['Feature', 'VIF']).sort_values('VIF', ascending=False)

    # VIF interpretation
    print(f"\n   📈 VIF Results (Top 20 highest):\n")
    print(f"      VIF < 5:   No multicollinearity")
    print(f"      VIF 5-10:  Moderate multicollinearity")
    print(f"      VIF > 10:  High multicollinearity (consider removal)\n")

    for idx, row in vif_df.head(20).iterrows():
        vif_val = row['VIF']
        if pd.notna(vif_val):
            if vif_val > 10:
                status = "⚠️  HIGH"
                color_marker = "🔴"
            elif vif_val > 5:
                status = "⚠️  MODERATE"
                color_marker = "🟡"
            else:
                status = "✅ OK"
                color_marker = "🟢"
            print(f"      {color_marker} {row['Feature']:30s}  VIF = {vif_val:8.2f}  {status}")
        else:
            print(f"         {row['Feature']:30s}  VIF = N/A")

    high_vif = vif_df[vif_df['VIF'] > 10]
    print(f"\n   📊 Features with VIF > 10 (high multicollinearity): {len(high_vif)}")

    save_csv(vif_df, 'step8_vif_scores')
    vif_results = vif_df

# ── 8.6 Log
# high_vif is only consulted when the VIF analysis actually produced results
if vif_results is not None and len(high_vif) > 0:
    n_high_vif_logged = len(high_vif)
else:
    n_high_vif_logged = 0

step8_log = {
    "analysis": "Correlation matrix and VIF on training data (before imputation)",
    "method": "Pearson correlation, pairwise complete observations",
    "training_samples": len(X_train),
    "continuous_features": len(continuous_only),
    "high_correlation_pairs": len(high_corr_df),
    "high_vif_features": n_high_vif_logged,
}
append_runlog("8", step8_log)

# Bundle the Step 8 outputs under a single name
CORRELATION_DATA = dict(
    corr_matrix=corr_matrix,
    high_corr_pairs=high_corr_df,
    vif_scores=vif_results,
    continuous_features=continuous_only,
)

separator = "=" * 100
print("\n💾 Stored: CORRELATION_DATA")
print("\n" + separator)
print("✅ STEP 8 COMPLETE — CORRELATION & VIF ANALYSIS FINISHED")
print(separator)

In [None]:
# ═══════════════════════════════════════════════════════════════════════════════
# STEP 9: IMPUTATION METHOD COMPARISON & AUTOMATIC SELECTION
# TRIPOD: 5c (Handling of missing data - imputation method selection)
# ═══════════════════════════════════════════════════════════════════════════════

from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.model_selection import cross_val_score, StratifiedKFold
from xgboost import XGBClassifier
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import time

print("\n" + "="*100)
print("STEP 9: IMPUTATION METHOD COMPARISON & AUTOMATIC SELECTION")
print("="*100)
# utcnow() (as in Step 0) so the value printed after "UTC:" really is UTC;
# datetime.now() returns the machine's local time.
print(f"UTC: {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"User: zainzampawala786-sudo\n")

# Work on copies of the raw (pre-imputation) splits from Step 7
X_train = SPLIT_DATA["X_train"].copy()
X_test = SPLIT_DATA["X_test"].copy()
X_external = SPLIT_DATA["X_external"].copy()
y_train = SPLIT_DATA["y_train"].copy()

COLORS = DISTRIBUTION_DATA["colors_enhanced"]

print(f"⚠️  CRITICAL: All imputation methods fit ONLY on training data")
print(f"   Evaluation: 5-fold stratified cross-validation with XGBoost\n")

# ── 9.1 Separate continuous and categorical features
# Same rule as the earlier steps: more than 10 distinct non-null values in the
# training set means continuous, otherwise categorical. Column order is kept.
train_unique_counts = X_train.nunique()
continuous_features = [col for col in X_train.columns if train_unique_counts[col] > 10]
categorical_features = [col for col in X_train.columns if train_unique_counts[col] <= 10]

print(f"📊 Feature Classification:")
print(f"   • Continuous features: {len(continuous_features)}")
print(f"   • Categorical features: {len(categorical_features)}")
print(f"   • Total: {len(X_train.columns)}\n")

# ── 9.2 Define imputation methods
# (method name, imputer for continuous features, description). Every method
# pairs its continuous imputer with its own most-frequent (mode) imputer for
# the categorical features; a fresh instance is built per method so that
# fitted state is never shared between methods.
_method_specs = [
    (
        'Median/Mode',
        SimpleImputer(strategy='median'),
        'Simple univariate imputation (median for continuous, mode for categorical)',
    ),
    (
        'Mean/Mode',
        SimpleImputer(strategy='mean'),
        'Simple univariate imputation (mean for continuous, mode for categorical)',
    ),
    (
        'KNN (k=5)',
        KNNImputer(n_neighbors=5, weights='distance'),  # KNN for continuous only
        'K-Nearest Neighbors imputation (k=5) for continuous, mode for categorical',
    ),
    (
        'Iterative (MICE)',
        IterativeImputer(max_iter=10, random_state=42),  # MICE for continuous only
        'Multivariate iterative imputation (MICE) for continuous, mode for categorical',
    ),
]

imputation_methods = {
    method_label: {
        'continuous': continuous_imputer,
        'categorical': SimpleImputer(strategy='most_frequent'),
        'description': description,
    }
    for method_label, continuous_imputer, description in _method_specs
}

print(f"🔬 Testing {len(imputation_methods)} Imputation Methods:\n")
for rank, (method_label, method_cfg) in enumerate(imputation_methods.items(), start=1):
    print(f"   {rank}. {method_label:20s} - {method_cfg['description']}")

# ── 9.3 Evaluation function
def evaluate_imputation(X_train, y_train, imputer_cont, imputer_cat, cont_features, cat_features):
    """Score an imputation strategy with leakage-free 5-fold CV.

    The imputers and the XGBoost classifier are wrapped in a single
    scikit-learn ``Pipeline`` so that, inside every CV split, the imputers are
    re-fit on the training fold only and merely applied to the validation
    fold. Fitting the imputers on the full training frame *before* calling
    ``cross_val_score`` would let validation rows influence the imputed
    values and bias the AUC comparison between strategies.

    Parameters
    ----------
    X_train : pandas.DataFrame
        Training predictors (may contain missing values).
    y_train : array-like
        Binary outcome aligned with ``X_train``.
    imputer_cont, imputer_cat : scikit-learn imputers
        Unfitted imputers for the continuous / categorical columns.
    cont_features, cat_features : list of str
        Column names routed to ``imputer_cont`` / ``imputer_cat``.

    Returns
    -------
    scores : numpy.ndarray
        ROC AUC for each of the 5 stratified folds.
    X_train_imputed : pandas.DataFrame
        ``X_train`` imputed with the imputers fit on the *whole* training
        frame, columns in the original order. As a side effect the passed-in
        imputers are left fitted on the full training frame.
    """
    # Function-scope imports keep this cell self-contained; scikit-learn is
    # already a dependency of the notebook.
    from sklearn.compose import ColumnTransformer
    from sklearn.pipeline import Pipeline

    def _impute_full(imputer, features):
        """Fit/transform one feature group on the full training frame."""
        if len(features) == 0:
            return pd.DataFrame(index=X_train.index)
        return pd.DataFrame(
            imputer.fit_transform(X_train[features]),
            columns=features,
            index=X_train.index,
        )

    # Full-data imputation: returned to the caller, NOT used for scoring.
    X_train_imputed = pd.concat(
        [_impute_full(imputer_cont, cont_features), _impute_full(imputer_cat, cat_features)],
        axis=1,
    )
    X_train_imputed = X_train_imputed[X_train.columns]  # maintain order

    # Only register non-empty feature groups with the ColumnTransformer.
    transformers = []
    if len(cont_features) > 0:
        transformers.append(('continuous', imputer_cont, list(cont_features)))
    if len(cat_features) > 0:
        transformers.append(('categorical', imputer_cat, list(cat_features)))

    model = XGBClassifier(
        n_estimators=100,
        max_depth=3,
        learning_rate=0.1,
        random_state=42,
        eval_metric='logloss',
        use_label_encoder=False
    )

    # cross_val_score clones the whole pipeline per fold, so each fold gets
    # freshly fitted imputers. The transformer emits continuous columns first,
    # then categorical ones, so the model sees a different column order than
    # the returned frame.
    scoring_pipeline = Pipeline([
        ('impute', ColumnTransformer(transformers, remainder='drop')),
        ('model', model),
    ])

    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    scores = cross_val_score(scoring_pipeline, X_train, y_train,
                             cv=cv, scoring='roc_auc', n_jobs=-1)

    return scores, X_train_imputed

# ── 9.4 Compare imputation methods
# Each strategy is evaluated independently; a failure in one strategy is
# reported and recorded (Success=False) without stopping the others.
print(f"\n{'='*100}")
print(f"🔬 EVALUATING IMPUTATION METHODS (5-Fold CV with XGBoost)...\n")

results = []

for method_name, config in imputation_methods.items():
    print(f"   Testing: {method_name}...")

    start_time = time.time()

    try:
        scores, X_imputed = evaluate_imputation(
            X_train, y_train,
            config['continuous'],
            config['categorical'],
            continuous_features,
            categorical_features
        )
    except Exception as e:
        # Best-effort: keep going with the remaining strategies.
        print(f"      ❌ FAILED: {str(e)}")
        results.append({
            'Method': method_name,
            'Mean_AUC': 0,
            'Std_AUC': 0,
            'Min_AUC': 0,
            'Max_AUC': 0,
            'Time_seconds': 0,
            'Scores': [],
            'Success': False
        })
    else:
        elapsed_time = time.time() - start_time

        results.append({
            'Method': method_name,
            'Mean_AUC': scores.mean(),
            'Std_AUC': scores.std(),
            'Min_AUC': scores.min(),
            'Max_AUC': scores.max(),
            'Time_seconds': elapsed_time,
            'Scores': scores,
            'Success': True
        })

        print(f"      ✅ AUC: {scores.mean():.4f} ± {scores.std():.4f} (time: {elapsed_time:.1f}s)")

results_df = pd.DataFrame(results)

# Everything downstream (ranking table, figure, best-method selection) indexes
# the first successful row; stop here with a clear message instead of failing
# later with an opaque IndexError.
if results_df.empty or not results_df['Success'].any():
    raise RuntimeError(
        "Step 9: every imputation method failed during evaluation; "
        "see the FAILED messages above."
    )

# Keep only successful runs, best mean AUC first.
results_df = results_df[results_df['Success']].sort_values('Mean_AUC', ascending=False)

# ── 9.5 Display results
print(f"\n{'='*100}")
print("📊 IMPUTATION METHOD COMPARISON RESULTS:\n")

# Console table: summary columns with a 1-based rank prepended
# (results_df is already ordered by Mean_AUC, best first).
display_df = results_df[['Method', 'Mean_AUC', 'Std_AUC', 'Time_seconds']].copy()
display_df.insert(0, 'Rank', list(range(1, len(display_df) + 1)))

print(display_df.to_string(index=False))

# Persist the numeric summary (per-fold scores and the Success flag are omitted).
_csv_columns = ['Method', 'Mean_AUC', 'Std_AUC', 'Min_AUC', 'Max_AUC', 'Time_seconds']
save_csv(results_df[_csv_columns], 'step9_imputation_comparison')

# ── 9.6 FIGURE: Comparison plot
# Two-panel figure: (A) horizontal bars of mean CV AUC with ±1 SD error bars,
# (B) box plots of the per-fold AUC values for each imputation method.
print(f"\n{'='*100}")
print(f"📊 Generating comparison visualization...\n")

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6), dpi=300)

# Plot 1: AUC comparison with error bars
# Arrays follow results_df row order (sorted by Mean_AUC, best first).
methods = results_df['Method'].values
mean_aucs = results_df['Mean_AUC'].values
std_aucs = results_df['Std_AUC'].values

# One colour per bar; the slice trims the palette when fewer than four
# methods are present.
colors_list = [COLORS['survived'], COLORS['died'], COLORS['sig'], COLORS['primary']][:len(methods)]

bars = ax1.barh(methods, mean_aucs, xerr=std_aucs, 
                color=colors_list, alpha=0.8, 
                edgecolor=COLORS['primary'], linewidth=2,
                error_kw={'linewidth': 2, 'ecolor': COLORS['primary']})

# Highlight best method
# bars[0] corresponds to the first row of results_df, i.e. the highest mean AUC.
# NOTE(review): best_idx is not read anywhere in this section — confirm before removing.
best_idx = results_df['Mean_AUC'].idxmax()
bars[0].set_edgecolor(COLORS['sig'])
bars[0].set_linewidth(3)

ax1.set_xlabel('Cross-Validated AUC (5-Fold)', fontsize=12, fontweight='bold')
ax1.set_ylabel('Imputation Method', fontsize=12, fontweight='bold')
ax1.set_title('A. Imputation Method Performance Comparison', 
              fontsize=13, fontweight='bold', loc='left')
ax1.grid(axis='x', alpha=0.3, linestyle='--')
ax1.set_xlim(0, 1)

# Add value labels
# Text is placed 0.02 to the right of each bar end, at the bar's y position i.
for i, (method, auc, std) in enumerate(zip(methods, mean_aucs, std_aucs)):
    ax1.text(auc + 0.02, i, f'{auc:.4f} ± {std:.4f}', 
            va='center', fontsize=10, fontweight='bold',
            color=COLORS['primary'])

# Plot 2: Box plot of CV scores
# Long-format table of (Method, AUC) pairs, one row per fold.
# NOTE(review): cv_df / cv_scores_data are not used by the boxplot call below
# (it reads results_df directly) — confirm whether later cells rely on them.
cv_scores_data = []
for idx, row in results_df.iterrows():
    for score in row['Scores']:
        cv_scores_data.append({
            'Method': row['Method'],
            'AUC': score
        })

cv_df = pd.DataFrame(cv_scores_data)

# One box per method, in the same order as `methods`.
bp = ax2.boxplot([results_df.iloc[i]['Scores'] for i in range(len(results_df))],
                  labels=methods,
                  patch_artist=True,
                  widths=0.6,
                  showfliers=True,
                  boxprops=dict(facecolor=COLORS['survived'], alpha=0.7, 
                               edgecolor=COLORS['primary'], linewidth=2),
                  whiskerprops=dict(color=COLORS['primary'], linewidth=1.5),
                  capprops=dict(color=COLORS['primary'], linewidth=1.5),
                  medianprops=dict(color=COLORS['secondary'], linewidth=2.5),
                  flierprops=dict(marker='o', markerfacecolor=COLORS['died'], 
                                 markersize=6, alpha=0.6))

ax2.set_ylabel('Cross-Validation AUC', fontsize=12, fontweight='bold')
ax2.set_xlabel('Imputation Method', fontsize=12, fontweight='bold')
ax2.set_title('B. Cross-Validation Score Distribution', 
              fontsize=13, fontweight='bold', loc='left')
ax2.grid(axis='y', alpha=0.3, linestyle='--')
# Fixed y-range: fold scores below 0.5 would fall outside the visible area.
ax2.set_ylim(0.5, 1.0)
plt.setp(ax2.get_xticklabels(), rotation=15, ha='right')

fig.suptitle('Figure 6. Imputation Method Performance Comparison (5-Fold CV, XGBoost)', 
             fontsize=15, fontweight='bold', y=0.98)

# Leave the top 4% of the canvas free for the suptitle.
plt.tight_layout(rect=[0, 0, 1, 0.96])
save_figure(fig, 'step9_fig6_imputation_comparison')
plt.show()

print(f"✅ Figure 6 saved: Imputation method comparison")

# ── 9.7 Select BEST method and apply
# results_df is ordered by Mean_AUC (descending), so its first row is the winner.
_top_row = results_df.iloc[0]
best_method_name = _top_row['Method']
best_auc = _top_row['Mean_AUC']
best_std = _top_row['Std_AUC']
_worst_auc = results_df['Mean_AUC'].min()

print(f"\n{'='*100}")
print(f"🏆 BEST IMPUTATION METHOD SELECTED: {best_method_name}")
print(f"   • Cross-Validated AUC: {best_auc:.4f} ± {best_std:.4f}")
print(f"   • Improvement over worst: +{(best_auc - _worst_auc):.4f}\n")

# Get best imputers
best_config = imputation_methods[best_method_name]
best_imputer_cont, best_imputer_cat = best_config['continuous'], best_config['categorical']

print(f"🔧 Applying {best_method_name} to all datasets...\n")

# ── 9.8 Apply BEST imputation to all datasets
# Both imputers are fit on the training split only and then applied, unchanged,
# to the training, test and external frames.
def _transform_to_frame(fitted_imputer, frame, feature_names):
    """Apply a fitted imputer to the chosen columns and re-wrap the result as a DataFrame."""
    return pd.DataFrame(
        fitted_imputer.transform(frame[feature_names]),
        columns=feature_names,
        index=frame.index,
    )


_step9_frames = (X_train, X_test, X_external)

# CONTINUOUS
if len(continuous_features) > 0:
    best_imputer_cont.fit(X_train[continuous_features])
    X_train_cont_imputed, X_test_cont_imputed, X_external_cont_imputed = [
        _transform_to_frame(best_imputer_cont, frame, continuous_features)
        for frame in _step9_frames
    ]
    print(f"   ✅ Continuous features imputed ({len(continuous_features)} features)")
else:
    # No continuous columns: keep empty, index-aligned placeholders for the concat below.
    X_train_cont_imputed, X_test_cont_imputed, X_external_cont_imputed = [
        pd.DataFrame(index=frame.index) for frame in _step9_frames
    ]

# CATEGORICAL
if len(categorical_features) > 0:
    best_imputer_cat.fit(X_train[categorical_features])
    X_train_cat_imputed, X_test_cat_imputed, X_external_cat_imputed = [
        _transform_to_frame(best_imputer_cat, frame, categorical_features)
        for frame in _step9_frames
    ]
    print(f"   ✅ Categorical features imputed ({len(categorical_features)} features)")
else:
    X_train_cat_imputed, X_test_cat_imputed, X_external_cat_imputed = [
        pd.DataFrame(index=frame.index) for frame in _step9_frames
    ]

# COMBINE, then reorder columns to match the original training frame
original_order = X_train.columns.tolist()
X_train_imputed, X_test_imputed, X_external_imputed = [
    pd.concat([cont_part, cat_part], axis=1)[original_order]
    for cont_part, cat_part in (
        (X_train_cont_imputed, X_train_cat_imputed),
        (X_test_cont_imputed, X_test_cat_imputed),
        (X_external_cont_imputed, X_external_cat_imputed),
    )
]

print("   ✅ Datasets combined and reordered")

# ── 9.9 Verify no missing values
# Total count of NaN cells remaining in each imputed frame.
miss_train = X_train_imputed.isna().to_numpy().sum()
miss_test = X_test_imputed.isna().to_numpy().sum()
miss_ext = X_external_imputed.isna().to_numpy().sum()

print("\n📉 Final Missingness Check:")
print(f"   • Training:  {miss_train} missing values")
print(f"   • Testing:   {miss_test} missing values")
print(f"   • External:  {miss_ext} missing values")

if miss_train == miss_test == miss_ext == 0:
    print("\n   ✅ SUCCESS: All datasets fully imputed!")
else:
    print("\n   ⚠️  WARNING: Some missing values remain")

# ── 9.10 Save everything
# Imputed frames first, then the two fitted imputers.
for _artifact, _stem in (
    (X_train_imputed, 'step9_X_train_imputed'),
    (X_test_imputed, 'step9_X_test_imputed'),
    (X_external_imputed, 'step9_X_external_imputed'),
    (best_imputer_cont, 'step9_best_imputer_continuous'),
    (best_imputer_cat, 'step9_best_imputer_categorical'),
):
    save_pickle(_artifact, _stem)

print("\n💾 Saved imputed datasets and best imputers:")
print("   • X_train_imputed.pkl, X_test_imputed.pkl, X_external_imputed.pkl")
print("   • best_imputer_continuous.pkl, best_imputer_categorical.pkl")

# ── 9.11 Log
# Run-log entry summarising the comparison and the selected strategy.
_step9_log_entry = {
    "analysis": "Imputation method comparison and automatic selection",
    "methods_tested": len(imputation_methods),
    "best_method": best_method_name,
    "best_cv_auc": round(best_auc, 4),
    "best_cv_std": round(best_std, 4),
    "continuous_features": len(continuous_features),
    "categorical_features": len(categorical_features),
    "evaluation": "5-fold stratified CV with XGBoost",
}
append_runlog("9", _step9_log_entry)

# Bundle handed to later steps: imputed predictors, the untouched outcome
# vectors from SPLIT_DATA, the fitted imputers and the comparison table.
IMPUTED_DATA = dict(
    X_train_imputed=X_train_imputed,
    X_test_imputed=X_test_imputed,
    X_external_imputed=X_external_imputed,
    y_train=SPLIT_DATA["y_train"],
    y_test=SPLIT_DATA["y_test"],
    y_external=SPLIT_DATA["y_external"],
    best_imputer_continuous=best_imputer_cont,
    best_imputer_categorical=best_imputer_cat,
    best_method_name=best_method_name,
    imputation_comparison=results_df,
    continuous_features=continuous_features,
    categorical_features=categorical_features,
)

_rule = "=" * 100
print("\n💾 Stored: IMPUTED_DATA (best method automatically applied)")
print("\n" + _rule)
print("✅ STEP 9 COMPLETE — BEST IMPUTATION METHOD APPLIED")
print(_rule)

In [None]:
# ═══════════════════════════════════════════════════════════════════════════════
# STEP 10: VIF RECALCULATION & REDUNDANT FEATURE REMOVAL (IMPUTED DATA)
# TRIPOD: 10a (Predictor relationships after imputation)
# ═══════════════════════════════════════════════════════════════════════════════

from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

_rule = "=" * 100
_utc_stamp = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')

print("\n" + _rule)
print("STEP 10: VIF RECALCULATION & REDUNDANT FEATURE REMOVAL")
print(_rule)
print(f"UTC: {_utc_stamp}")
print("User: zainzampawala786-sudo\n")

# Work on copies so the frames stored in IMPUTED_DATA are not modified here.
X_train_imputed, X_test_imputed, X_external_imputed = (
    IMPUTED_DATA[_key].copy()
    for _key in ("X_train_imputed", "X_test_imputed", "X_external_imputed")
)
continuous_features = IMPUTED_DATA["continuous_features"]
COLORS = DISTRIBUTION_DATA["colors_enhanced"]

_remaining_missing = X_train_imputed.isna().sum().sum()
print(f"✅ Using IMPUTED training data (n={len(X_train_imputed)})")
print(f"   • No missing values: {_remaining_missing} missing")
print(f"   • Continuous features: {len(continuous_features)}\n")

# ── 10.1 Calculate VIF for all continuous features
print(f"{'='*100}")
print(f"🔬 CALCULATING VIF (Variance Inflation Factor)...\n")
print(f"   Computing VIF for {len(continuous_features)} continuous features...")

X_cont = X_train_imputed[continuous_features]

# The design matrix (continuous features plus intercept) is identical for
# every feature, so build it once instead of twice per loop iteration.
try:
    _vif_design = add_constant(X_cont)
    _vif_design_values = _vif_design.values
    _vif_design_columns = list(_vif_design.columns)
except Exception as e:
    # Best-effort, as before: an unusable design matrix yields NaN VIFs
    # rather than aborting the step, but the reason is now reported.
    print(f"   ⚠️  Could not build VIF design matrix: {e}")
    _vif_design_values, _vif_design_columns = None, []

vif_data = []

for i, col in enumerate(continuous_features):
    vif = np.nan
    if _vif_design_values is not None:
        try:
            # VIF of `col` regressed on all other design-matrix columns.
            vif = variance_inflation_factor(_vif_design_values,
                                            _vif_design_columns.index(col))
        except Exception as e:
            print(f"      ⚠️  VIF failed for {col}: {e}")

    vif_data.append({
        'Feature': col,
        'VIF': vif
    })

    if (i + 1) % 10 == 0:
        print(f"      Progress: {i+1}/{len(continuous_features)} features")

print(f"\n   ✅ VIF calculation complete\n")

# Highest VIF first; NaN entries sort to the end.
vif_df = pd.DataFrame(vif_data).sort_values('VIF', ascending=False)

# ── 10.2 Display VIF results
print(f"{'='*100}")
print(f"📊 VIF RESULTS (Imputed Training Data):\n")
print(f"   Interpretation:")
print(f"      VIF < 5:    ✅ No multicollinearity")
print(f"      VIF 5-10:   🟡 Moderate multicollinearity")
print(f"      VIF > 10:   🔴 High multicollinearity (consider removal)\n")

print(f"   Top 20 Features by VIF:\n")

for idx, row in vif_df.head(20).iterrows():
    vif_val = row['VIF']
    feat = row['Feature']

    # Features whose VIF could not be computed are listed without a status.
    if pd.isna(vif_val):
        print(f"         {feat:35s}  VIF = N/A")
        continue

    if vif_val > 10:
        marker, status = "🔴", "HIGH"
    elif vif_val > 5:
        marker, status = "🟡", "MODERATE"
    else:
        marker, status = "🟢", "OK"

    print(f"      {marker} {feat:35s}  VIF = {vif_val:8.2f}  ({status})")

# Count problematic features
high_vif = vif_df[vif_df['VIF'] > 10]
moderate_vif = vif_df[(vif_df['VIF'] > 5) & (vif_df['VIF'] <= 10)]

# Count low-VIF features directly. Deriving the number by subtraction would
# silently classify features with an undefined (NaN) VIF as "low".
low_vif_count = int((vif_df['VIF'] <= 5).sum())
undefined_vif_count = int(vif_df['VIF'].isna().sum())

print(f"\n   📊 VIF Summary:")
print(f"      • High VIF (>10):      {len(high_vif)} features")
print(f"      • Moderate VIF (5-10): {len(moderate_vif)} features")
print(f"      • Low VIF (<5):        {low_vif_count} features")
if undefined_vif_count:
    print(f"      • Undefined VIF (N/A): {undefined_vif_count} features")

save_csv(vif_df, 'step10_vif_recalculated_imputed')

# ── 10.3 Review high-correlation pairs from Step 8
print(f"\n{'='*100}")
print(f"🔍 REVIEWING HIGH-CORRELATION PAIRS (from Step 8):\n")

high_corr_pairs = CORRELATION_DATA["high_corr_pairs"]

print(f"   Found {len(high_corr_pairs)} pairs with |r| > 0.8:\n")

# Single lookup table instead of filtering vif_df twice per pair.
vif_by_feature = dict(zip(vif_df['Feature'], vif_df['VIF']))

# Number the pairs by position: the DataFrame index label is not guaranteed
# to be 0..n-1 (e.g. after sorting or filtering), so `idx + 1` could print
# scrambled numbers or fail on a non-integer index.
for pair_no, (idx, row) in enumerate(high_corr_pairs.iterrows(), start=1):
    feat1 = row['Feature_1']
    feat2 = row['Feature_2']
    corr = row['Correlation']

    # Get VIF for both (NaN when the feature has no VIF entry)
    vif1 = vif_by_feature.get(feat1, np.nan)
    vif2 = vif_by_feature.get(feat2, np.nan)

    print(f"   {pair_no}. {feat1:30s} ↔ {feat2:30s}")
    print(f"      Correlation: r = {corr:6.3f}")
    print(f"      VIF: {feat1}: {vif1:6.2f}  |  {feat2}: {vif2:6.2f}\n")

# ── 10.4 Define features to drop (clinical + statistical logic)
print(f"{'='*100}")
print("🗑️  REDUNDANT FEATURES TO DROP (Clinical + Statistical Logic):\n")

# Manually curated mapping: feature to remove -> justification shown in the report.
FEATURES_TO_DROP = {
    'neutrophils_abs_max': 'Redundant with wbc_count_max (r=0.987). WBC more comprehensive.',
    'eosinophils_abs_min': 'Redundant with eosinophils_pct_min (r=0.932). Percentage normalized.',
    'eosinophils_abs_max': 'Redundant with eosinophils_pct_max (r=0.873). Percentage normalized.',
    'rbc_count_min': 'Redundant with hemoglobin_min (r=0.806). Hemoglobin more clinically actionable.',
}

print(f"   Dropping {len(FEATURES_TO_DROP)} redundant features:\n")

_vif_feature_names = vif_df['Feature'].values
for i, (feat, reason) in enumerate(FEATURES_TO_DROP.items(), start=1):
    # Show the pre-removal VIF when one was computed for this feature, else NaN.
    if feat in _vif_feature_names:
        vif = vif_df.loc[vif_df['Feature'] == feat, 'VIF'].values[0]
    else:
        vif = np.nan
    print(f"   {i}. {feat:30s} (VIF: {vif:6.2f})")
    print(f"      → {reason}\n")

# Note: Keep ALT_min and AST_min (both clinically important, different enzymes)
print("   ✅ KEEPING: ALT_min & AST_min (r=0.863)")
print("      → Reason: Different enzymes (AST: cardiac/muscle, ALT: hepatic-specific)\n")

# ── 10.5 Drop features from all datasets
print(f"{'='*100}")
print("🔧 REMOVING REDUNDANT FEATURES FROM ALL DATASETS...\n")

features_to_drop_list = list(FEATURES_TO_DROP)

# Only names actually present in the training frame are dropped.
existing_drops = [
    candidate for candidate in features_to_drop_list
    if candidate in X_train_imputed.columns
]

print(f"   Features to drop: {len(features_to_drop_list)}")
print(f"   Features found in data: {len(existing_drops)}")

# Same column list for all three splits; errors='ignore' tolerates a column
# that is missing from the test or external frame.
X_train_clean, X_test_clean, X_external_clean = (
    frame.drop(columns=existing_drops, errors='ignore')
    for frame in (X_train_imputed, X_test_imputed, X_external_imputed)
)

print("\n   ✅ Features removed from all datasets")
print("\n   📊 Dataset Dimensions:")
print(f"      • Training:  {X_train_imputed.shape} → {X_train_clean.shape}")
print(f"      • Testing:   {X_test_imputed.shape} → {X_test_clean.shape}")
print(f"      • External:  {X_external_imputed.shape} → {X_external_clean.shape}")

n_features_remaining = X_train_clean.shape[1]
_n_features_before = X_train_imputed.shape[1]
print(f"\n   🎯 Remaining features: {n_features_remaining} (from original {_n_features_before})")

# ── 10.6 Recalculate VIF on cleaned data (optional verification)
print(f"\n{'='*100}")
print(f"🔬 VERIFICATION: Recalculating VIF on cleaned data...\n")

continuous_features_clean = [f for f in continuous_features if f not in existing_drops]

X_cont_clean = X_train_clean[continuous_features_clean]

# Build the design matrix (features plus intercept) once; it does not change
# between loop iterations.
try:
    _clean_design = add_constant(X_cont_clean)
    _clean_design_values = _clean_design.values
    _clean_design_columns = list(_clean_design.columns)
except Exception as e:
    # Best-effort, as before: report the problem and fall back to NaN VIFs.
    print(f"   ⚠️  Could not build VIF design matrix: {e}")
    _clean_design_values, _clean_design_columns = None, []

vif_data_clean = []

for i, col in enumerate(continuous_features_clean):
    vif = np.nan
    if _clean_design_values is not None:
        try:
            vif = variance_inflation_factor(_clean_design_values,
                                            _clean_design_columns.index(col))
        except Exception as e:
            # `except Exception` (not a bare except) so Ctrl-C / SystemExit
            # still interrupt the loop.
            print(f"      ⚠️  VIF failed for {col}: {e}")
    vif_data_clean.append({'Feature': col, 'VIF': vif})

    if (i + 1) % 10 == 0:
        print(f"      Progress: {i+1}/{len(continuous_features_clean)} features")

vif_df_clean = pd.DataFrame(vif_data_clean).sort_values('VIF', ascending=False)

print(f"\n   📊 VIF Summary (After Removal):")
high_vif_clean = vif_df_clean[vif_df_clean['VIF'] > 10]
moderate_vif_clean = vif_df_clean[(vif_df_clean['VIF'] > 5) & (vif_df_clean['VIF'] <= 10)]

# Count low-VIF features directly so undefined (NaN) VIFs are not reported as low.
low_vif_clean_count = int((vif_df_clean['VIF'] <= 5).sum())
undefined_vif_clean_count = int(vif_df_clean['VIF'].isna().sum())

print(f"      • High VIF (>10):      {len(high_vif_clean)} features")
print(f"      • Moderate VIF (5-10): {len(moderate_vif_clean)} features")
print(f"      • Low VIF (<5):        {low_vif_clean_count} features")
if undefined_vif_clean_count:
    print(f"      • Undefined VIF (N/A): {undefined_vif_clean_count} features")

if len(high_vif_clean) > 0:
    print(f"\n      Remaining high VIF features:")
    for idx, row in high_vif_clean.head(5).iterrows():
        print(f"         🔴 {row['Feature']:35s}  VIF = {row['VIF']:8.2f}")

save_csv(vif_df_clean, 'step10_vif_after_removal')

# ── 10.7 FIGURE 7: VIF comparison (before vs after)
print(f"\n{'='*100}")
print("📊 Generating VIF comparison visualization...\n")


def _draw_vif_panel(ax, vif_values, bar_color, panel_title, n_high, box_edge):
    """Draw one VIF histogram panel with the 5/10 reference lines and a count box."""
    ax.hist(vif_values, bins=30, color=bar_color, alpha=0.7,
            edgecolor=COLORS['primary'], linewidth=1.5)
    ax.axvline(x=10, color=COLORS['secondary'], linestyle='--', linewidth=2.5,
               label='VIF = 10 (threshold)')
    ax.axvline(x=5, color=COLORS['sig'], linestyle='--', linewidth=2,
               label='VIF = 5 (moderate)')
    ax.set_xlabel('VIF Score', fontsize=12, fontweight='bold')
    ax.set_ylabel('Frequency', fontsize=12, fontweight='bold')
    ax.set_title(panel_title, fontsize=13, fontweight='bold', loc='left')
    ax.legend(fontsize=10, frameon=True, fancybox=True)
    ax.grid(axis='y', alpha=0.3, linestyle='--')
    # Annotation box in the top-right corner (axes coordinates).
    ax.text(0.98, 0.95, f'n = {len(vif_values)}\nHigh VIF (>10): {n_high}',
            transform=ax.transAxes, ha='right', va='top', fontsize=10,
            bbox=dict(boxstyle='round', facecolor='white', edgecolor=box_edge, alpha=0.8))


fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 8), dpi=300)

# Features with an undefined VIF are left out of the histograms.
vif_before = vif_df['VIF'].dropna()
vif_after = vif_df_clean['VIF'].dropna()

# Plot 1: VIF distribution BEFORE removal
_draw_vif_panel(ax1, vif_before, COLORS['died'],
                'A. VIF Distribution Before Feature Removal',
                len(high_vif), COLORS['primary'])

# Plot 2: VIF distribution AFTER removal
_draw_vif_panel(ax2, vif_after, COLORS['survived'],
                'B. VIF Distribution After Feature Removal',
                len(high_vif_clean), COLORS['sig'])

fig.suptitle('Figure 7. VIF Distribution Before and After Redundant Feature Removal\nTraining Set (Imputed Data)',
             fontsize=15, fontweight='bold', y=0.98)

# Reserve the top 4% of the canvas for the two-line suptitle.
plt.tight_layout(rect=[0, 0, 1, 0.96])
save_figure(fig, 'step10_fig7_vif_comparison')
plt.show()

print("✅ Figure 7 saved: VIF comparison")

# ── 10.8 Save cleaned datasets
for _frame, _stem in (
    (X_train_clean, 'step10_X_train_clean'),
    (X_test_clean, 'step10_X_test_clean'),
    (X_external_clean, 'step10_X_external_clean'),
):
    save_pickle(_frame, _stem)

# Save dropped features report: one row per entry in FEATURES_TO_DROP with its
# justification and the VIF it had before removal (NaN when none was computed).
_dropped_names = list(FEATURES_TO_DROP)
_vif_names_seen = vif_df['Feature'].values
_vif_before_removal = [
    vif_df.loc[vif_df['Feature'] == name, 'VIF'].values[0] if name in _vif_names_seen else np.nan
    for name in _dropped_names
]

dropped_report = pd.DataFrame({
    'Feature': _dropped_names,
    'Reason': [FEATURES_TO_DROP[name] for name in _dropped_names],
    'VIF_Before_Removal': _vif_before_removal,
})
save_csv(dropped_report, 'step10_dropped_features_report')

# ── 10.9 Log
# High / moderate VIF counts before and after the removal, plus feature counts.
_vif_counts_before = {
    "high_vif_gt10": len(high_vif),
    "moderate_vif_5to10": len(moderate_vif),
}
_vif_counts_after = {
    "high_vif_gt10": len(high_vif_clean),
    "moderate_vif_5to10": len(moderate_vif_clean),
}
append_runlog("10", {
    "analysis": "VIF recalculation and redundant feature removal",
    "vif_before_removal": _vif_counts_before,
    "features_dropped": len(existing_drops),
    "features_remaining": n_features_remaining,
    "vif_after_removal": _vif_counts_after,
})

# Bundle handed to later steps: cleaned predictors, outcome vectors carried
# over unchanged from IMPUTED_DATA, and both VIF tables.
CLEAN_FEATURE_DATA = dict(
    X_train_clean=X_train_clean,
    X_test_clean=X_test_clean,
    X_external_clean=X_external_clean,
    y_train=IMPUTED_DATA["y_train"],
    y_test=IMPUTED_DATA["y_test"],
    y_external=IMPUTED_DATA["y_external"],
    features_dropped=list(FEATURES_TO_DROP.keys()),
    n_features_remaining=n_features_remaining,
    vif_before=vif_df,
    vif_after=vif_df_clean,
)

_rule = "=" * 100
print("\n💾 Stored: CLEAN_FEATURE_DATA (multicollinearity resolved)")
print("\n" + _rule)
print("✅ STEP 10 COMPLETE — REDUNDANT FEATURES REMOVED, VIF IMPROVED")
print(_rule)

In [None]:
# ═══════════════════════════════════════════════════════════════════════════════
# STEP 11: ANOVA FILTER + OPTIMAL K SELECTION (NESTED CROSS-VALIDATION)
# TRIPOD: 10b (Feature selection - filter method with nested CV)
# Current Date and Time (UTC): 2025-10-19 17:37:09
# Current User's Login: zainzampawala786-sudo
# ═══════════════════════════════════════════════════════════════════════════════

from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

_rule = "=" * 100
_utc_stamp = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')

print("\n" + _rule)
print("STEP 11: ANOVA FILTER + OPTIMAL K SELECTION (NESTED CROSS-VALIDATION)")
print(_rule)
print(f"UTC: {_utc_stamp}")
print("User: zainzampawala786-sudo")
print("Method: ANOVA F-test with nested CV to determine optimal K")
print("TRIPOD Type 2b: Feature selection on training data only\n")

# ═══════════════════════════════════════════════════════════════════════════════
# 11.1 DATA PREPARATION
# ═══════════════════════════════════════════════════════════════════════════════

# Copies of the Step 10 outputs, so this step never mutates CLEAN_FEATURE_DATA.
X_train, X_test, X_external = (
    CLEAN_FEATURE_DATA[_key].copy()
    for _key in ("X_train_clean", "X_test_clean", "X_external_clean")
)
y_train, y_test, y_external = (
    CLEAN_FEATURE_DATA[_key].copy()
    for _key in ("y_train", "y_test", "y_external")
)

all_features = X_train.columns.tolist()
n_samples = len(y_train)
n_events = (y_train == 1).sum()  # outcome coded 1 = event

_external_events = (y_external==1).sum()
print("📊 Dataset Summary:")
print(f"   Training:  {n_samples} samples, {n_events} events ({n_events/n_samples*100:.1f}%)")
print(f"   Testing:   {len(X_test)} samples (internal hold-out)")
print(f"   External:  {len(X_external)} samples, {_external_events} events")
print(f"   Features:  {len(all_features)} (after VIF removal)\n")

# ═══════════════════════════════════════════════════════════════════════════════
# 11.2 CROSS-VALIDATION STRATEGY
# ═══════════════════════════════════════════════════════════════════════════════

print(f"{'='*100}")
print("🔧 CROSS-VALIDATION CONFIGURATION:\n")

# 5-fold Stratified CV (maintains outcome prevalence)
cv_folds = 5
cv_strategy = StratifiedKFold(n_splits=cv_folds, shuffle=True, random_state=42)

_train_mortality_pct = (y_train==1).sum()/len(y_train)*100
print(f"   Strategy: {cv_folds}-Fold Stratified Cross-Validation")
print(f"   Stratification: Maintains {_train_mortality_pct:.1f}% mortality rate in each fold")
print("   Shuffle: True (random_state=42 for reproducibility)\n")

# Verify stratification: report the size and event rate of every validation fold.
print("   📊 Verifying stratification across folds:")
for fold_idx, (train_idx, val_idx) in enumerate(cv_strategy.split(X_train, y_train), 1):
    fold_size = len(val_idx)
    fold_events = (y_train.iloc[val_idx] == 1).sum()
    fold_mortality = fold_events / fold_size * 100
    print(f"      Fold {fold_idx}: {fold_size:3d} samples, mortality = {fold_mortality:.1f}%")

# ═══════════════════════════════════════════════════════════════════════════════
# 11.3 K-VALUE RANGE SELECTION (EPV-BASED)
# ═══════════════════════════════════════════════════════════════════════════════

print(f"\n{'='*100}")
print("📊 K-VALUE RANGE SELECTION (EPV-Based):\n")

# K range based on EPV guidelines (Riley et al. 2019: minimum EPV=5)
k_values = [15, 20, 25, 30]

# EPV bands, highest threshold first: (minimum EPV, status label, interpretation).
_epv_bands = (
    (10, "✅ EXCELLENT", "Adequate for all models"),
    (5, "✅ ADEQUATE", "Meets minimum threshold"),
    (3, "⚠️  MARGINAL", "Consider shrinkage methods"),
)
_epv_below_all_bands = ("❌ INSUFFICIENT", "Too few events per variable")

_k_listing = ', '.join(str(candidate) for candidate in k_values)
print(f"   Testing K ∈ {{{_k_listing}}}\n")
print("   EPV Analysis:")
print(f"   {'K':>5s}  {'EPV':>8s}  {'Status':>20s}  {'Interpretation':>30s}")
print(f"   {'-'*70}")

for k in k_values:
    # Events per variable if k features were retained.
    epv = n_events / k
    status, interp = next(
        ((label, note) for floor, label, note in _epv_bands if epv >= floor),
        _epv_below_all_bands,
    )
    print(f"   {k:5d}  {epv:8.2f}  {status:>20s}  {interp:>30s}")

print("\n   Reference: Riley RD et al. BMJ. 2019;364:l1732")

# ═══════════════════════════════════════════════════════════════════════════════
# 11.4 ANOVA F-TEST FEATURE RANKING
# ═══════════════════════════════════════════════════════════════════════════════

print(f"\n{'='*100}")
print("🔬 ANOVA F-TEST: Univariate Feature Ranking\n")

# Score every feature (k='all' keeps them all; only the statistics are used).
f_selector = SelectKBest(score_func=f_classif, k='all').fit(X_train, y_train)
f_scores, p_values = f_selector.scores_, f_selector.pvalues_

# Ranking table ordered by F-score (largest first); Rank is the 1-based position.
anova_df = (
    pd.DataFrame({'Feature': all_features, 'F_Score': f_scores, 'P_Value': p_values})
    .sort_values('F_Score', ascending=False)
    .reset_index(drop=True)
)
anova_df['Rank'] = range(1, len(anova_df) + 1)


def _significance_stars(p_value):
    """Map a p-value to the conventional star notation used in the table."""
    if p_value < 0.001:
        return "***"
    if p_value < 0.01:
        return "**"
    if p_value < 0.05:
        return "*"
    return "ns"


print(f"   ✅ ANOVA F-test completed for {len(all_features)} features\n")
print("   Top 20 Features by F-Score:\n")
print(f"   {'Rank':>5s}  {'Feature':40s}  {'F-Score':>10s}  {'P-Value':>12s}  {'Significance':>15s}")
print(f"   {'-'*90}")

for idx, row in anova_df.head(20).iterrows():
    sig = _significance_stars(row['P_Value'])
    print(f"   {row['Rank']:5d}  {row['Feature']:40s}  {row['F_Score']:10.2f}  {row['P_Value']:12.4e}  {sig:>15s}")

save_csv(anova_df, 'step11_anova_feature_ranking')
print("\n   ✅ Full ranking saved: step11_anova_feature_ranking.csv")

# ═══════════════════════════════════════════════════════════════════════════════
# 11.5 NESTED CROSS-VALIDATION: K SELECTION
# ═══════════════════════════════════════════════════════════════════════════════

# Imported here so the cell stays self-contained.
from sklearn.pipeline import Pipeline

print(f"\n{'='*100}")
print(f"🎯 NESTED CROSS-VALIDATION: Optimal K Selection\n")
print(f"   Method: 5-fold stratified CV on TRAINING data only")
print(f"   Model: Logistic Regression (L2, balanced, C=1.0)")
print(f"   Metric: ROC AUC (threshold-independent)\n")

k_cv_results = []

# SelectKBest rejects k larger than the number of columns, so cap it.
_n_available_features = X_train.shape[1]

for k in k_values:
    print(f"{'─'*100}")
    print(f"Testing K={k} (EPV={n_events/k:.1f})...\n")

    # Feature selection and scaling live INSIDE the pipeline so that, for
    # every CV split, the ANOVA ranking and the scaler statistics are learned
    # from the training fold only. Ranking/scaling on the full training set
    # before cross-validating would leak validation-fold information into the
    # AUC estimates and bias the choice of K.
    k_pipeline = Pipeline([
        ('select', SelectKBest(score_func=f_classif, k=min(k, _n_available_features))),
        ('scale', StandardScaler()),
        ('clf', LogisticRegression(
            penalty='l2',
            C=1.0,
            max_iter=2000,
            class_weight='balanced',
            random_state=42,
            solver='liblinear'
        )),
    ])

    # 5-fold CV (cross_val_score clones and refits the whole pipeline per fold)
    cv_scores = cross_val_score(
        k_pipeline,
        X_train,
        y_train,
        cv=cv_strategy,
        scoring='roc_auc',
        n_jobs=-1
    )

    mean_auc = cv_scores.mean()
    std_auc = cv_scores.std()

    k_cv_results.append({
        'K': k,
        'EPV': n_events / k,
        'Mean_CV_AUC': mean_auc,
        'Std_CV_AUC': std_auc,
        'Min_CV_AUC': cv_scores.min(),
        'Max_CV_AUC': cv_scores.max(),
        'CV_Scores': cv_scores
    })

    print(f"   Fold Results:")
    for fold_idx, score in enumerate(cv_scores, 1):
        print(f"      Fold {fold_idx}: AUC = {score:.4f}")

    print(f"\n   → Mean CV AUC: {mean_auc:.4f} ± {std_auc:.4f}")
    print(f"   → Range: [{cv_scores.min():.4f}, {cv_scores.max():.4f}]\n")

# One row per candidate K; consumed by the selection logic and figure below.
k_cv_df = pd.DataFrame(k_cv_results)

# ═══════════════════════════════════════════════════════════════════════════════
# 11.6 SELECT OPTIMAL K
# ═══════════════════════════════════════════════════════════════════════════════
# Picks the K with the highest mean CV AUC (idxmax returns the first maximum)
# and lists the corresponding leading rows of the ANOVA ranking.

print(f"{'='*100}")
print(f"📊 K-SELECTION RESULTS SUMMARY:\n")

print(k_cv_df[['K', 'EPV', 'Mean_CV_AUC', 'Std_CV_AUC']].to_string(index=False))

# Locate the winning row once and read every statistic from it.
_best_idx = k_cv_df['Mean_CV_AUC'].idxmax()
optimal_k = int(k_cv_df.loc[_best_idx, 'K'])
optimal_cv_auc = k_cv_df.loc[_best_idx, 'Mean_CV_AUC']
optimal_std_auc = k_cv_df.loc[_best_idx, 'Std_CV_AUC']
optimal_epv = k_cv_df.loc[_best_idx, 'EPV']

# Translate the EPV into the status label shown in the report.
if optimal_epv >= 10:
    _epv_status = '✅ EXCELLENT (≥10)'
elif optimal_epv >= 5:
    _epv_status = '✅ ADEQUATE (≥5)'
else:
    _epv_status = '⚠️  MARGINAL'

print(f"\n{'='*100}")
print(f"🏆 OPTIMAL K SELECTED: {optimal_k}")
print(f"{'='*100}")
print(f"\n   Selection Criterion: Maximum mean 5-fold CV AUC")
print(f"   Mean CV AUC:  {optimal_cv_auc:.4f} ± {optimal_std_auc:.4f}")
print(f"   EPV:          {optimal_epv:.2f} events per variable")
print(f"   EPV Status:   {_epv_status}")

# Get final selected features
selected_features_step11 = anova_df.head(optimal_k)['Feature'].tolist()

# Lookup table keyed by feature name; keeping the first occurrence mirrors a
# "first matching row" search through anova_df.
_first_match = anova_df.drop_duplicates(subset='Feature').set_index('Feature')

print(f"\n   📋 Selected {optimal_k} Features (by ANOVA rank):\n")
for i, feat in enumerate(selected_features_step11, 1):
    f_score = _first_match.at[feat, 'F_Score']
    p_val = _first_match.at[feat, 'P_Value']
    print(f"      {i:2d}. {feat:40s}  F={f_score:8.2f}, p={p_val:.2e}")

# ═══════════════════════════════════════════════════════════════════════════════
# 11.7 FIGURE: K-SELECTION VISUALIZATION
# ═══════════════════════════════════════════════════════════════════════════════
# Two panels built from `k_cv_df`:
#   A. mean CV AUC (±1 SD band) against the number of features K
#   B. mean CV AUC against events-per-variable, coloured by K

print(f"\n{'='*100}")
print(f"📊 Generating K-selection visualization...\n")

COLORS = DISTRIBUTION_DATA["colors_enhanced"]

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6), dpi=300)

# Shared y-range: the usual 0.7–1.0 window, widened only when the ±1 SD band
# would otherwise be cut off (a fixed range silently hides such values).
_band_lo = (k_cv_df['Mean_CV_AUC'] - k_cv_df['Std_CV_AUC']).min()
_band_hi = (k_cv_df['Mean_CV_AUC'] + k_cv_df['Std_CV_AUC']).max()
_auc_ylim = (min(0.7, _band_lo - 0.01), max(1.0, _band_hi + 0.01))

# Plot 1: AUC vs K
ax1.plot(k_cv_df['K'], k_cv_df['Mean_CV_AUC'], 
         marker='o', markersize=10, linewidth=2.5, 
         color=COLORS['survived'], label='Mean CV AUC')
ax1.fill_between(k_cv_df['K'], 
                 k_cv_df['Mean_CV_AUC'] - k_cv_df['Std_CV_AUC'],
                 k_cv_df['Mean_CV_AUC'] + k_cv_df['Std_CV_AUC'],
                 alpha=0.3, color=COLORS['survived'], label='±1 SD')

# Highlight optimal K
ax1.scatter([optimal_k], [optimal_cv_auc], 
           s=300, color=COLORS['sig'], edgecolor='white', linewidth=3,
           zorder=5, label=f'Optimal K={optimal_k}')

ax1.set_xlabel('Number of Features (K)', fontsize=12, fontweight='bold')
ax1.set_ylabel('Cross-Validated AUC', fontsize=12, fontweight='bold')
ax1.set_title('A. Cross-Validation AUC vs Feature Count', 
             fontsize=13, fontweight='bold', loc='left')
ax1.legend(fontsize=10, frameon=True, fancybox=True, shadow=True)
ax1.grid(True, alpha=0.3, linestyle='--')
ax1.set_xticks(k_values)
ax1.set_ylim(*_auc_ylim)

# Plot 2: EPV vs AUC
ax2.scatter(k_cv_df['EPV'], k_cv_df['Mean_CV_AUC'], 
           s=200, c=k_cv_df['K'], cmap='viridis', 
           edgecolor=COLORS['primary'], linewidth=2, alpha=0.8)

# Annotate points with their K value
for idx, row in k_cv_df.iterrows():
    ax2.annotate(f"K={int(row['K'])}", 
                (row['EPV'], row['Mean_CV_AUC']),
                xytext=(5, 5), textcoords='offset points',
                fontsize=9, fontweight='bold')

# EPV threshold lines
ax2.axvline(x=5, color=COLORS['secondary'], linestyle='--', linewidth=2, 
           label='EPV=5 (Riley minimum)', alpha=0.7)
ax2.axvline(x=10, color=COLORS['sig'], linestyle='--', linewidth=2, 
           label='EPV=10 (recommended)', alpha=0.7)

ax2.set_xlabel('Events Per Variable (EPV)', fontsize=12, fontweight='bold')
ax2.set_ylabel('Cross-Validated AUC', fontsize=12, fontweight='bold')
ax2.set_title('B. EPV vs Model Performance', 
             fontsize=13, fontweight='bold', loc='left')
ax2.legend(fontsize=10, frameon=True, fancybox=True, shadow=True)
ax2.grid(True, alpha=0.3, linestyle='--')
ax2.set_ylim(*_auc_ylim)

# The sample size comes from the data actually used, not a typed-in number,
# so the caption stays correct if the training split changes.
fig.suptitle('Figure 8. ANOVA Filter: Optimal K Selection via Nested Cross-Validation\n'
             f'Training Set (n={len(X_train)}, 5-Fold Stratified CV)', 
            fontsize=15, fontweight='bold', y=0.98)

plt.tight_layout(rect=[0, 0, 1, 0.96])
save_figure(fig, 'step11_fig8_k_selection')
plt.show()

print(f"✅ Figure 8 saved: K-selection visualization")

# ═══════════════════════════════════════════════════════════════════════════════
# 11.8 SAVE RESULTS
# ═══════════════════════════════════════════════════════════════════════════════

# Per-K cross-validation table goes to disk through the project's CSV helper.
save_csv(k_cv_df, 'step11_k_selection_cv_results')

# ═══════════════════════════════════════════════════════════════════════════════
# 11.9 LOG & HAND-OFF
# ═══════════════════════════════════════════════════════════════════════════════

# Run-log entry for this step (rounded so the log stays readable).
append_runlog("11", dict(
    analysis="ANOVA filter with nested CV for optimal K selection",
    method="ANOVA F-test + Stratified 5-fold CV",
    k_range_tested=k_values,
    optimal_k=optimal_k,
    optimal_cv_auc=round(optimal_cv_auc, 4),
    optimal_epv=round(optimal_epv, 2),
    selected_features=len(selected_features_step11),
))

# Bundle handed to later cells; Step 12 reads "anova_ranking" from it.
ANOVA_SELECTION = dict(
    method="ANOVA_F_Test_Nested_CV",
    optimal_k=optimal_k,
    selected_features=selected_features_step11,
    cv_auc=optimal_cv_auc,
    cv_std=optimal_std_auc,
    epv=optimal_epv,
    k_cv_results=k_cv_df,
    anova_ranking=anova_df,
)

# Closing banner, emitted in one call; the line breaks match one print per line.
print("\n".join([
    "\n💾 Stored: ANOVA_SELECTION",
    "\n" + "=" * 100,
    "✅ STEP 11 COMPLETE — ANOVA FILTER APPLIED, OPTIMAL K SELECTED",
    "=" * 100,
    "\n🎯 HAND-OFF TO STEP 12:",
    f"   • Input features: {optimal_k} (ANOVA-selected)",
    "   • Next: Elastic Net embedded selection",
    "   • Expected output: ~18 features (after L1 shrinkage)",
    "=" * 100 + "\n",
]))

In [None]:
# ═══════════════════════════════════════════════════════════════════════════════
# STEP 12: ELASTIC NET FEATURE SELECTION (3-TIER APPROACH)
# TRIPOD: 10b (Final feature selection with embedded method)
# MODIFIED: Test K=15, K=20, K=25 → 3 final feature sets
# ═══════════════════════════════════════════════════════════════════════════════

from sklearn.linear_model import ElasticNetCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Step banner
print("\n" + "=" * 100)
print("STEP 12: ELASTIC NET FEATURE SELECTION (3-TIER APPROACH)")
print("=" * 100)
print(f"UTC: {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')}")
print("User: zainzampawala786-sudo")
print("\n🎯 STRATEGY: Test 3 feature tiers (K=15, K=20, K=25)")
print("   Each tier will be validated on external data in Step 14")
print("   Best performing tier will be selected\n")

# Work on private copies of the cleaned train/test splits.
X_train, y_train, X_test, y_test = (
    CLEAN_FEATURE_DATA[split_key].copy()
    for split_key in ("X_train_clean", "y_train", "X_test_clean", "y_test")
)

# ANOVA ranking produced by Step 11
anova_df = ANOVA_SELECTION["anova_ranking"]

# The three tier sizes (number of top-ranked features fed to Elastic Net)
K_VALUES = [15, 20, 25]

# Event counts are computed once and reused for the count and the percentage.
_train_events = (y_train == 1).sum()
_test_events = (y_test == 1).sum()

print(f"{'='*100}")
print("📊 DATA SUMMARY:\n")
print(f"   Training samples:  {len(X_train)}")
print(f"   Training events:   {_train_events} ({_train_events/len(y_train)*100:.1f}%)")
print(f"   Test samples:      {len(X_test)}")
print(f"   Test events:       {_test_events} ({_test_events/len(y_test)*100:.1f}%)")
print(f"\n   Available features: {len(anova_df)}")
print(f"   K-values to test:   {K_VALUES}")

# Per-tier results are collected here; colours come from the shared palette.
TIER_RESULTS = {}
COLORS = DISTRIBUTION_DATA["colors_enhanced"]

# ══════════════════════════════════════════════════════════════════════════════
# PROCESS EACH TIER
# ══════════════════════════════════════════════════════════════════════════════
# For every K in K_VALUES: take the first K rows of the ANOVA ranking, standardise
# them, fit an Elastic Net (alpha chosen by cross-validation), keep the features
# whose coefficient is non-zero and store everything in TIER_RESULTS under the
# key "tier_<idx>_k<K>".

# Local import keeps this cell self-contained.
from sklearn.model_selection import StratifiedKFold

# The number of training events does not depend on the tier.
n_events = (y_train == 1).sum()

for tier_idx, K in enumerate(K_VALUES, 1):
    
    print(f"\n{'='*100}")
    print(f"🔬 TIER {tier_idx}: K={K} FEATURES")
    print(f"{'='*100}\n")
    
    # Calculate EPV
    epv = n_events / K
    
    print(f"   Events-Per-Variable (EPV): {epv:.2f}")
    
    if epv >= 10:
        epv_status = "✅ EXCELLENT (≥10)"
    elif epv >= 5:
        epv_status = "✅ ADEQUATE (≥5)"
    else:
        epv_status = "⚠️  MARGINAL (<5)"
    
    print(f"   EPV Status: {epv_status}\n")
    
    # ── Select top K features from ANOVA (anova_df is used in its row order)
    top_k_rows = anova_df.head(K)
    selected_features_anova = top_k_rows['Feature'].tolist()
    
    print(f"   Selected {K} features by ANOVA F-score:\n")
    for i, (feat, f_score) in enumerate(zip(top_k_rows['Feature'], top_k_rows['F_Score']), 1):
        print(f"      {i:2d}. {feat:40s} F={f_score:.2f}")
    
    # ── Subset data to K features
    X_train_k = X_train[selected_features_anova].copy()
    X_test_k = X_test[selected_features_anova].copy()
    
    print(f"\n   Training shape: {X_train_k.shape}")
    print(f"   Test shape:     {X_test_k.shape}")
    
    # ── Scale features (statistics come from the training split only)
    print(f"\n   🔧 Scaling features (StandardScaler)...")
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train_k)
    X_test_scaled = scaler.transform(X_test_k)
    
    print(f"      ✅ Features scaled (mean=0, std=1)")
    
    # ── Elastic Net with cross-validation
    print(f"\n   🔬 Training Elastic Net (5-fold CV)...")
    print(f"      • L1 ratio: 0.5 (equal L1/L2 penalty)")
    print(f"      • Alpha grid: 100 values (auto)")
    print(f"      • CV folds: 5 (stratified)\n")
    
    # ElasticNetCV is a regressor, so an integer `cv` means plain, unshuffled
    # KFold. An explicit stratified splitter keeps the event rate comparable
    # across folds, which is what the output above announces.
    enet = ElasticNetCV(
        l1_ratio=0.5,          # Equal L1/L2 penalty
        alphas=None,           # Auto-generate 100 alphas
        cv=StratifiedKFold(n_splits=5, shuffle=True, random_state=42),
        max_iter=10000,
        tol=1e-4,
        random_state=42,
        n_jobs=-1
    )
    
    enet.fit(X_train_scaled, y_train)
    
    # mse_path_ holds one MSE per (alpha, fold); the selected alpha is the one
    # with the lowest fold-averaged MSE.
    cv_mse = enet.mse_path_.mean(axis=-1).min()
    
    print(f"   ✅ Elastic Net trained")
    print(f"      • Best alpha: {enet.alpha_:.6f}")
    print(f"      • CV MSE (best alpha): {cv_mse:.4f}")
    # .score() on the training matrix is the in-sample R², not a CV estimate.
    print(f"      • Training R²: {enet.score(X_train_scaled, y_train):.4f}")
    
    # ── Extract non-zero coefficients
    coefficients = enet.coef_
    feature_importance = pd.DataFrame({
        'Feature': selected_features_anova,
        'Coefficient': coefficients,
        'Abs_Coefficient': np.abs(coefficients)
    }).sort_values('Abs_Coefficient', ascending=False)
    
    # Features with non-zero coefficients; because the table is sorted by
    # |coefficient|, these rows are its leading block.
    retained = feature_importance[feature_importance['Abs_Coefficient'] > 1e-6]
    selected_features_enet = retained['Feature'].tolist()
    
    n_selected = len(selected_features_enet)
    n_removed = K - n_selected
    # Guard against a fully shrunk model (no feature retained).
    final_epv = n_events / n_selected if n_selected else float('inf')
    
    print(f"\n   {'─'*80}")
    print(f"   📊 ELASTIC NET FEATURE SELECTION RESULTS:\n")
    print(f"      • Input features (K={K}):     {K}")
    print(f"      • Features retained:          {n_selected}")
    print(f"      • Features removed (→0):      {n_removed}")
    print(f"      • Final EPV:                  {final_epv:.2f}")
    
    print(f"\n   🎯 FINAL {n_selected} FEATURES (Non-zero coefficients):\n")
    print(f"      {'Rank':>5s}  {'Feature':40s}  {'Coefficient':>12s}  {'|Coef|':>10s}")
    print(f"      {'-'*75}")
    
    # Position in the sorted table is the rank; no per-row index search needed.
    for rank, row in enumerate(retained.itertuples(index=False), 1):
        print(f"      {rank:5d}  {row.Feature:40s}  {row.Coefficient:12.4f}  {row.Abs_Coefficient:10.4f}")
    
    if n_removed > 0:
        print(f"\n   🗑️  REMOVED FEATURES (Coefficients shrunk to zero):\n")
        removed_features = feature_importance[
            feature_importance['Abs_Coefficient'] <= 1e-6
        ]['Feature'].tolist()
        for i, feat in enumerate(removed_features, 1):
            print(f"      {i}. {feat}")
    
    # ── AUC on train and internal test split.
    # AUC depends only on the ordering of the scores. Clipping to [0, 1] would
    # merge every out-of-range prediction into a tie and change the result, so
    # the raw regression output is ranked; the clipped version is kept only as
    # a probability-like value.
    y_score_train = enet.predict(X_train_scaled)
    y_pred_train = np.clip(y_score_train, 0, 1)
    train_auc = roc_auc_score(y_train, y_score_train)
    
    y_score_test = enet.predict(X_test_scaled)
    y_pred_test = np.clip(y_score_test, 0, 1)
    test_auc = roc_auc_score(y_test, y_score_test)
    
    print(f"\n   {'─'*80}")
    print(f"   📈 INTERNAL VALIDATION (Training/Test AUC):\n")
    print(f"      • Training AUC:   {train_auc:.4f}")
    print(f"      • Test AUC:       {test_auc:.4f}")
    print(f"      • Difference:     {abs(train_auc - test_auc):.4f}")
    
    if abs(train_auc - test_auc) < 0.05:
        print(f"      • Status:         ✅ Good generalization (<0.05 gap)")
    else:
        print(f"      • Status:         ⚠️  Possible overfitting (≥0.05 gap)")
    
    # ── Save tier results
    TIER_RESULTS[f"tier_{tier_idx}_k{K}"] = {
        "tier": tier_idx,
        "K": K,
        "epv": epv,
        "selected_features_anova": selected_features_anova,
        "selected_features_enet": selected_features_enet,
        "n_features_final": n_selected,
        "n_features_removed": n_removed,
        "scaler": scaler,
        "elasticnet_model": enet,
        "feature_importance": feature_importance,
        "best_alpha": enet.alpha_,
        "train_auc": train_auc,
        "test_auc": test_auc,
        "coefficients": coefficients,
    }
    
    # Save CSV for this tier
    save_csv(feature_importance, f'step12_tier{tier_idx}_k{K}_feature_importance')
    
    print(f"\n   ✅ Tier {tier_idx} (K={K}) complete: {n_selected} features selected")

# ══════════════════════════════════════════════════════════════════════════════
# SUMMARY COMPARISON
# ══════════════════════════════════════════════════════════════════════════════
# One table row per tier, printed as a fixed-width table and saved as CSV.

print(f"\n{'='*100}")
print("📊 3-TIER COMPARISON SUMMARY")
print(f"{'='*100}\n")

# Training event count, shared by every Final_EPV computation below.
_n_train_events = (y_train == 1).sum()

summary_data = [
    {
        'Tier': tier_result['tier'],
        'K_Input': tier_result['K'],
        'Features_Final': tier_result['n_features_final'],
        'Features_Removed': tier_result['n_features_removed'],
        'EPV': tier_result['epv'],
        'Final_EPV': _n_train_events / tier_result['n_features_final'],
        'Train_AUC': tier_result['train_auc'],
        'Test_AUC': tier_result['test_auc'],
        'AUC_Gap': abs(tier_result['train_auc'] - tier_result['test_auc']),
        'Best_Alpha': tier_result['best_alpha'],
    }
    for tier_result in TIER_RESULTS.values()
]

summary_df = pd.DataFrame(summary_data)

print(f"   {'Tier':>5s}  {'K→Final':>10s}  {'Removed':>8s}  {'EPV':>6s}  {'Final EPV':>10s}  {'Train AUC':>10s}  {'Test AUC':>9s}  {'Gap':>6s}")
print(f"   {'-'*85}")

# Walk the table row by row; the count columns are cast to int for printing.
for record in summary_df.itertuples(index=False):
    counts = f"{int(record.K_Input):2d}→{int(record.Features_Final):2d}       "
    print(f"   {int(record.Tier):5d}  " + counts +
          f"{int(record.Features_Removed):8d}  {record.EPV:6.2f}  {record.Final_EPV:10.2f}  "
          f"{record.Train_AUC:10.4f}  {record.Test_AUC:9.4f}  {record.AUC_Gap:6.4f}")

save_csv(summary_df, 'step12_3tier_summary')

# ══════════════════════════════════════════════════════════════════════════════
# VISUALIZATION: 3-TIER COMPARISON
# ══════════════════════════════════════════════════════════════════════════════
# Top row: feature counts (A), EPV (B) and train/test AUC (C) per tier.
# Following rows: one |coefficient| bar chart per tier, three panels per row.

print(f"\n{'='*100}")
print(f"📊 Generating 3-tier visualization...\n")

# Only as many importance rows as the tiers need; a fixed 3×3 grid left an
# empty third row for the usual three tiers.
_n_importance_rows = max(1, -(-len(TIER_RESULTS) // 3))  # ceil division

fig = plt.figure(figsize=(20, 12), dpi=300)
gs = fig.add_gridspec(1 + _n_importance_rows, 3, hspace=0.35, wspace=0.3)

tier_colors = [COLORS['primary'], COLORS['secondary'], COLORS['sig']]

# ── Plot 1: Feature counts
ax1 = fig.add_subplot(gs[0, 0])
tiers = summary_df['Tier'].values
k_input = summary_df['K_Input'].values
features_final = summary_df['Features_Final'].values

x = np.arange(len(tiers))
width = 0.35

ax1.bar(x - width/2, k_input, width, label='Input (ANOVA)', 
        color=COLORS['died'], alpha=0.7, edgecolor='black')
ax1.bar(x + width/2, features_final, width, label='Final (Elastic Net)', 
        color=COLORS['survived'], alpha=0.7, edgecolor='black')

ax1.set_xlabel('Tier', fontsize=11, fontweight='bold')
ax1.set_ylabel('Number of Features', fontsize=11, fontweight='bold')
ax1.set_title('A. Feature Count: Input vs Final', fontsize=12, fontweight='bold', loc='left')
ax1.set_xticks(x)
ax1.set_xticklabels([f'Tier {t}\n(K={k})' for t, k in zip(tiers, k_input)])
ax1.legend(fontsize=9)
ax1.grid(axis='y', alpha=0.3)

# ── Plot 2: EPV comparison
ax2 = fig.add_subplot(gs[0, 1])
epv_initial = summary_df['EPV'].values
epv_final = summary_df['Final_EPV'].values

ax2.bar(x - width/2, epv_initial, width, label='Initial EPV', 
        color=COLORS['primary'], alpha=0.7, edgecolor='black')
ax2.bar(x + width/2, epv_final, width, label='Final EPV', 
        color=COLORS['sig'], alpha=0.7, edgecolor='black')
ax2.axhline(y=10, color='green', linestyle='--', linewidth=2, label='EPV=10 (excellent)')
ax2.axhline(y=5, color='orange', linestyle='--', linewidth=2, label='EPV=5 (adequate)')

ax2.set_xlabel('Tier', fontsize=11, fontweight='bold')
ax2.set_ylabel('Events Per Variable', fontsize=11, fontweight='bold')
ax2.set_title('B. EPV: Initial vs Final', fontsize=12, fontweight='bold', loc='left')
ax2.set_xticks(x)
ax2.set_xticklabels([f'Tier {t}' for t in tiers])
ax2.legend(fontsize=8)
ax2.grid(axis='y', alpha=0.3)

# ── Plot 3: AUC comparison
ax3 = fig.add_subplot(gs[0, 2])
train_auc = summary_df['Train_AUC'].values
test_auc = summary_df['Test_AUC'].values

ax3.plot(tiers, train_auc, marker='o', linewidth=2.5, markersize=10, 
         label='Training AUC', color=COLORS['primary'])
ax3.plot(tiers, test_auc, marker='s', linewidth=2.5, markersize=10, 
         label='Test AUC', color=COLORS['secondary'])

ax3.set_xlabel('Tier', fontsize=11, fontweight='bold')
ax3.set_ylabel('AUC', fontsize=11, fontweight='bold')
ax3.set_title('C. Internal AUC: Training vs Test', fontsize=12, fontweight='bold', loc='left')
ax3.set_xticks(tiers)
ax3.set_xticklabels([f'Tier {t}\n(K={k})' for t, k in zip(tiers, k_input)])
ax3.legend(fontsize=9)
ax3.grid(alpha=0.3)
# Keep the usual 0.80–0.95 window, but widen it when a value falls outside so
# that no point is silently cut off.
_auc_values = np.concatenate([train_auc, test_auc])
ax3.set_ylim([min(0.8, _auc_values.min() - 0.01),
              max(0.95, _auc_values.max() + 0.01)])

# ── Plots 4+: Feature importance for each tier (top 15 non-zero coefficients)
for tier_idx, (key, result) in enumerate(TIER_RESULTS.items()):
    ax = fig.add_subplot(gs[1 + tier_idx//3, tier_idx % 3])
    
    feat_imp = result['feature_importance']
    feat_imp_nonzero = feat_imp[feat_imp['Abs_Coefficient'] > 1e-6].head(15)
    
    y_pos = np.arange(len(feat_imp_nonzero))
    # Colours cycle so that more than three tiers do not raise an IndexError.
    ax.barh(y_pos, feat_imp_nonzero['Abs_Coefficient'], 
            color=tier_colors[tier_idx % len(tier_colors)], alpha=0.7, edgecolor='black')
    
    ax.set_yticks(y_pos)
    ax.set_yticklabels(feat_imp_nonzero['Feature'], fontsize=8)
    ax.set_xlabel('|Coefficient|', fontsize=10, fontweight='bold')
    # Panel letters continue after A–C: D, E, F, ...
    panel_letter = chr(ord('D') + tier_idx)
    ax.set_title(f'{panel_letter}. Tier {tier_idx+1} (K={result["K"]}→{result["n_features_final"]} features)', 
                fontsize=11, fontweight='bold', loc='left')
    ax.grid(axis='x', alpha=0.3)
    ax.invert_yaxis()  # largest coefficient at the top

fig.suptitle('Figure 8. 3-Tier Elastic Net Feature Selection Comparison\n'
             'All tiers will be validated on external data (Step 14)', 
             fontsize=16, fontweight='bold', y=0.98)

save_figure(fig, 'step12_fig8_3tier_comparison')
plt.show()

print(f"✅ Figure 8 saved: 3-tier comparison")

# ══════════════════════════════════════════════════════════════════════════════
# SAVE & LOG
# ══════════════════════════════════════════════════════════════════════════════
# Persists TIER_RESULTS, writes the run-log entry and exposes the results to
# later cells as ELASTICNET_3TIER.

save_pickle(TIER_RESULTS, 'step12_tier_results')

# One log line per tier, derived from the stored results. Typed-in keys such as
# 'tier_1_k15' would raise KeyError as soon as K_VALUES is edited.
_tier_log = {
    f"tier_{tier_result['tier']}":
        f"K={tier_result['K']} → {tier_result['n_features_final']} features"
    for tier_result in TIER_RESULTS.values()
}

append_runlog("12", {
    "analysis": "Elastic Net feature selection (3-tier approach)",
    "tiers": len(TIER_RESULTS),
    **_tier_log,
})

ELASTICNET_3TIER = TIER_RESULTS

print("\n💾 Stored: ELASTICNET_3TIER (3 feature sets ready)")
print("\n" + "="*100)
print("✅ STEP 12 COMPLETE — 3 TIERS CREATED")
print("="*100)
print(f"\n🎯 NEXT STEP: Train models on all 3 tiers (Step 13)")
print(f"   Then validate on external data (Step 14) to select best tier")
print("="*100 + "\n")

In [None]:
# ═══════════════════════════════════════════════════════════════════════════════
# STEP 13: MODEL TRAINING & HYPERPARAMETER TUNING (3-TIER APPROACH)
# TRIPOD: 10b (Model development), 10c (Hyperparameter tuning)
# ═══════════════════════════════════════════════════════════════════════════════

from sklearn.linear_model import LogisticRegression, ElasticNet
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (roc_auc_score, roc_curve, precision_recall_curve,
                             brier_score_loss, classification_report, confusion_matrix)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Step banner
print("\n" + "=" * 100)
print("STEP 13: MODEL TRAINING & HYPERPARAMETER TUNING (3-TIER APPROACH)")
print("=" * 100)
print(f"UTC: {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')}")
print("User: zainzampawala786-sudo\n")

# Private copies of the cleaned splits, plus the tier definitions from Step 12.
X_train, y_train, X_test, y_test = (
    CLEAN_FEATURE_DATA[split_key].copy()
    for split_key in ("X_train_clean", "y_train", "X_test_clean", "y_test")
)
TIER_RESULTS = ELASTICNET_3TIER
COLORS = DISTRIBUTION_DATA["colors_enhanced"]

# Event counts are computed once and reused for the count and the percentage.
_train_events = (y_train == 1).sum()
_test_events = (y_test == 1).sum()

print(f"{'='*100}")
print("📊 DATA SUMMARY:\n")
print(f"   Training samples:  {len(X_train)} (events: {_train_events}, {_train_events/len(y_train)*100:.1f}%)")
print(f"   Test samples:      {len(X_test)} (events: {_test_events}, {_test_events/len(y_test)*100:.1f}%)")
print(f"\n   Tiers: {len(TIER_RESULTS)}")

# One bullet per tier with its final feature count and the K it started from.
for tier_data in TIER_RESULTS.values():
    print(f"      • Tier {tier_data['tier']}: {tier_data['n_features_final']} features (K={tier_data['K']})")

# ══════════════════════════════════════════════════════════════════════════════
# MODEL DEFINITIONS & HYPERPARAMETER GRIDS
# ══════════════════════════════════════════════════════════════════════════════
# MODELS maps a display name to:
#   'model'  – the unfitted estimator handed to GridSearchCV
#   'params' – the hyperparameter grid (one list of candidates per parameter)
#   'scale'  – whether the training loop standardises the features first
# The insertion order of this dict is the order in which models are trained.

print(f"\n{'='*100}")
print(f"🤖 MODEL CONFIGURATIONS:\n")

# Ratio of negative to positive training labels, passed to XGBoost below.
scale_pos_weight = len(y_train[y_train==0]) / len(y_train[y_train==1])

MODELS = {
    # NOTE(review): 'l1_ratio' is crossed with all three penalties, although it
    # presumably only matters for 'elasticnet' — if so, the 'l1' and 'l2'
    # candidates are each fitted three times; confirm and consider a list of grids.
    'Logistic Regression': {
        'model': LogisticRegression(random_state=42, max_iter=2000, solver='saga'),
        'params': {
            'C': [0.001, 0.01, 0.1, 1.0],
            'penalty': ['l1', 'l2', 'elasticnet'],
            'l1_ratio': [0.3, 0.5, 0.7],
            'class_weight': ['balanced']
        },
        'scale': True
    },
    
    'Random Forest': {
        'model': RandomForestClassifier(random_state=42, n_jobs=-1),
        'params': {
            'n_estimators': [200, 300, 500],
            'max_depth': [4, 6],
            'min_samples_split': [10, 20, 30],
            'min_samples_leaf': [5, 10],
            'max_features': ['sqrt', 'log2'],
            'class_weight': ['balanced']
        },
        'scale': False
    },
    
    'Gradient Boosting': {
        'model': GradientBoostingClassifier(random_state=42),
        'params': {
            'n_estimators': [100, 200, 300],
            'learning_rate': [0.01, 0.05],
            'max_depth': [3, 4],
            'min_samples_split': [10, 20],
            'min_samples_leaf': [5, 10],
            'subsample': [0.7, 0.8],
            'max_features': ['sqrt', 'log2']
        },
        'scale': False
    },
    
    # Largest grid of the set: 3*3*3*3*2*2*2*2*2*1 = 2592 combinations.
    'XGBoost': {
        'model': XGBClassifier(random_state=42, eval_metric='logloss', 
                              use_label_encoder=False, n_jobs=-1),
        'params': {
            'n_estimators': [200, 300, 500],
            'learning_rate': [0.01, 0.05, 0.1],
            'max_depth': [3, 4, 5],
            'min_child_weight': [3, 5, 7],
            'subsample': [0.7, 0.8],
            'colsample_bytree': [0.7, 0.8],
            'gamma': [0.1, 0.5],
            'reg_alpha': [0.1, 0.5],
            'reg_lambda': [1.0, 2.0],
            'scale_pos_weight': [scale_pos_weight]
        },
        'scale': False
    },
    
    # probability=True so that predict_proba is available to the training loop.
    'SVM (RBF)': {
        'model': SVC(probability=True, random_state=42, max_iter=2000),
        'params': {
            'C': [0.1, 1.0, 10],
            'gamma': ['scale', 'auto', 0.01],
            'class_weight': ['balanced']
        },
        'scale': True
    },
    
    # A regressor rather than a classifier: the training loop gives it its own
    # scoring ('neg_mean_squared_error') and clips its predictions to [0, 1].
    'Elastic Net': {
        'model': ElasticNet(random_state=42, max_iter=2000, selection='random'),
        'params': {
            'alpha': [0.001, 0.01, 0.1, 1.0],
            'l1_ratio': [0.3, 0.5, 0.7, 0.9],
        },
        'scale': True
    }
}

# Grid size per model = product of the candidate-list lengths.
for model_name, config in MODELS.items():
    n_combos = np.prod([len(v) for v in config['params'].values()])
    print(f"   • {model_name:25s} — {n_combos:4d} hyperparameter combinations")

print(f"\n   Cross-validation: 5-fold stratified | Scoring: ROC AUC")

# Filled by the training loop: one entry per tier.
ALL_TIER_MODELS = {}
# Shared CV splitter so that every model sees the same shuffled stratified folds.
cv_strategy = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# ══════════════════════════════════════════════════════════════════════════════
# TRAIN MODELS FOR EACH TIER
# ══════════════════════════════════════════════════════════════════════════════
# For every tier: restrict the data to the tier's Elastic-Net-selected features,
# grid-search every entry of MODELS with `cv_strategy`, evaluate the refitted
# best estimator on the train and test splits, and collect everything in
# ALL_TIER_MODELS["tier_<n>"].
#
# NOTE(review): for models with 'scale': True the scaler is fitted on the whole
# training split before GridSearchCV runs, so the CV folds share scaling
# statistics. Moving the scaler into a Pipeline would remove that, but it would
# change the stored 'model'/'best_params' layout — confirm with downstream steps.

for tier_key, tier_data in TIER_RESULTS.items():
    
    tier_num = tier_data['tier']
    K = tier_data['K']
    n_features = tier_data['n_features_final']
    selected_features = tier_data['selected_features_enet']
    
    print(f"\n{'='*100}")
    print(f"🔬 TIER {tier_num}: {n_features} FEATURES (K={K})")
    print(f"{'='*100}\n")
    
    print(f"   Features ({n_features}):")
    for i, feat in enumerate(selected_features, 1):
        print(f"      {i:2d}. {feat}")
    
    X_train_tier = X_train[selected_features].copy()
    X_test_tier = X_test[selected_features].copy()
    
    print(f"\n   Training shape: {X_train_tier.shape}")
    print(f"   Test shape:     {X_test_tier.shape}")
    print(f"   EPV:            {(y_train==1).sum() / n_features:.2f}")
    
    tier_models = {}
    tier_results = []
    
    for model_name, config in MODELS.items():
        
        # Elastic Net is the only regressor in MODELS and needs special handling.
        is_regressor = model_name == 'Elastic Net'
        
        print(f"\n   {'─'*80}")
        print(f"   🤖 {model_name}")
        print(f"   {'─'*80}\n")
        
        if config['scale']:
            scaler = StandardScaler()
            X_train_processed = scaler.fit_transform(X_train_tier)
            X_test_processed = scaler.transform(X_test_tier)
            print(f"      ✅ Features scaled")
        else:
            X_train_processed = X_train_tier.values
            X_test_processed = X_test_tier.values
            scaler = None
        
        # Same search for every model; only the scoring differs, because a
        # regressor is tuned on (negated) mean squared error.
        grid_search = GridSearchCV(
            estimator=config['model'],
            param_grid=config['params'],
            cv=cv_strategy,
            scoring='neg_mean_squared_error' if is_regressor else 'roc_auc',
            n_jobs=-1,
            verbose=0
        )
        
        start_time = datetime.utcnow()
        grid_search.fit(X_train_processed, y_train)
        elapsed = (datetime.utcnow() - start_time).total_seconds()
        
        print(f"      ✅ Training complete ({elapsed:.1f}s)")
        
        best_model = grid_search.best_estimator_
        best_params = grid_search.best_params_
        best_cv_score = grid_search.best_score_
        
        print(f"\n      📊 Results:")
        if is_regressor:
            print(f"         • Best CV MSE:     {-best_cv_score:.4f}")
        else:
            print(f"         • Best CV AUC:     {best_cv_score:.4f}")
        
        print(f"         • Best parameters:")
        for param, value in best_params.items():
            print(f"            - {param}: {value}")
        
        # Two views of the predictions:
        #   y_score_*      – used for AUC, which depends only on the ordering
        #   y_pred_*_proba – values in [0, 1], used for the Brier score and stored
        # For Elastic Net the raw output is ranked as-is: clipping first would
        # merge every out-of-range prediction into a tie and distort the AUC.
        if is_regressor:
            y_score_train = best_model.predict(X_train_processed)
            y_score_test = best_model.predict(X_test_processed)
            y_pred_train_proba = np.clip(y_score_train, 0, 1)
            y_pred_test_proba = np.clip(y_score_test, 0, 1)
        elif hasattr(best_model, 'predict_proba'):
            y_pred_train_proba = best_model.predict_proba(X_train_processed)[:, 1]
            y_pred_test_proba = best_model.predict_proba(X_test_processed)[:, 1]
            y_score_train = y_pred_train_proba
            y_score_test = y_pred_test_proba
        else:
            # NOTE(review): decision scores are not probabilities, so a Brier
            # score computed from them is not meaningful. No model currently in
            # MODELS takes this branch.
            y_pred_train_proba = best_model.decision_function(X_train_processed)
            y_pred_test_proba = best_model.decision_function(X_test_processed)
            y_score_train = y_pred_train_proba
            y_score_test = y_pred_test_proba
        
        train_auc = roc_auc_score(y_train, y_score_train)
        test_auc = roc_auc_score(y_test, y_score_test)
        train_brier = brier_score_loss(y_train, y_pred_train_proba)
        test_brier = brier_score_loss(y_test, y_pred_test_proba)
        auc_gap = abs(train_auc - test_auc)
        
        print(f"         • Train AUC:       {train_auc:.4f}")
        print(f"         • Test AUC:        {test_auc:.4f}")
        print(f"         • AUC Gap:         {auc_gap:.4f}")
        print(f"         • Train Brier:     {train_brier:.4f}")
        print(f"         • Test Brier:      {test_brier:.4f}")
        
        tier_models[model_name] = {
            'model': best_model,
            'scaler': scaler,
            'best_params': best_params,
            'cv_score': best_cv_score,
            'train_auc': train_auc,
            'test_auc': test_auc,
            'train_brier': train_brier,
            'test_brier': test_brier,
            'auc_gap': auc_gap,
            'y_pred_train': y_pred_train_proba,
            'y_pred_test': y_pred_test_proba,
            'training_time': elapsed,
        }
        
        tier_results.append({
            'Tier': tier_num,
            'K': K,
            'N_Features': n_features,
            'Model': model_name,
            'CV_Score': best_cv_score,
            'Train_AUC': train_auc,
            'Test_AUC': test_auc,
            'AUC_Gap': auc_gap,
            'Train_Brier': train_brier,
            'Test_Brier': test_brier,
            'Training_Time_s': elapsed,
        })
    
    ALL_TIER_MODELS[f'tier_{tier_num}'] = {
        'tier': tier_num,
        'K': K,
        'n_features': n_features,
        'features': selected_features,
        'models': tier_models,
        'results_df': pd.DataFrame(tier_results),
    }
    
    print(f"\n   {'='*80}")
    print(f"   📊 TIER {tier_num} SUMMARY ({n_features} features):\n")
    
    tier_df = pd.DataFrame(tier_results)
    print(f"      {'Model':25s}  {'Test AUC':>8s}  {'Gap':>6s}  {'Brier':>7s}")
    print(f"      {'-'*60}")
    
    for idx, row in tier_df.iterrows():
        print(f"      {row['Model']:25s}  {row['Test_AUC']:8.4f}  {row['AUC_Gap']:6.4f}  {row['Test_Brier']:7.4f}")
    
    # Reporting only: the highest internal-test AUC within this tier.
    best_idx = tier_df['Test_AUC'].idxmax()
    best_model_name = tier_df.loc[best_idx, 'Model']
    best_test_auc = tier_df.loc[best_idx, 'Test_AUC']
    
    print(f"\n      🏆 Best: {best_model_name} (AUC={best_test_auc:.4f})")
    
    save_csv(tier_df, f'step13_tier{tier_num}_results')

# ══════════════════════════════════════════════════════════════════════════════
# OVERALL SUMMARY
# ══════════════════════════════════════════════════════════════════════════════
# Stacks the per-tier result tables into `combined_df`, prints the full table,
# the five highest Test AUC rows and the five smallest train/test AUC gaps.

all_results = [tier_data['results_df'] for tier_data in ALL_TIER_MODELS.values()]
combined_df = pd.concat(all_results, ignore_index=True)

# Counts come from the results themselves, so the banner stays correct when
# tiers or models are added or removed (18 = 3 × 6 for the default setup).
print(f"\n{'='*100}")
print(f"📊 OVERALL SUMMARY: {len(combined_df)} MODELS "
      f"({combined_df['Tier'].nunique()} TIERS × {combined_df['Model'].nunique()} MODELS)")
print(f"{'='*100}\n")

print(f"   {'Tier':>5s}  {'Feat':>5s}  {'Model':25s}  {'Test AUC':>8s}  {'Gap':>6s}")
print(f"   {'-'*65}")

for idx, row in combined_df.iterrows():
    print(f"   {int(row['Tier']):5d}  {int(row['N_Features']):5d}  {row['Model']:25s}  "
          f"{row['Test_AUC']:8.4f}  {row['AUC_Gap']:6.4f}")

save_csv(combined_df, 'step13_all_models_summary')

print(f"\n{'='*100}")
print(f"🏆 TOP 5 MODELS (Test AUC):\n")

top5 = combined_df.nlargest(5, 'Test_AUC')

print(f"   {'Rank':>5s}  {'Tier':>5s}  {'Features':>9s}  {'Model':25s}  {'Test AUC':>8s}  {'Gap':>6s}")
print(f"   {'-'*80}")

for rank, (idx, row) in enumerate(top5.iterrows(), 1):
    print(f"   {rank:5d}  {int(row['Tier']):5d}  {int(row['N_Features']):9d}  "
          f"{row['Model']:25s}  {row['Test_AUC']:8.4f}  {row['AUC_Gap']:6.4f}")

print(f"\n{'='*100}")
print(f"✅ BEST GENERALIZATION (Smallest Gap):\n")

# AUC_Gap is the absolute train/test difference, so "smallest" means closest.
top5_gen = combined_df.nsmallest(5, 'AUC_Gap')

print(f"   {'Rank':>5s}  {'Tier':>5s}  {'Model':25s}  {'Test AUC':>8s}  {'Gap':>6s}")
print(f"   {'-'*70}")

for rank, (idx, row) in enumerate(top5_gen.iterrows(), 1):
    print(f"   {rank:5d}  {int(row['Tier']):5d}  {row['Model']:25s}  "
          f"{row['Test_AUC']:8.4f}  {row['AUC_Gap']:6.4f}")

# ══════════════════════════════════════════════════════════════════════════════
# VISUALIZATION
# ══════════════════════════════════════════════════════════════════════════════

# Figure 9: a 3x3 grid comparing every model across the three feature tiers.
#   Row 0      : A  test AUC per model, one line per tier
#   Row 1      : B  AUC-gap box plot per tier | C  test Brier score | D  mean training time
#   Row 2      : E-G  AUC gap per model, one panel per tier
# Reads `combined_df` (one row per tier/model) and the `COLORS` palette.
print(f"\n{'='*100}")
print(f"📊 Generating visualization...\n")

fig = plt.figure(figsize=(20, 12), dpi=300)
gs = fig.add_gridspec(3, 3, hspace=0.35, wspace=0.3)

tier_colors = [COLORS['primary'], COLORS['secondary'], COLORS['sig']]

# The layout is built for exactly three tiers (three colours, three bottom panels).
tier_nums = [1, 2, 3]
tier_frames = {t: combined_df[combined_df['Tier'] == t] for t in tier_nums}

# Feature counts come from the results table so every panel shows the same,
# actual number instead of a value typed into a label.
tier_feature_counts = {t: int(frame['N_Features'].iloc[0]) for t, frame in tier_frames.items()}

# Tick labels are looked up from the model names actually present (in row
# order), so they cannot drift out of sync with the plotted values.
short_name_by_model = {
    'Logistic Regression': 'LR',
    'Random Forest': 'RF',
    'Gradient Boosting': 'GB',
    'XGBoost': 'XGB',
    'SVM (RBF)': 'SVM',
    'Elastic Net': 'EN',
}
model_names_short = [short_name_by_model.get(name, name)
                     for name in tier_frames[tier_nums[0]]['Model']]

# Plot 1: Test AUC comparison
ax1 = fig.add_subplot(gs[0, :])

for tier_idx, tier_num in enumerate(tier_nums):
    tier_df = tier_frames[tier_num]
    x = np.arange(len(tier_df))
    ax1.plot(x, tier_df['Test_AUC'].values, marker='o', linewidth=2.5, markersize=10,
             label=f'Tier {tier_num} ({tier_feature_counts[tier_num]} features)',
             color=tier_colors[tier_idx])

ax1.set_xlabel('Model', fontsize=12, fontweight='bold')
ax1.set_ylabel('Test AUC', fontsize=12, fontweight='bold')
ax1.set_title('A. Test AUC Comparison Across 3 Tiers', fontsize=13, fontweight='bold', loc='left')
ax1.set_xticks(range(len(model_names_short)))
ax1.set_xticklabels(model_names_short)
ax1.legend(fontsize=10, loc='lower right')
ax1.grid(alpha=0.3)
# Keep the 0.75-0.90 window as the default view, but widen it when a model
# falls outside so no point is silently clipped off the axes.
auc_low = min(0.75, float(combined_df['Test_AUC'].min()) - 0.01)
auc_high = max(0.90, float(combined_df['Test_AUC'].max()) + 0.01)
ax1.set_ylim([auc_low, auc_high])

# Plot 2: Overfitting boxplot
ax2 = fig.add_subplot(gs[1, 0])

gaps_by_tier = [tier_frames[tier_num]['AUC_Gap'].values for tier_num in tier_nums]
box_labels = [f'Tier {tier_num}\n({tier_feature_counts[tier_num]} feat)' for tier_num in tier_nums]

# Labels are applied through the axis rather than boxplot(labels=...), which
# recent matplotlib releases deprecate in favour of tick_labels.
bp = ax2.boxplot(gaps_by_tier, patch_artist=True, widths=0.6)
ax2.set_xticklabels(box_labels)

for patch, color in zip(bp['boxes'], tier_colors):
    patch.set_facecolor(color)
    patch.set_alpha(0.7)

ax2.axhline(y=0.05, color='orange', linestyle='--', linewidth=2, label='0.05 threshold')
ax2.set_ylabel('AUC Gap (Train - Test)', fontsize=11, fontweight='bold')
ax2.set_title('B. Generalization (AUC Gap)', fontsize=12, fontweight='bold', loc='left')
ax2.legend(fontsize=9)
ax2.grid(axis='y', alpha=0.3)

# Plot 3: Brier Score
ax3 = fig.add_subplot(gs[1, 1])

x = np.arange(len(model_names_short))
width = 0.25

for tier_idx, tier_num in enumerate(tier_nums):
    tier_df = tier_frames[tier_num]
    # Centre the three bars of each model group around the model's tick.
    offset = (tier_idx - 1) * width
    ax3.bar(x + offset, tier_df['Test_Brier'].values, width,
            label=f'Tier {tier_num}', color=tier_colors[tier_idx], alpha=0.7)

ax3.set_xlabel('Model', fontsize=11, fontweight='bold')
ax3.set_ylabel('Brier Score', fontsize=11, fontweight='bold')
ax3.set_title('C. Calibration', fontsize=12, fontweight='bold', loc='left')
ax3.set_xticks(x)
ax3.set_xticklabels(model_names_short)
ax3.legend(fontsize=9)
ax3.grid(axis='y', alpha=0.3)

# Plot 4: Training time (mean over tiers, fastest model first)
ax4 = fig.add_subplot(gs[1, 2])

time_by_model = combined_df.groupby('Model')['Training_Time_s'].mean().sort_values()

ax4.barh(range(len(time_by_model)), time_by_model.values,
         color=COLORS['primary'], alpha=0.7, edgecolor='black')
ax4.set_yticks(range(len(time_by_model)))
ax4.set_yticklabels(time_by_model.index, fontsize=9)
ax4.set_xlabel('Training Time (s)', fontsize=11, fontweight='bold')
ax4.set_title('D. Computational Cost', fontsize=12, fontweight='bold', loc='left')
ax4.grid(axis='x', alpha=0.3)

# Plots 5-7: AUC gap per tier (bars at or above the 0.05 line are drawn red)
for tier_idx, tier_num in enumerate(tier_nums):
    ax = fig.add_subplot(gs[2, tier_idx])

    tier_df = tier_frames[tier_num]

    x = np.arange(len(tier_df))
    colors_gap = [tier_colors[tier_idx] if gap < 0.05 else 'red'
                  for gap in tier_df['AUC_Gap'].values]

    ax.bar(x, tier_df['AUC_Gap'].values, color=colors_gap,
           alpha=0.7, edgecolor='black', linewidth=1.5)
    ax.axhline(y=0.05, color='orange', linestyle='--', linewidth=2)

    ax.set_xlabel('Model', fontsize=11, fontweight='bold')
    ax.set_ylabel('AUC Gap', fontsize=11, fontweight='bold')
    ax.set_title(f'{"EFG"[tier_idx]}. Tier {tier_num} ({tier_feature_counts[tier_num]} features)',
                 fontsize=12, fontweight='bold', loc='left')
    ax.set_xticks(x)
    ax.set_xticklabels([short_name_by_model.get(name, name) for name in tier_df['Model']],
                       fontsize=9)
    ax.grid(axis='y', alpha=0.3)

fig.suptitle('Figure 9. Model Performance Comparison Across 3 Feature Tiers\n(Internal Test Set)',
             fontsize=15, fontweight='bold', y=0.98)

save_figure(fig, 'step13_fig9_3tier_model_comparison')
plt.show()

print(f"✅ Figure 9 saved")

# ══════════════════════════════════════════════════════════════════════════════
# SAVE & LOG
# ══════════════════════════════════════════════════════════════════════════════

# Persist every fitted model, record the step outcome in the run log, and
# expose the trained models under the name the next step reads.
save_pickle(ALL_TIER_MODELS, 'step13_all_tier_models')

# Row with the highest test AUC across all tiers.
best_overall_idx = combined_df['Test_AUC'].idxmax()
best_overall = combined_df.loc[best_overall_idx]

# Row with the smallest AUC_Gap value across all tiers.
best_gen_idx = combined_df['AUC_Gap'].idxmin()
best_gen = combined_df.loc[best_gen_idx]

# Counts are read from the results table so the run log records what was
# actually trained, even if the tier or model line-up changes.
n_tiers_logged = int(combined_df['Tier'].nunique())
n_models_per_tier_logged = int(combined_df.groupby('Tier')['Model'].nunique().max())
total_models_logged = int(len(combined_df))

append_runlog("13", {
    "analysis": "Model training and hyperparameter tuning (3-tier)",
    "n_tiers": n_tiers_logged,
    "n_models_per_tier": n_models_per_tier_logged,
    "total_models": total_models_logged,
    "best_model_overall": {
        "tier": int(best_overall['Tier']),
        "model": str(best_overall['Model']),
        "n_features": int(best_overall['N_Features']),
        "test_auc": float(best_overall['Test_AUC']),
        "auc_gap": float(best_overall['AUC_Gap']),
    },
    "best_generalization": {
        "tier": int(best_gen['Tier']),
        "model": str(best_gen['Model']),
        "test_auc": float(best_gen['Test_AUC']),
        "auc_gap": float(best_gen['AUC_Gap']),
    },
})

# Alias under which the trained models are handed to later cells.
TRAINED_MODELS_3TIER = ALL_TIER_MODELS

print("\n💾 Stored: TRAINED_MODELS_3TIER")
print("\n" + "="*100)
print("✅ STEP 13 COMPLETE")
print("="*100)
print(f"\n📊 Best AUC:  {best_overall['Model']} (Tier {int(best_overall['Tier'])}) — AUC={best_overall['Test_AUC']:.4f}")
print(f"   Best Gap:  {best_gen['Model']} (Tier {int(best_gen['Tier'])}) — Gap={best_gen['AUC_Gap']:.4f}")
print(f"\n🎯 NEXT: Step 14 — External validation")
print("="*100 + "\n")

In [None]:
# ═══════════════════════════════════════════════════════════════════════════════
# STEP 13: MODEL TRAINING & HYPERPARAMETER TUNING (3-TIER APPROACH)
# TRIPOD: 10b (Model development), 10c (Hyperparameter tuning)
# ═══════════════════════════════════════════════════════════════════════════════

from sklearn.linear_model import LogisticRegression, ElasticNet
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (roc_auc_score, roc_curve, precision_recall_curve,
                             brier_score_loss, classification_report, confusion_matrix)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Step banner with the current UTC time, followed by the working copies of
# the train/test data and a short summary of what will be modelled.
_rule = "=" * 100
print("\n" + _rule)
print("STEP 13: MODEL TRAINING & HYPERPARAMETER TUNING (3-TIER APPROACH)")
print(_rule)
_started_at = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
print(f"UTC: {_started_at}")
print("User: zainzampawala786-sudo\n")

# Get data (copies, so this step never mutates the shared containers)
X_train, y_train, X_test, y_test = (
    CLEAN_FEATURE_DATA[_name].copy()
    for _name in ("X_train_clean", "y_train", "X_test_clean", "y_test")
)
TIER_RESULTS = ELASTICNET_3TIER
COLORS = DISTRIBUTION_DATA["colors_enhanced"]

# Event counts = rows whose label equals 1.
_train_events = (y_train == 1).sum()
_test_events = (y_test == 1).sum()
_train_event_pct = _train_events / len(y_train) * 100
_test_event_pct = _test_events / len(y_test) * 100

print(_rule)
print("📊 DATA SUMMARY:\n")
print(f"   Training samples:  {len(X_train)} (events: {_train_events}, {_train_event_pct:.1f}%)")
print(f"   Test samples:      {len(X_test)} (events: {_test_events}, {_test_event_pct:.1f}%)")
print(f"\n   Tiers: {len(TIER_RESULTS)}")

# One line per tier: tier number, final feature count and its K setting.
for tier_data in TIER_RESULTS.values():
    _tier_line = f"Tier {tier_data['tier']}: {tier_data['n_features_final']} features (K={tier_data['K']})"
    print("      • " + _tier_line)

# ══════════════════════════════════════════════════════════════════════════════
# MODEL DEFINITIONS & HYPERPARAMETER GRIDS
# ══════════════════════════════════════════════════════════════════════════════

# Candidate estimators and their hyperparameter grids.
# Each MODELS entry holds:
#   'model'  - an unfitted estimator,
#   'params' - the grid handed to GridSearchCV,
#   'scale'  - whether the training loop standardises features for this model.
print(f"\n{'='*100}")
print(f"🤖 MODEL CONFIGURATIONS:\n")

# Seed, fold count and parallelism are read from the Step 0 CONFIG so that a
# change there reaches this step; the fallbacks are the values this step used
# when they were written inline.
_seed = CONFIG.get("random_state", 42)
_n_folds = CONFIG.get("cv_folds", 5)
_n_jobs = CONFIG.get("n_jobs", -1)

# Negative/positive ratio used as XGBoost's scale_pos_weight.
_n_negative = int((y_train == 0).sum())
_n_positive = int((y_train == 1).sum())
if _n_positive == 0:
    raise ValueError("y_train contains no positive (==1) samples; cannot compute scale_pos_weight")
scale_pos_weight = _n_negative / _n_positive

MODELS = {
    'Logistic Regression': {
        'model': LogisticRegression(random_state=_seed, max_iter=2000, solver='saga'),
        # NOTE(review): 'l1_ratio' is crossed with every penalty, so the
        # 'l1'/'l2' settings are fitted once per l1_ratio value — confirm
        # whether that redundancy is intended.
        'params': {
            'C': [0.001, 0.01, 0.1, 1.0],
            'penalty': ['l1', 'l2', 'elasticnet'],
            'l1_ratio': [0.3, 0.5, 0.7],
            'class_weight': ['balanced']
        },
        'scale': True
    },

    'Random Forest': {
        'model': RandomForestClassifier(random_state=_seed, n_jobs=_n_jobs),
        'params': {
            'n_estimators': [200, 300, 500],
            'max_depth': [4, 6],
            'min_samples_split': [10, 20, 30],
            'min_samples_leaf': [5, 10],
            'max_features': ['sqrt', 'log2'],
            'class_weight': ['balanced']
        },
        'scale': False
    },

    'Gradient Boosting': {
        'model': GradientBoostingClassifier(random_state=_seed),
        'params': {
            'n_estimators': [100, 200, 300],
            'learning_rate': [0.01, 0.05],
            'max_depth': [3, 4],
            'min_samples_split': [10, 20],
            'min_samples_leaf': [5, 10],
            'subsample': [0.7, 0.8],
            'max_features': ['sqrt', 'log2']
        },
        'scale': False
    },

    'XGBoost': {
        'model': XGBClassifier(random_state=_seed, eval_metric='logloss',
                               use_label_encoder=False, n_jobs=_n_jobs),
        'params': {
            'n_estimators': [200, 300, 500],
            'learning_rate': [0.01, 0.05, 0.1],
            'max_depth': [3, 4, 5],
            'min_child_weight': [3, 5, 7],
            'subsample': [0.7, 0.8],
            'colsample_bytree': [0.7, 0.8],
            'gamma': [0.1, 0.5],
            'reg_alpha': [0.1, 0.5],
            'reg_lambda': [1.0, 2.0],
            'scale_pos_weight': [scale_pos_weight]
        },
        'scale': False
    },

    'SVM (RBF)': {
        # probability=True so the fitted model exposes predict_proba.
        'model': SVC(probability=True, random_state=_seed, max_iter=2000),
        'params': {
            'C': [0.1, 1.0, 10],
            'gamma': ['scale', 'auto', 0.01],
            'class_weight': ['balanced']
        },
        'scale': True
    },

    'Elastic Net': {
        # A regressor: the training loop tunes it on MSE and clips its
        # predictions to [0, 1] before scoring.
        'model': ElasticNet(random_state=_seed, max_iter=2000, selection='random'),
        'params': {
            'alpha': [0.001, 0.01, 0.1, 1.0],
            'l1_ratio': [0.3, 0.5, 0.7, 0.9],
        },
        'scale': True
    }
}

# Grid size = product of the number of candidate values per hyperparameter.
for model_name, config in MODELS.items():
    n_combos = int(np.prod([len(v) for v in config['params'].values()]))
    print(f"   • {model_name:25s} — {n_combos:4d} hyperparameter combinations")

print(f"\n   Cross-validation: {_n_folds}-fold stratified | Scoring: ROC AUC (Elastic Net: MSE)")

# Filled by the training loop: one entry per tier.
ALL_TIER_MODELS = {}
cv_strategy = StratifiedKFold(n_splits=_n_folds, shuffle=True, random_state=_seed)

# ══════════════════════════════════════════════════════════════════════════════
# TRAIN MODELS FOR EACH TIER
# ══════════════════════════════════════════════════════════════════════════════

from time import perf_counter

from sklearn.pipeline import Pipeline


def _prefix_param_grid(param_grid, prefix):
    """Return *param_grid* with *prefix* prepended to every key (dict or list of dicts)."""
    if isinstance(param_grid, dict):
        return {f"{prefix}{name}": values for name, values in param_grid.items()}
    return [{f"{prefix}{name}": values for name, values in grid.items()}
            for grid in param_grid]


# For every feature tier: grid-search each model in MODELS with the shared CV
# splitter, refit the best setting on the full training set, score it on the
# train and test sets, and collect models plus metrics in ALL_TIER_MODELS.
for tier_key, tier_data in TIER_RESULTS.items():

    tier_num = tier_data['tier']
    K = tier_data['K']
    n_features = tier_data['n_features_final']
    selected_features = tier_data['selected_features_enet']

    print(f"\n{'='*100}")
    print(f"🔬 TIER {tier_num}: {n_features} FEATURES (K={K})")
    print(f"{'='*100}\n")

    print(f"   Features ({n_features}):")
    for i, feat in enumerate(selected_features, 1):
        print(f"      {i:2d}. {feat}")

    X_train_tier = X_train[selected_features].copy()
    X_test_tier = X_test[selected_features].copy()

    print(f"\n   Training shape: {X_train_tier.shape}")
    print(f"   Test shape:     {X_test_tier.shape}")
    # Events per variable: positive training labels divided by feature count.
    print(f"   EPV:            {(y_train==1).sum() / n_features:.2f}")

    tier_models = {}
    tier_results = []

    for model_name, config in MODELS.items():

        print(f"\n   {'─'*80}")
        print(f"   🤖 {model_name}")
        print(f"   {'─'*80}\n")

        # Elastic Net is a regressor, so it is tuned on MSE instead of ROC AUC.
        is_elastic_net = model_name == 'Elastic Net'

        if config['scale']:
            # Scale inside a pipeline so the scaler is re-fitted on the
            # training part of every CV fold; fitting it once on the whole
            # training set would leak validation-fold statistics into the
            # cross-validated score.
            search_estimator = Pipeline([
                ('scaler', StandardScaler()),
                ('model', config['model']),
            ])
            search_grid = _prefix_param_grid(config['params'], 'model__')
            X_fit = X_train_tier
            print(f"      ✅ Features scaled")
        else:
            search_estimator = config['model']
            search_grid = config['params']
            X_fit = X_train_tier.values

        grid_search = GridSearchCV(
            estimator=search_estimator,
            param_grid=search_grid,
            cv=cv_strategy,
            scoring='neg_mean_squared_error' if is_elastic_net else 'roc_auc',
            n_jobs=-1,
            verbose=0
        )

        # Monotonic clock: unaffected by system clock adjustments.
        timer_start = perf_counter()
        grid_search.fit(X_fit, y_train)
        elapsed = perf_counter() - timer_start

        print(f"      ✅ Training complete ({elapsed:.1f}s)")

        best_cv_score = grid_search.best_score_

        if config['scale']:
            # Unpack the refitted pipeline so 'model' and 'scaler' are stored
            # separately, the way Step 14 reads them.
            fitted_pipeline = grid_search.best_estimator_
            scaler = fitted_pipeline.named_steps['scaler']
            best_model = fitted_pipeline.named_steps['model']
            best_params = {name.split('__', 1)[1]: value
                           for name, value in grid_search.best_params_.items()}
            X_train_processed = scaler.transform(X_train_tier)
            X_test_processed = scaler.transform(X_test_tier)
        else:
            scaler = None
            best_model = grid_search.best_estimator_
            best_params = grid_search.best_params_
            X_train_processed = X_train_tier.values
            X_test_processed = X_test_tier.values

        print(f"\n      📊 Results:")
        if is_elastic_net:
            # The search maximises negative MSE; flip the sign for display.
            print(f"         • Best CV MSE:     {-best_cv_score:.4f}")
        else:
            print(f"         • Best CV AUC:     {best_cv_score:.4f}")

        print(f"         • Best parameters:")
        for param, value in best_params.items():
            print(f"            - {param}: {value}")

        if is_elastic_net:
            # Regression outputs are clipped into [0, 1] to act as probabilities.
            y_pred_train_proba = np.clip(best_model.predict(X_train_processed), 0, 1)
            y_pred_test_proba = np.clip(best_model.predict(X_test_processed), 0, 1)
        elif hasattr(best_model, 'predict_proba'):
            y_pred_train_proba = best_model.predict_proba(X_train_processed)[:, 1]
            y_pred_test_proba = best_model.predict_proba(X_test_processed)[:, 1]
        else:
            # NOTE(review): decision scores are not probabilities, so the
            # Brier scores below would not be meaningful for such a model.
            y_pred_train_proba = best_model.decision_function(X_train_processed)
            y_pred_test_proba = best_model.decision_function(X_test_processed)

        train_auc = roc_auc_score(y_train, y_pred_train_proba)
        test_auc = roc_auc_score(y_test, y_pred_test_proba)
        train_brier = brier_score_loss(y_train, y_pred_train_proba)
        test_brier = brier_score_loss(y_test, y_pred_test_proba)
        # Absolute difference: the sign (train above or below test) is dropped.
        auc_gap = abs(train_auc - test_auc)

        print(f"         • Train AUC:       {train_auc:.4f}")
        print(f"         • Test AUC:        {test_auc:.4f}")
        print(f"         • AUC Gap:         {auc_gap:.4f}")
        print(f"         • Train Brier:     {train_brier:.4f}")
        print(f"         • Test Brier:      {test_brier:.4f}")

        tier_models[model_name] = {
            'model': best_model,
            'scaler': scaler,
            'best_params': best_params,
            'cv_score': best_cv_score,
            'train_auc': train_auc,
            'test_auc': test_auc,
            'train_brier': train_brier,
            'test_brier': test_brier,
            'auc_gap': auc_gap,
            'y_pred_train': y_pred_train_proba,
            'y_pred_test': y_pred_test_proba,
            'training_time': elapsed,
        }

        tier_results.append({
            'Tier': tier_num,
            'K': K,
            'N_Features': n_features,
            'Model': model_name,
            'CV_Score': best_cv_score,
            'Train_AUC': train_auc,
            'Test_AUC': test_auc,
            'AUC_Gap': auc_gap,
            'Train_Brier': train_brier,
            'Test_Brier': test_brier,
            'Training_Time_s': elapsed,
        })

    ALL_TIER_MODELS[f'tier_{tier_num}'] = {
        'tier': tier_num,
        'K': K,
        'n_features': n_features,
        'features': selected_features,
        'models': tier_models,
        'results_df': pd.DataFrame(tier_results),
    }

    print(f"\n   {'='*80}")
    print(f"   📊 TIER {tier_num} SUMMARY ({n_features} features):\n")

    tier_df = pd.DataFrame(tier_results)
    print(f"      {'Model':25s}  {'Test AUC':>8s}  {'Gap':>6s}  {'Brier':>7s}")
    print(f"      {'-'*60}")

    for idx, row in tier_df.iterrows():
        print(f"      {row['Model']:25s}  {row['Test_AUC']:8.4f}  {row['AUC_Gap']:6.4f}  {row['Test_Brier']:7.4f}")

    # Tier winner = highest test AUC.
    best_idx = tier_df['Test_AUC'].idxmax()
    best_model_name = tier_df.loc[best_idx, 'Model']
    best_test_auc = tier_df.loc[best_idx, 'Test_AUC']

    print(f"\n      🏆 Best: {best_model_name} (AUC={best_test_auc:.4f})")

    save_csv(tier_df, f'step13_tier{tier_num}_results')

# ══════════════════════════════════════════════════════════════════════════════
# OVERALL SUMMARY
# ══════════════════════════════════════════════════════════════════════════════

# Cross-tier roll-up: merge the per-tier result tables into one frame, print
# the full leaderboard, persist it, then list the five best rows by test AUC
# and the five rows with the smallest AUC gap.
_rule = "=" * 100
print("\n" + _rule)
print("📊 OVERALL SUMMARY: 18 MODELS (3 TIERS × 6 MODELS)")
print(_rule + "\n")

# One results table per tier, stacked with a fresh 0..n-1 index.
all_results = [tier_entry['results_df'] for tier_entry in ALL_TIER_MODELS.values()]
combined_df = pd.concat(all_results, ignore_index=True)

# --- Full leaderboard -------------------------------------------------------
_header_cells = (f"{'Tier':>5s}", f"{'Feat':>5s}", f"{'Model':25s}",
                 f"{'Test AUC':>8s}", f"{'Gap':>6s}")
print("   " + "  ".join(_header_cells))
print("   " + "-" * 65)

for rec in combined_df.itertuples(index=False):
    _cells = (f"{int(rec.Tier):5d}", f"{int(rec.N_Features):5d}", f"{rec.Model:25s}",
              f"{rec.Test_AUC:8.4f}", f"{rec.AUC_Gap:6.4f}")
    print("   " + "  ".join(_cells))

save_csv(combined_df, 'step13_all_models_summary')

# --- Five highest test AUCs -------------------------------------------------
print("\n" + _rule)
print("🏆 TOP 5 MODELS (Test AUC):\n")

top5 = combined_df.nlargest(5, 'Test_AUC')

_header_cells = (f"{'Rank':>5s}", f"{'Tier':>5s}", f"{'Features':>9s}", f"{'Model':25s}",
                 f"{'Test AUC':>8s}", f"{'Gap':>6s}")
print("   " + "  ".join(_header_cells))
print("   " + "-" * 80)

for rank, rec in enumerate(top5.itertuples(index=False), start=1):
    _cells = (f"{rank:5d}", f"{int(rec.Tier):5d}", f"{int(rec.N_Features):9d}",
              f"{rec.Model:25s}", f"{rec.Test_AUC:8.4f}", f"{rec.AUC_Gap:6.4f}")
    print("   " + "  ".join(_cells))

# --- Five smallest AUC gaps -------------------------------------------------
print("\n" + _rule)
print("✅ BEST GENERALIZATION (Smallest Gap):\n")

top5_gen = combined_df.nsmallest(5, 'AUC_Gap')

_header_cells = (f"{'Rank':>5s}", f"{'Tier':>5s}", f"{'Model':25s}",
                 f"{'Test AUC':>8s}", f"{'Gap':>6s}")
print("   " + "  ".join(_header_cells))
print("   " + "-" * 70)

for rank, rec in enumerate(top5_gen.itertuples(index=False), start=1):
    _cells = (f"{rank:5d}", f"{int(rec.Tier):5d}", f"{rec.Model:25s}",
              f"{rec.Test_AUC:8.4f}", f"{rec.AUC_Gap:6.4f}")
    print("   " + "  ".join(_cells))

# ══════════════════════════════════════════════════════════════════════════════
# VISUALIZATION
# ══════════════════════════════════════════════════════════════════════════════

# Figure 9: a 3x3 grid comparing every model across the three feature tiers.
#   Row 0      : A  test AUC per model, one line per tier
#   Row 1      : B  AUC-gap box plot per tier | C  test Brier score | D  mean training time
#   Row 2      : E-G  AUC gap per model, one panel per tier
# Reads `combined_df` (one row per tier/model) and the `COLORS` palette.
print(f"\n{'='*100}")
print(f"📊 Generating visualization...\n")

fig = plt.figure(figsize=(20, 12), dpi=300)
gs = fig.add_gridspec(3, 3, hspace=0.35, wspace=0.3)

tier_colors = [COLORS['primary'], COLORS['secondary'], COLORS['sig']]

# The layout is built for exactly three tiers (three colours, three bottom panels).
tier_nums = [1, 2, 3]
tier_frames = {t: combined_df[combined_df['Tier'] == t] for t in tier_nums}

# Feature counts come from the results table so every panel shows the same,
# actual number instead of a value typed into a label.
tier_feature_counts = {t: int(frame['N_Features'].iloc[0]) for t, frame in tier_frames.items()}

# Tick labels are looked up from the model names actually present (in row
# order), so they cannot drift out of sync with the plotted values.
short_name_by_model = {
    'Logistic Regression': 'LR',
    'Random Forest': 'RF',
    'Gradient Boosting': 'GB',
    'XGBoost': 'XGB',
    'SVM (RBF)': 'SVM',
    'Elastic Net': 'EN',
}
model_names_short = [short_name_by_model.get(name, name)
                     for name in tier_frames[tier_nums[0]]['Model']]

# Plot 1: Test AUC comparison
ax1 = fig.add_subplot(gs[0, :])

for tier_idx, tier_num in enumerate(tier_nums):
    tier_df = tier_frames[tier_num]
    x = np.arange(len(tier_df))
    ax1.plot(x, tier_df['Test_AUC'].values, marker='o', linewidth=2.5, markersize=10,
             label=f'Tier {tier_num} ({tier_feature_counts[tier_num]} features)',
             color=tier_colors[tier_idx])

ax1.set_xlabel('Model', fontsize=12, fontweight='bold')
ax1.set_ylabel('Test AUC', fontsize=12, fontweight='bold')
ax1.set_title('A. Test AUC Comparison Across 3 Tiers', fontsize=13, fontweight='bold', loc='left')
ax1.set_xticks(range(len(model_names_short)))
ax1.set_xticklabels(model_names_short)
ax1.legend(fontsize=10, loc='lower right')
ax1.grid(alpha=0.3)
# Keep the 0.75-0.90 window as the default view, but widen it when a model
# falls outside so no point is silently clipped off the axes.
auc_low = min(0.75, float(combined_df['Test_AUC'].min()) - 0.01)
auc_high = max(0.90, float(combined_df['Test_AUC'].max()) + 0.01)
ax1.set_ylim([auc_low, auc_high])

# Plot 2: Overfitting boxplot
ax2 = fig.add_subplot(gs[1, 0])

gaps_by_tier = [tier_frames[tier_num]['AUC_Gap'].values for tier_num in tier_nums]
box_labels = [f'Tier {tier_num}\n({tier_feature_counts[tier_num]} feat)' for tier_num in tier_nums]

# Labels are applied through the axis rather than boxplot(labels=...), which
# recent matplotlib releases deprecate in favour of tick_labels.
bp = ax2.boxplot(gaps_by_tier, patch_artist=True, widths=0.6)
ax2.set_xticklabels(box_labels)

for patch, color in zip(bp['boxes'], tier_colors):
    patch.set_facecolor(color)
    patch.set_alpha(0.7)

ax2.axhline(y=0.05, color='orange', linestyle='--', linewidth=2, label='0.05 threshold')
ax2.set_ylabel('AUC Gap (Train - Test)', fontsize=11, fontweight='bold')
ax2.set_title('B. Generalization (AUC Gap)', fontsize=12, fontweight='bold', loc='left')
ax2.legend(fontsize=9)
ax2.grid(axis='y', alpha=0.3)

# Plot 3: Brier Score
ax3 = fig.add_subplot(gs[1, 1])

x = np.arange(len(model_names_short))
width = 0.25

for tier_idx, tier_num in enumerate(tier_nums):
    tier_df = tier_frames[tier_num]
    # Centre the three bars of each model group around the model's tick.
    offset = (tier_idx - 1) * width
    ax3.bar(x + offset, tier_df['Test_Brier'].values, width,
            label=f'Tier {tier_num}', color=tier_colors[tier_idx], alpha=0.7)

ax3.set_xlabel('Model', fontsize=11, fontweight='bold')
ax3.set_ylabel('Brier Score', fontsize=11, fontweight='bold')
ax3.set_title('C. Calibration', fontsize=12, fontweight='bold', loc='left')
ax3.set_xticks(x)
ax3.set_xticklabels(model_names_short)
ax3.legend(fontsize=9)
ax3.grid(axis='y', alpha=0.3)

# Plot 4: Training time (mean over tiers, fastest model first)
ax4 = fig.add_subplot(gs[1, 2])

time_by_model = combined_df.groupby('Model')['Training_Time_s'].mean().sort_values()

ax4.barh(range(len(time_by_model)), time_by_model.values,
         color=COLORS['primary'], alpha=0.7, edgecolor='black')
ax4.set_yticks(range(len(time_by_model)))
ax4.set_yticklabels(time_by_model.index, fontsize=9)
ax4.set_xlabel('Training Time (s)', fontsize=11, fontweight='bold')
ax4.set_title('D. Computational Cost', fontsize=12, fontweight='bold', loc='left')
ax4.grid(axis='x', alpha=0.3)

# Plots 5-7: AUC gap per tier (bars at or above the 0.05 line are drawn red)
for tier_idx, tier_num in enumerate(tier_nums):
    ax = fig.add_subplot(gs[2, tier_idx])

    tier_df = tier_frames[tier_num]

    x = np.arange(len(tier_df))
    colors_gap = [tier_colors[tier_idx] if gap < 0.05 else 'red'
                  for gap in tier_df['AUC_Gap'].values]

    ax.bar(x, tier_df['AUC_Gap'].values, color=colors_gap,
           alpha=0.7, edgecolor='black', linewidth=1.5)
    ax.axhline(y=0.05, color='orange', linestyle='--', linewidth=2)

    ax.set_xlabel('Model', fontsize=11, fontweight='bold')
    ax.set_ylabel('AUC Gap', fontsize=11, fontweight='bold')
    ax.set_title(f'{"EFG"[tier_idx]}. Tier {tier_num} ({tier_feature_counts[tier_num]} features)',
                 fontsize=12, fontweight='bold', loc='left')
    ax.set_xticks(x)
    ax.set_xticklabels([short_name_by_model.get(name, name) for name in tier_df['Model']],
                       fontsize=9)
    ax.grid(axis='y', alpha=0.3)

fig.suptitle('Figure 9. Model Performance Comparison Across 3 Feature Tiers\n(Internal Test Set)',
             fontsize=15, fontweight='bold', y=0.98)

save_figure(fig, 'step13_fig9_3tier_model_comparison')
plt.show()

print(f"✅ Figure 9 saved")

# ══════════════════════════════════════════════════════════════════════════════
# SAVE & LOG
# ══════════════════════════════════════════════════════════════════════════════

# Persist every fitted model, record the step outcome in the run log, and
# expose the trained models under the name the next step reads.
save_pickle(ALL_TIER_MODELS, 'step13_all_tier_models')

# Row with the highest test AUC across all tiers.
best_overall_idx = combined_df['Test_AUC'].idxmax()
best_overall = combined_df.loc[best_overall_idx]

# Row with the smallest AUC_Gap value across all tiers.
best_gen_idx = combined_df['AUC_Gap'].idxmin()
best_gen = combined_df.loc[best_gen_idx]

# Counts are read from the results table so the run log records what was
# actually trained, even if the tier or model line-up changes.
n_tiers_logged = int(combined_df['Tier'].nunique())
n_models_per_tier_logged = int(combined_df.groupby('Tier')['Model'].nunique().max())
total_models_logged = int(len(combined_df))

append_runlog("13", {
    "analysis": "Model training and hyperparameter tuning (3-tier)",
    "n_tiers": n_tiers_logged,
    "n_models_per_tier": n_models_per_tier_logged,
    "total_models": total_models_logged,
    "best_model_overall": {
        "tier": int(best_overall['Tier']),
        "model": str(best_overall['Model']),
        "n_features": int(best_overall['N_Features']),
        "test_auc": float(best_overall['Test_AUC']),
        "auc_gap": float(best_overall['AUC_Gap']),
    },
    "best_generalization": {
        "tier": int(best_gen['Tier']),
        "model": str(best_gen['Model']),
        "test_auc": float(best_gen['Test_AUC']),
        "auc_gap": float(best_gen['AUC_Gap']),
    },
})

# Alias read by Step 14 (TRAINED_MODELS = TRAINED_MODELS_3TIER).
TRAINED_MODELS_3TIER = ALL_TIER_MODELS

print("\n💾 Stored: TRAINED_MODELS_3TIER")
print("\n" + "="*100)
print("✅ STEP 13 COMPLETE")
print("="*100)
print(f"\n📊 Best AUC:  {best_overall['Model']} (Tier {int(best_overall['Tier'])}) — AUC={best_overall['Test_AUC']:.4f}")
print(f"   Best Gap:  {best_gen['Model']} (Tier {int(best_gen['Tier'])}) — Gap={best_gen['AUC_Gap']:.4f}")
print(f"\n🎯 NEXT: Step 14 — External validation")
print("="*100 + "\n")

In [None]:
# ═══════════════════════════════════════════════════════════════════════════════
# STEP 14: EXTERNAL VALIDATION (3-TIER APPROACH)
# TRIPOD: 10d (Model validation), 15 (Final model selection)
# ═══════════════════════════════════════════════════════════════════════════════

from sklearn.metrics import (roc_auc_score, roc_curve, precision_recall_curve,
                             brier_score_loss, confusion_matrix, classification_report,
                             accuracy_score, precision_score, recall_score, f1_score)
from sklearn.calibration import calibration_curve
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

print("\n" + "="*100)
print("STEP 14: EXTERNAL VALIDATION (3-TIER APPROACH)")
print("="*100)
# Print the actual run time (same format as the Step 0 banner) instead of a
# frozen literal, so re-runs are not stamped with a stale date.
print(f"UTC: {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"User: zainzampawala786-sudo\n")

# Get data: external cohort, trained models from Step 13, and the shared palette.
X_external = CLEAN_FEATURE_DATA["X_external_clean"].copy()
y_external = CLEAN_FEATURE_DATA["y_external"].copy()
TRAINED_MODELS = TRAINED_MODELS_3TIER
COLORS = DISTRIBUTION_DATA["colors_enhanced"]

# Count events once and reuse the value for both the absolute and relative figure.
n_external_events = (y_external==1).sum()

print(f"{'='*100}")
print(f"📊 DATA SUMMARY:\n")
print(f"   External samples:  {len(X_external)}")
print(f"   External events:   {n_external_events} ({n_external_events/len(y_external)*100:.1f}%)")
print(f"   Tiers loaded:      {len(TRAINED_MODELS)}")

# ══════════════════════════════════════════════════════════════════════════════
# VALIDATE EACH TIER ON EXTERNAL DATA
# ══════════════════════════════════════════════════════════════════════════════

# One dict per (tier, model) pair; turned into a DataFrame after the loop.
external_results = []

for tier_key, tier_data in TRAINED_MODELS.items():

    tier_num = tier_data['tier']
    n_features = tier_data['n_features']
    selected_features = tier_data['features']
    models = tier_data['models']

    print(f"\n{'='*100}")
    print(f"🔬 TIER {tier_num}: EXTERNAL VALIDATION ({n_features} FEATURES)")
    print(f"{'='*100}\n")

    print(f"   Features ({n_features}):")
    for i, feat in enumerate(selected_features, 1):
        print(f"      {i:2d}. {feat}")

    # Subset external data to tier features
    X_external_tier = X_external[selected_features].copy()

    print(f"\n   External shape: {X_external_tier.shape}")
    print(f"   Events:         {(y_external==1).sum()}")
    print(f"   EPV:            {(y_external==1).sum() / n_features:.2f}\n")

    # Evaluate each model
    for model_name, model_data in models.items():

        print(f"   {'─'*80}")
        print(f"   🤖 {model_name}")
        print(f"   {'─'*80}\n")

        best_model = model_data['model']
        scaler = model_data['scaler']

        # Scale if needed (models stored without a scaler get the raw values)
        if scaler is not None:
            X_external_processed = scaler.transform(X_external_tier)
        else:
            X_external_processed = X_external_tier.values

        # Predict. Track whether the scores are bounded to [0, 1]: only then is a
        # Brier score meaningful (decision_function margins are unbounded).
        scores_are_probabilities = True
        if model_name == 'Elastic Net':
            # Regression output is clipped into [0, 1] so it can be scored like a probability.
            y_pred_proba = np.clip(best_model.predict(X_external_processed), 0, 1)
        elif hasattr(best_model, 'predict_proba'):
            y_pred_proba = best_model.predict_proba(X_external_processed)[:, 1]
        else:
            y_pred_proba = best_model.decision_function(X_external_processed)
            scores_are_probabilities = False

        # Calculate metrics. AUC only needs a ranking, so any score type works;
        # Brier is reported as NaN for non-probability scores instead of being
        # computed on values outside [0, 1].
        external_auc = roc_auc_score(y_external, y_pred_proba)
        if scores_are_probabilities:
            external_brier = brier_score_loss(y_external, y_pred_proba)
        else:
            external_brier = np.nan

        # Get internal test AUC for comparison
        internal_test_auc = model_data['test_auc']
        auc_drop = internal_test_auc - external_auc

        # Optimal threshold (Youden's J statistic)
        # NOTE(review): the cut-off is tuned on the external cohort itself, so the
        # threshold-based metrics below are optimistic — consider fixing the
        # threshold on development data.
        fpr, tpr, thresholds = roc_curve(y_external, y_pred_proba)
        j_scores = tpr - fpr
        optimal_idx = np.argmax(j_scores)
        optimal_threshold = thresholds[optimal_idx]

        # Binary predictions at optimal threshold
        y_pred_binary = (y_pred_proba >= optimal_threshold).astype(int)

        # Classification metrics
        accuracy = accuracy_score(y_external, y_pred_binary)
        precision = precision_score(y_external, y_pred_binary, zero_division=0)
        recall = recall_score(y_external, y_pred_binary, zero_division=0)
        f1 = f1_score(y_external, y_pred_binary, zero_division=0)

        # Confusion matrix
        tn, fp, fn, tp = confusion_matrix(y_external, y_pred_binary).ravel()
        specificity = tn / (tn + fp)
        sensitivity = tp / (tp + fn)

        print(f"      📊 Performance:")
        print(f"         • Internal Test AUC:   {internal_test_auc:.4f}")
        print(f"         • External AUC:        {external_auc:.4f}")
        print(f"         • AUC Drop:            {auc_drop:.4f}", end='')

        if auc_drop < 0.05:
            print(f"  ✅ Excellent (<0.05)")
        elif auc_drop < 0.10:
            print(f"  ✅ Good (<0.10)")
        elif auc_drop < 0.15:
            print(f"  ⚠️  Moderate (<0.15)")
        else:
            print(f"  🔴 Poor (≥0.15)")

        print(f"         • Brier Score:         {external_brier:.4f}")
        print(f"         • Optimal Threshold:   {optimal_threshold:.4f}")
        print(f"\n      📈 Classification Metrics (at optimal threshold):")
        print(f"         • Accuracy:            {accuracy:.4f}")
        print(f"         • Sensitivity:         {sensitivity:.4f}")
        print(f"         • Specificity:         {specificity:.4f}")
        print(f"         • Precision:           {precision:.4f}")
        print(f"         • F1-Score:            {f1:.4f}")
        print(f"\n      🎯 Confusion Matrix:")
        print(f"         • True Negatives:      {tn}")
        print(f"         • False Positives:     {fp}")
        print(f"         • False Negatives:     {fn}")
        print(f"         • True Positives:      {tp}")

        # Store results (the raw scores are kept so ROC curves can be drawn later)
        external_results.append({
            'Tier': tier_num,
            'N_Features': n_features,
            'Model': model_name,
            'Internal_Test_AUC': internal_test_auc,
            'External_AUC': external_auc,
            'AUC_Drop': auc_drop,
            'Brier_Score': external_brier,
            'Optimal_Threshold': optimal_threshold,
            'Accuracy': accuracy,
            'Sensitivity': sensitivity,
            'Specificity': specificity,
            'Precision': precision,
            'F1_Score': f1,
            'TP': tp,
            'TN': tn,
            'FP': fp,
            'FN': fn,
            'y_pred_proba': y_pred_proba,
        })

    # Tier summary
    print(f"\n   {'='*80}")
    print(f"   📊 TIER {tier_num} EXTERNAL VALIDATION SUMMARY:\n")

    tier_results = [r for r in external_results if r['Tier'] == tier_num]
    tier_df = pd.DataFrame(tier_results)

    print(f"      {'Model':25s}  {'Ext AUC':>8s}  {'Drop':>6s}  {'Sens':>6s}  {'Spec':>6s}")
    print(f"      {'-'*70}")

    for idx, row in tier_df.iterrows():
        print(f"      {row['Model']:25s}  {row['External_AUC']:8.4f}  {row['AUC_Drop']:6.4f}  "
              f"{row['Sensitivity']:6.4f}  {row['Specificity']:6.4f}")

    best_idx = tier_df['External_AUC'].idxmax()
    best_model_name = tier_df.loc[best_idx, 'Model']
    best_external_auc = tier_df.loc[best_idx, 'External_AUC']

    print(f"\n      🏆 Best: {best_model_name} (External AUC={best_external_auc:.4f})")

# ══════════════════════════════════════════════════════════════════════════════
# OVERALL EXTERNAL VALIDATION SUMMARY
# ══════════════════════════════════════════════════════════════════════════════

# Print one line per validated model and save the table as CSV.
print(f"\n{'='*100}")
print(f"📊 OVERALL EXTERNAL VALIDATION: ALL 18 MODELS")
print(f"{'='*100}\n")

# One row per entry of external_results; includes the raw-score column 'y_pred_proba'.
external_df = pd.DataFrame(external_results)

print(f"   {'Tier':>5s}  {'Feat':>5s}  {'Model':25s}  {'Int AUC':>8s}  {'Ext AUC':>8s}  {'Drop':>6s}")
print(f"   {'-'*75}")

for idx, row in external_df.iterrows():
    print(f"   {int(row['Tier']):5d}  {int(row['N_Features']):5d}  {row['Model']:25s}  "
          f"{row['Internal_Test_AUC']:8.4f}  {row['External_AUC']:8.4f}  {row['AUC_Drop']:6.4f}")

# The per-sample score arrays are dropped before writing the CSV.
save_csv(external_df.drop(columns=['y_pred_proba']), 'step14_external_validation_all_models')

# ══════════════════════════════════════════════════════════════════════════════
# TOP MODELS BY EXTERNAL AUC
# ══════════════════════════════════════════════════════════════════════════════

# Rank models by external AUC and print the five highest.
print(f"\n{'='*100}")
print(f"🏆 TOP 5 MODELS BY EXTERNAL AUC")
print(f"{'='*100}\n")

top5_external = external_df.nlargest(5, 'External_AUC')

print(f"   {'Rank':>5s}  {'Tier':>5s}  {'Features':>9s}  {'Model':25s}  {'Ext AUC':>8s}  {'Drop':>6s}  {'Sens':>6s}  {'Spec':>6s}")
print(f"   {'-'*95}")

# enumerate(..., 1) supplies the 1-based rank; idx is the original row label.
for rank, (idx, row) in enumerate(top5_external.iterrows(), 1):
    print(f"   {rank:5d}  {int(row['Tier']):5d}  {int(row['N_Features']):9d}  {row['Model']:25s}  "
          f"{row['External_AUC']:8.4f}  {row['AUC_Drop']:6.4f}  "
          f"{row['Sensitivity']:6.4f}  {row['Specificity']:6.4f}")

# ══════════════════════════════════════════════════════════════════════════════
# BEST GENERALIZATION (SMALLEST AUC DROP)
# ══════════════════════════════════════════════════════════════════════════════

# Rank models by AUC_Drop (internal test AUC minus external AUC) and print the
# five smallest values. nsmallest also ranks negative drops (external AUC above
# internal) first.
print(f"\n{'='*100}")
print(f"✅ TOP 5 MODELS BY GENERALIZATION (Smallest AUC Drop)")
print(f"{'='*100}\n")

top5_gen = external_df.nsmallest(5, 'AUC_Drop')

print(f"   {'Rank':>5s}  {'Tier':>5s}  {'Model':25s}  {'Int AUC':>8s}  {'Ext AUC':>8s}  {'Drop':>6s}")
print(f"   {'-'*80}")

for rank, (idx, row) in enumerate(top5_gen.iterrows(), 1):
    print(f"   {rank:5d}  {int(row['Tier']):5d}  {row['Model']:25s}  "
          f"{row['Internal_Test_AUC']:8.4f}  {row['External_AUC']:8.4f}  {row['AUC_Drop']:6.4f}")

# ══════════════════════════════════════════════════════════════════════════════
# TIER-WISE COMPARISON
# ══════════════════════════════════════════════════════════════════════════════

# Aggregate external metrics per tier, print them, and pick the tier with the
# highest mean external AUC.
print(f"\n{'='*100}")
print(f"📊 TIER-WISE EXTERNAL VALIDATION COMPARISON")
print(f"{'='*100}\n")

# Columns of tier_summary are a MultiIndex of (metric, statistic).
tier_summary = external_df.groupby('Tier').agg({
    'External_AUC': ['mean', 'std', 'max'],
    'AUC_Drop': ['mean', 'std', 'min'],
    'Sensitivity': 'mean',
    'Specificity': 'mean',
}).round(4)

print(f"   {'Tier':>5s}  {'Mean AUC':>9s}  {'Std AUC':>8s}  {'Max AUC':>8s}  {'Mean Drop':>10s}  {'Mean Sens':>10s}  {'Mean Spec':>10s}")
print(f"   {'-'*85}")

# Iterate over the tiers actually present instead of a fixed [1, 2, 3], so a
# missing or additional tier does not raise KeyError or get silently skipped.
for tier_num in tier_summary.index:
    row = tier_summary.loc[tier_num]
    print(f"   {int(tier_num):5d}  {row[('External_AUC', 'mean')]:9.4f}  {row[('External_AUC', 'std')]:8.4f}  "
          f"{row[('External_AUC', 'max')]:8.4f}  {row[('AUC_Drop', 'mean')]:10.4f}  "
          f"{row[('Sensitivity', 'mean')]:10.4f}  {row[('Specificity', 'mean')]:10.4f}")

# Best tier: compute the per-tier mean once, and cast to built-in types so the
# values can be logged/serialized like the other run-log entries.
tier_mean_auc = external_df.groupby('Tier')['External_AUC'].mean()
best_tier_num = int(tier_mean_auc.idxmax())
best_tier_auc = float(tier_mean_auc.max())

print(f"\n   🏆 Best Tier: Tier {best_tier_num} (Mean External AUC={best_tier_auc:.4f})")

save_csv(tier_summary.reset_index(), 'step14_tier_comparison')

# ══════════════════════════════════════════════════════════════════════════════
# FINAL MODEL SELECTION
# ══════════════════════════════════════════════════════════════════════════════

# Choose the final model as the row of external_df with the highest external
# AUC, then print its metrics and feature list.
print(f"\n{'='*100}")
print(f"🎯 FINAL MODEL SELECTION")
print(f"{'='*100}\n")

# Select best model by external AUC
# NOTE(review): selecting on the external cohort means the external metrics of
# the chosen model are no longer an independent estimate — confirm this is intended.
best_model_idx = external_df['External_AUC'].idxmax()
final_model = external_df.loc[best_model_idx]

print(f"   🏆 SELECTED FINAL MODEL:\n")
print(f"      Tier:              {int(final_model['Tier'])}")
print(f"      Features:          {int(final_model['N_Features'])}")
print(f"      Algorithm:         {final_model['Model']}")
print(f"\n   📊 PERFORMANCE:")
print(f"      Internal Test AUC: {final_model['Internal_Test_AUC']:.4f}")
print(f"      External AUC:      {final_model['External_AUC']:.4f}")
print(f"      AUC Drop:          {final_model['AUC_Drop']:.4f}")
print(f"      Brier Score:       {final_model['Brier_Score']:.4f}")
print(f"\n   🎯 CLASSIFICATION (Optimal Threshold={final_model['Optimal_Threshold']:.4f}):")
print(f"      Accuracy:          {final_model['Accuracy']:.4f}")
print(f"      Sensitivity:       {final_model['Sensitivity']:.4f}")
print(f"      Specificity:       {final_model['Specificity']:.4f}")
print(f"      Precision:         {final_model['Precision']:.4f}")
print(f"      F1-Score:          {final_model['F1_Score']:.4f}")
print(f"\n   📋 CONFUSION MATRIX:")
print(f"      True Negatives:    {int(final_model['TN'])}")
print(f"      False Positives:   {int(final_model['FP'])}")
print(f"      False Negatives:   {int(final_model['FN'])}")
print(f"      True Positives:    {int(final_model['TP'])}")

# Get final model features
# Assumes TRAINED_MODELS is keyed as "tier_<n>" — TODO confirm against Step 13.
final_tier_key = f"tier_{int(final_model['Tier'])}"
final_features = TRAINED_MODELS[final_tier_key]['features']

print(f"\n   🎯 FINAL {int(final_model['N_Features'])} FEATURES:\n")
for i, feat in enumerate(final_features, 1):
    print(f"      {i:2d}. {feat}")

# ══════════════════════════════════════════════════════════════════════════════
# VISUALIZATION
# ══════════════════════════════════════════════════════════════════════════════

# Build Figure 10 on a 3x4 grid: panels A-D (this part) fill the top two rows.
print(f"\n{'='*100}")
print(f"📊 Generating visualizations...\n")

fig = plt.figure(figsize=(22, 14), dpi=300)
gs = fig.add_gridspec(3, 4, hspace=0.35, wspace=0.3)

# One colour per tier, indexed by tier_num - 1; short labels for the six algorithms.
# NOTE(review): the short names assume each tier holds these six models in this
# order — confirm against the Step 13 model dict.
tier_colors = [COLORS['primary'], COLORS['secondary'], COLORS['sig']]
model_names_short = ['LR', 'RF', 'GB', 'XGB', 'SVM', 'EN']

# Plot 1: External AUC comparison
ax1 = fig.add_subplot(gs[0, :2])

for tier_num in [1, 2, 3]:
    tier_df = external_df[external_df['Tier'] == tier_num]
    x = np.arange(len(tier_df))
    ax1.plot(x, tier_df['External_AUC'].values, marker='o', linewidth=2.5, markersize=10,
             label=f'Tier {tier_num} ({tier_df["N_Features"].iloc[0]} features)',
             color=tier_colors[tier_num-1])

ax1.set_xlabel('Model', fontsize=12, fontweight='bold')
ax1.set_ylabel('External AUC', fontsize=12, fontweight='bold')
ax1.set_title('A. External Validation: AUC Comparison', fontsize=13, fontweight='bold', loc='left')
ax1.set_xticks(range(6))
ax1.set_xticklabels(model_names_short)
ax1.legend(fontsize=10)
ax1.grid(alpha=0.3)
# Fixed y-range: points with AUC outside 0.70-0.90 fall outside the visible area.
ax1.set_ylim([0.70, 0.90])

# Plot 2: AUC Drop (Internal → External)
ax2 = fig.add_subplot(gs[0, 2:])

for tier_num in [1, 2, 3]:
    tier_df = external_df[external_df['Tier'] == tier_num]
    x = np.arange(len(tier_df))
    ax2.plot(x, tier_df['AUC_Drop'].values, marker='s', linewidth=2.5, markersize=10,
             label=f'Tier {tier_num}', color=tier_colors[tier_num-1])

# Reference lines at the same 0.05 / 0.10 cut-offs used in the printed verdicts.
ax2.axhline(y=0.05, color='green', linestyle='--', linewidth=2, label='Excellent (<0.05)')
ax2.axhline(y=0.10, color='orange', linestyle='--', linewidth=2, label='Good (<0.10)')
ax2.set_xlabel('Model', fontsize=12, fontweight='bold')
ax2.set_ylabel('AUC Drop (Internal - External)', fontsize=12, fontweight='bold')
ax2.set_title('B. Generalization: AUC Drop', fontsize=13, fontweight='bold', loc='left')
ax2.set_xticks(range(6))
ax2.set_xticklabels(model_names_short)
ax2.legend(fontsize=9)
ax2.grid(alpha=0.3)

# Plot 3: Sensitivity vs Specificity
ax3 = fig.add_subplot(gs[1, :2])

for tier_num in [1, 2, 3]:
    tier_df = external_df[external_df['Tier'] == tier_num]
    ax3.scatter(tier_df['Specificity'], tier_df['Sensitivity'], 
               s=200, alpha=0.7, color=tier_colors[tier_num-1],
               label=f'Tier {tier_num}', edgecolors='black', linewidth=1.5)

ax3.plot([0, 1], [0, 1], 'k--', alpha=0.3)
ax3.set_xlabel('Specificity', fontsize=12, fontweight='bold')
ax3.set_ylabel('Sensitivity', fontsize=12, fontweight='bold')
ax3.set_title('C. Sensitivity vs Specificity', fontsize=13, fontweight='bold', loc='left')
ax3.legend(fontsize=10)
ax3.grid(alpha=0.3)
# Both axes are limited to 0.5-1.0; models below 0.5 on either metric are not shown.
ax3.set_xlim([0.5, 1.0])
ax3.set_ylim([0.5, 1.0])

# Plot 4: ROC Curves for top 3 models
ax4 = fig.add_subplot(gs[1, 2:])

top3 = external_df.nlargest(3, 'External_AUC')

# ROC curves are rebuilt from the per-sample scores stored in 'y_pred_proba'.
for rank, (idx, row) in enumerate(top3.iterrows(), 1):
    y_pred_proba = row['y_pred_proba']
    fpr, tpr, _ = roc_curve(y_external, y_pred_proba)
    
    label = f"Rank {rank}: {row['Model']} (Tier {int(row['Tier'])}, AUC={row['External_AUC']:.3f})"
    ax4.plot(fpr, tpr, linewidth=2.5, label=label)

ax4.plot([0, 1], [0, 1], 'k--', linewidth=2, label='Random')
ax4.set_xlabel('False Positive Rate', fontsize=12, fontweight='bold')
ax4.set_ylabel('True Positive Rate', fontsize=12, fontweight='bold')
ax4.set_title('D. ROC Curves: Top 3 Models', fontsize=13, fontweight='bold', loc='left')
ax4.legend(fontsize=9, loc='lower right')
ax4.grid(alpha=0.3)

# Plot 5: Tier comparison boxplot
ax5 = fig.add_subplot(gs[2, 0])

auc_by_tier = [external_df[external_df['Tier']==t]['External_AUC'].values for t in [1,2,3]]

# Read the feature count of each tier from the results table (as panel A does)
# instead of hard-coding 13/16/17, so the labels cannot drift from the data.
box_labels = [
    f"Tier {t}\n({int(external_df.loc[external_df['Tier'] == t, 'N_Features'].iloc[0])} feat)"
    for t in [1, 2, 3]
]

# Tick labels are set on the axis afterwards rather than through boxplot's
# `labels=` keyword, which newer Matplotlib releases deprecate.
bp = ax5.boxplot(auc_by_tier, patch_artist=True, widths=0.6)
ax5.set_xticklabels(box_labels)

for patch, color in zip(bp['boxes'], tier_colors):
    patch.set_facecolor(color)
    patch.set_alpha(0.7)

ax5.set_ylabel('External AUC', fontsize=11, fontweight='bold')
ax5.set_title('E. External AUC by Tier', fontsize=12, fontweight='bold', loc='left')
ax5.grid(axis='y', alpha=0.3)

# Plot 6: Brier Score comparison
ax6 = fig.add_subplot(gs[2, 1])

# Grouped bars: six model slots, three tiers side by side in each slot.
# NOTE(review): x has a fixed length of 6, so each tier must contribute exactly
# six Brier values or ax6.bar will reject the shapes — confirm.
x = np.arange(6)
width = 0.25

for tier_idx, tier_num in enumerate([1, 2, 3]):
    tier_df = external_df[external_df['Tier'] == tier_num]
    # Offsets -width, 0, +width centre the three bars on each tick.
    offset = (tier_idx - 1) * width
    ax6.bar(x + offset, tier_df['Brier_Score'].values, width, 
           label=f'Tier {tier_num}', color=tier_colors[tier_idx], alpha=0.7)

ax6.set_xlabel('Model', fontsize=11, fontweight='bold')
ax6.set_ylabel('Brier Score', fontsize=11, fontweight='bold')
ax6.set_title('F. Calibration (Brier Score)', fontsize=12, fontweight='bold', loc='left')
ax6.set_xticks(x)
ax6.set_xticklabels(model_names_short)
ax6.legend(fontsize=9)
ax6.grid(axis='y', alpha=0.3)

# Plot 7: Confusion matrix for final model
ax7 = fig.add_subplot(gs[2, 2])

# Rows are actual class (0, 1); columns are predicted class (0, 1).
cm = np.array([[final_model['TN'], final_model['FP']],
               [final_model['FN'], final_model['TP']]])

sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False, ax=ax7,
           xticklabels=['Predicted 0', 'Predicted 1'],
           yticklabels=['Actual 0', 'Actual 1'],
           annot_kws={'fontsize': 14, 'fontweight': 'bold'})

ax7.set_title(f'G. Final Model Confusion Matrix\n{final_model["Model"]} (Tier {int(final_model["Tier"])})', 
             fontsize=12, fontweight='bold', loc='left')

# Plot 8: Final model metrics
ax8 = fig.add_subplot(gs[2, 3])

metrics = ['AUC', 'Sensitivity', 'Specificity', 'Precision', 'F1-Score']
values = [final_model['External_AUC'], final_model['Sensitivity'], 
          final_model['Specificity'], final_model['Precision'], final_model['F1_Score']]

bars = ax8.barh(metrics, values, color=COLORS['primary'], alpha=0.7, edgecolor='black', linewidth=1.5)

# Write each value just to the right of its bar.
for i, (bar, val) in enumerate(zip(bars, values)):
    ax8.text(val + 0.01, i, f'{val:.3f}', va='center', fontweight='bold', fontsize=10)

ax8.set_xlabel('Score', fontsize=11, fontweight='bold')
ax8.set_title(f'H. Final Model Performance\n{final_model["Model"]} (Tier {int(final_model["Tier"])})', 
             fontsize=12, fontweight='bold', loc='left')
ax8.set_xlim([0, 1.0])
ax8.grid(axis='x', alpha=0.3)

fig.suptitle('Figure 10. External Validation Results: 3-Tier Comparison\n'
             f'Final Model: {final_model["Model"]} (Tier {int(final_model["Tier"])}, {int(final_model["N_Features"])} features) — External AUC={final_model["External_AUC"]:.4f}', 
             fontsize=16, fontweight='bold', y=0.98)

# Save through the project helper, then display the figure.
save_figure(fig, 'step14_fig10_external_validation')
plt.show()

print(f"✅ Figure 10 saved")

# ══════════════════════════════════════════════════════════════════════════════
# SAVE & LOG
# ══════════════════════════════════════════════════════════════════════════════

# Persist the results table without the per-sample score arrays.
save_pickle(external_df.drop(columns=['y_pred_proba']), 'step14_external_validation_results')

# Summary of the selected model; every scalar is cast to a built-in int/float.
final_model_dict = {
    'tier': int(final_model['Tier']),
    'n_features': int(final_model['N_Features']),
    'algorithm': final_model['Model'],
    'features': final_features,
    'internal_test_auc': float(final_model['Internal_Test_AUC']),
    'external_auc': float(final_model['External_AUC']),
    'auc_drop': float(final_model['AUC_Drop']),
    'brier_score': float(final_model['Brier_Score']),
    'optimal_threshold': float(final_model['Optimal_Threshold']),
    'sensitivity': float(final_model['Sensitivity']),
    'specificity': float(final_model['Specificity']),
    'precision': float(final_model['Precision']),
    'f1_score': float(final_model['F1_Score']),
    'confusion_matrix': {
        'tn': int(final_model['TN']),
        'fp': int(final_model['FP']),
        'fn': int(final_model['FN']),
        'tp': int(final_model['TP']),
    }
}

save_pickle(final_model_dict, 'step14_final_model')

# Log the step. The model count comes from the results table rather than a
# literal, and best_tier_num (a groupby idxmax result, typically a NumPy
# integer) is cast to int like every other logged value.
# NOTE(review): presumably append_runlog serializes to JSON, where NumPy
# scalars are rejected — confirm.
append_runlog("14", {
    "analysis": "External validation (3-tier approach)",
    "n_models_validated": int(len(external_df)),
    "best_tier": int(best_tier_num),
    "final_model": final_model_dict,
})

# Shared state for the following steps (the full table keeps 'y_pred_proba').
EXTERNAL_VALIDATION_RESULTS = {
    'all_results': external_df,
    'tier_summary': tier_summary,
    'final_model': final_model_dict,
}

print("\n💾 Stored: EXTERNAL_VALIDATION_RESULTS")
print("\n" + "="*100)
print("✅ STEP 14 COMPLETE — EXTERNAL VALIDATION")
print("="*100)
print(f"\n🏆 FINAL MODEL SELECTED:")
print(f"   Algorithm:     {final_model['Model']}")
print(f"   Tier:          {int(final_model['Tier'])} ({int(final_model['N_Features'])} features)")
print(f"   External AUC:  {final_model['External_AUC']:.4f}")
print(f"   Sensitivity:   {final_model['Sensitivity']:.4f}")
print(f"   Specificity:   {final_model['Specificity']:.4f}")
print(f"\n✅ 3 PRESERVED FEATURES IN FINAL MODEL:")

# Report whether each of the three named features is part of the final feature set.
preserved_features = ['glucose_min', 'neutrophils_abs_min', 'rbc_count_max']
for feat in preserved_features:
    if feat in final_features:
        print(f"   ✅ {feat}")
    else:
        print(f"   ❌ {feat} (not in final model)")

print("\n" + "="*100 + "\n")

In [None]:
# ═══════════════════════════════════════════════════════════════════════════════
# STEP 15: MODEL EVALUATION & CALIBRATION
# TRIPOD: 10d (Model performance), 10e (Model calibration)
# ═══════════════════════════════════════════════════════════════════════════════

from sklearn.metrics import roc_auc_score, roc_curve, brier_score_loss
from sklearn.calibration import calibration_curve
from scipy import stats
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

print("\n" + "="*100)
print("STEP 15: MODEL EVALUATION & CALIBRATION")
print("="*100)
# Print the actual run time (same format as the Step 0 banner) instead of a
# frozen literal, so re-runs are not stamped with a stale date.
print(f"UTC: {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"User: zainzampawala786-sudo\n")

# Get data: external cohort, the Step 14 final-model summary, and trained models.
X_external = CLEAN_FEATURE_DATA["X_external_clean"].copy()
y_external = CLEAN_FEATURE_DATA["y_external"].copy()
FINAL_MODEL = EXTERNAL_VALIDATION_RESULTS['final_model']
TRAINED_MODELS = TRAINED_MODELS_3TIER
COLORS = DISTRIBUTION_DATA["colors_enhanced"]

print(f"{'='*100}")
print(f"📊 FINAL MODEL SUMMARY:\n")
print(f"   Algorithm:         {FINAL_MODEL['algorithm']}")
print(f"   Tier:              {FINAL_MODEL['tier']}")
print(f"   Features:          {FINAL_MODEL['n_features']}")
print(f"   External AUC:      {FINAL_MODEL['external_auc']:.4f}")
print(f"   Sensitivity:       {FINAL_MODEL['sensitivity']:.4f}")
print(f"   Specificity:       {FINAL_MODEL['specificity']:.4f}")

# Get final model object and predictions. The stored entry is looked up once
# and both the estimator and its scaler are read from it.
final_tier_key = f"tier_{FINAL_MODEL['tier']}"
final_model_entry = TRAINED_MODELS[final_tier_key]['models'][FINAL_MODEL['algorithm']]
final_model_obj = final_model_entry['model']
final_scaler = final_model_entry['scaler']
final_features = FINAL_MODEL['features']

# Prepare external data (restricted to the final model's features)
X_external_final = X_external[final_features].copy()

if final_scaler is not None:
    X_external_processed = final_scaler.transform(X_external_final)
else:
    X_external_processed = X_external_final.values

# Get predictions (same three-way dispatch as in the Step 14 validation loop)
# NOTE(review): the decision_function fallback yields unbounded margins, which
# the Brier/calibration code further down treats as probabilities — confirm the
# final model always exposes predict_proba.
if FINAL_MODEL['algorithm'] == 'Elastic Net':
    y_pred_proba = np.clip(final_model_obj.predict(X_external_processed), 0, 1)
elif hasattr(final_model_obj, 'predict_proba'):
    y_pred_proba = final_model_obj.predict_proba(X_external_processed)[:, 1]
else:
    y_pred_proba = final_model_obj.decision_function(X_external_processed)

# ══════════════════════════════════════════════════════════════════════════════
# 1. CONFIDENCE INTERVALS FOR AUC (DeLong Method)
# ══════════════════════════════════════════════════════════════════════════════

print(f"\n{'='*100}")
print(f"📊 1. CONFIDENCE INTERVALS FOR AUC")
print(f"{'='*100}\n")

def delong_roc_variance(ground_truth, predictions):
    """
    Compute the ROC AUC and its variance using DeLong's method.

    Parameters
    ----------
    ground_truth : array-like
        Binary labels; entries equal to 1 are positives, all others negatives.
    predictions : array-like
        Scores aligned with ``ground_truth``; higher means "more positive".

    Returns
    -------
    (auc, variance) : tuple of float
        ``auc`` is the Mann-Whitney estimate with ties counted as 1/2.
        ``variance`` is ``S10 / n_pos + S01 / n_neg`` where S10/S01 are the
        sample variances (ddof=1) of the structural components.
        ``(nan, nan)`` is returned when one class is absent, and the variance
        is ``nan`` when a class has fewer than two samples (a sample variance
        cannot be estimated from a single observation).
    """
    ground_truth = np.asarray(ground_truth)
    predictions = np.asarray(predictions, dtype=float)

    is_positive = ground_truth == 1
    pos_scores = predictions[is_positive]
    neg_scores = predictions[~is_positive]
    n_pos = pos_scores.size
    n_neg = neg_scores.size

    # Always return a 2-tuple so callers can unpack the result unconditionally.
    if n_pos == 0 or n_neg == 0:
        return np.nan, np.nan

    # Structural components: V10[i] is the fraction of negatives ranked below
    # positive i, V01[j] the fraction of positives ranked above negative j.
    # Ties contribute 1/2. One positive is processed per iteration so memory
    # stays linear in the sample size.
    V10 = np.empty(n_pos)
    v01_sums = np.zeros(n_neg)
    for i, score in enumerate(pos_scores):
        kernel = (score > neg_scores) + 0.5 * (score == neg_scores)
        V10[i] = kernel.mean()
        v01_sums += kernel
    V01 = v01_sums / n_pos

    # The mean of the structural components equals the Mann-Whitney AUC.
    auc = V10.mean()

    if n_pos < 2 or n_neg < 2:
        return auc, np.nan

    S10 = np.var(V10, ddof=1) / n_pos
    S01 = np.var(V01, ddof=1) / n_neg

    variance = S10 + S01

    return auc, variance

# DeLong CI: Wald interval AUC ± 1.96·SE, truncated to the valid AUC range
# [0, 1]. np.clip is used (not min/max) so a NaN estimate stays NaN.
auc_delong, var_delong = delong_roc_variance(y_external.values, y_pred_proba)
se_delong = np.sqrt(var_delong)
ci_lower_delong = float(np.clip(auc_delong - 1.96 * se_delong, 0.0, 1.0))
ci_upper_delong = float(np.clip(auc_delong + 1.96 * se_delong, 0.0, 1.0))

print(f"   🔬 DeLong Method:")
print(f"      AUC:           {auc_delong:.4f}")
print(f"      SE:            {se_delong:.4f}")
print(f"      95% CI:        [{ci_lower_delong:.4f}, {ci_upper_delong:.4f}]")

# Bootstrap CI
print(f"\n   🔄 Bootstrap Method (1000 iterations):")

# Fixed seed so the resampled interval is reproducible between runs.
np.random.seed(42)
n_bootstrap = 1000
bootstrap_aucs = []

for i in range(n_bootstrap):
    # Resample patients with replacement, keeping label/score pairs aligned.
    indices = np.random.choice(len(y_external), len(y_external), replace=True)
    y_boot = y_external.values[indices]
    pred_boot = y_pred_proba[indices]

    # AUC is undefined for a single-class resample; such draws are skipped.
    if len(np.unique(y_boot)) > 1:
        auc_boot = roc_auc_score(y_boot, pred_boot)
        bootstrap_aucs.append(auc_boot)

# Percentile interval and standard deviation of the bootstrap distribution.
bootstrap_aucs = np.array(bootstrap_aucs)
ci_lower_boot = np.percentile(bootstrap_aucs, 2.5)
ci_upper_boot = np.percentile(bootstrap_aucs, 97.5)
se_boot = np.std(bootstrap_aucs)

print(f"      AUC (mean):    {np.mean(bootstrap_aucs):.4f}")
print(f"      SE:            {se_boot:.4f}")
print(f"      95% CI:        [{ci_lower_boot:.4f}, {ci_upper_boot:.4f}]")

# Store CI results
ci_results = {
    'delong': {
        'auc': auc_delong,
        'se': se_delong,
        'ci_lower': ci_lower_delong,
        'ci_upper': ci_upper_delong,
    },
    'bootstrap': {
        'auc': np.mean(bootstrap_aucs),
        'se': se_boot,
        'ci_lower': ci_lower_boot,
        'ci_upper': ci_upper_boot,
    }
}

# ══════════════════════════════════════════════════════════════════════════════
# 2. CALIBRATION ANALYSIS
# ══════════════════════════════════════════════════════════════════════════════

print(f"\n{'='*100}")
print(f"📊 2. CALIBRATION ANALYSIS")
print(f"{'='*100}\n")

# Calibration curve
# strategy='quantile' bins by score quantiles (10 bins requested); prob_true and
# prob_pred are the two arrays returned by sklearn's calibration_curve.
prob_true, prob_pred = calibration_curve(y_external, y_pred_proba, n_bins=10, strategy='quantile')

# Hosmer-Lemeshow test
def hosmer_lemeshow_test(y_true, y_pred, n_bins=10):
    """Hosmer-Lemeshow goodness-of-fit test on fixed-width probability bins.

    Parameters
    ----------
    y_true : array-like
        Binary outcomes (0/1).
    y_pred : array-like
        Predicted probabilities aligned with ``y_true``.
    n_bins : int, default 10
        Number of equal-width bins on [0, 1]; empty bins are dropped.

    Returns
    -------
    (hl_statistic, p_value, df) : tuple
        Chi-square statistic, upper-tail p-value and degrees of freedom
        (non-empty bins minus 2). ``p_value`` is ``nan`` when ``df <= 0``,
        i.e. when fewer than three bins contain data.
    """
    # Accept lists / pandas objects as well as ndarrays.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    # Assign each prediction to an equal-width bin; values at or beyond the
    # ends of [0, 1] are folded into the first / last bin.
    bins = np.linspace(0, 1, n_bins + 1)
    bin_indices = np.clip(np.digitize(y_pred, bins[:-1]) - 1, 0, n_bins - 1)

    # Per-bin sample count, observed events and expected events.
    total = np.bincount(bin_indices, minlength=n_bins).astype(float)
    observed = np.bincount(bin_indices, weights=y_true, minlength=n_bins)
    expected = np.bincount(bin_indices, weights=y_pred, minlength=n_bins)

    # Remove empty bins
    occupied = total > 0
    observed = observed[occupied]
    expected = expected[occupied]
    total = total[occupied]

    # Chi-square statistic: (O - E)^2 / (n * p_bar * (1 - p_bar)) per bin; the
    # small constant guards against a zero denominator when p_bar is 0 or 1.
    hl_statistic = np.sum((observed - expected) ** 2 / (expected * (1 - expected / total) + 1e-10))
    df = len(observed) - 2

    if df > 0:
        # sf() keeps precision in the far tail, where 1 - cdf() rounds to 0.
        p_value = stats.chi2.sf(hl_statistic, df)
    else:
        # Too few occupied bins for the test to be defined.
        p_value = np.nan

    return hl_statistic, p_value, df

hl_stat, hl_p, hl_df = hosmer_lemeshow_test(y_external.values, y_pred_proba, n_bins=10)

print(f"   🔬 Hosmer-Lemeshow Test:")
print(f"      Chi-square:    {hl_stat:.4f}")
print(f"      df:            {hl_df}")
print(f"      p-value:       {hl_p:.4f}", end='')

if hl_p > 0.05:
    print(f"  ✅ Good calibration (p > 0.05)")
else:
    print(f"  ⚠️  Poor calibration (p ≤ 0.05)")

# Scaled Brier score: 1 - Brier / Brier_max, where Brier_max = p(1 - p) is the
# score of a model that always predicts the prevalence p.
brier_score = brier_score_loss(y_external, y_pred_proba)
prevalence = np.mean(y_external)
brier_max = prevalence * (1 - prevalence)
brier_scaled = 1 - (brier_score / brier_max)

print(f"\n   📊 Brier Score:")
print(f"      Brier Score:   {brier_score:.4f}")
print(f"      Max Brier:     {brier_max:.4f}")
print(f"      Scaled Brier:  {brier_scaled:.4f}")

# Calibration slope and intercept
from sklearn.linear_model import LogisticRegression

# Keep probabilities strictly inside (0, 1) before taking the logit. Scores of
# exactly 0 (possible e.g. after the Elastic Net clip to [0, 1]) would give
# log(0) = -inf and make the logistic fit fail. The 1e-10 margin matches the
# guard previously applied on the upper side only.
proba_clipped = np.clip(y_pred_proba, 1e-10, 1 - 1e-10)
log_odds = np.log(proba_clipped / (1 - proba_clipped))

# Unpenalized logistic regression of the outcome on the predicted log-odds.
# NOTE(review): slope and intercept are estimated jointly here;
# calibration-in-the-large is usually estimated with the slope fixed at 1 — confirm intent.
calib_model = LogisticRegression(penalty=None, max_iter=1000)
calib_model.fit(log_odds.reshape(-1, 1), y_external)

calib_slope = calib_model.coef_[0][0]
calib_intercept = calib_model.intercept_[0]

print(f"\n   📈 Calibration Parameters:")
print(f"      Slope:         {calib_slope:.4f}", end='')

if 0.8 <= calib_slope <= 1.2:
    print(f"  ✅ Good (0.8-1.2)")
elif 0.7 <= calib_slope <= 1.3:
    print(f"  ⚠️  Moderate (0.7-1.3)")
else:
    print(f"  🔴 Poor (<0.7 or >1.3)")

print(f"      Intercept:     {calib_intercept:.4f}", end='')

if abs(calib_intercept) < 0.1:
    print(f"  ✅ Good (|intercept| < 0.1)")
elif abs(calib_intercept) < 0.2:
    print(f"  ⚠️  Moderate (|intercept| < 0.2)")
else:
    print(f"  🔴 Poor (|intercept| ≥ 0.2)")

# Bundle all calibration outputs for later use.
calibration_results = {
    'hosmer_lemeshow': {
        'statistic': hl_stat,
        'p_value': hl_p,
        'df': hl_df,
    },
    'brier_score': {
        'brier': brier_score,
        'brier_max': brier_max,
        'brier_scaled': brier_scaled,
    },
    'calibration_params': {
        'slope': calib_slope,
        'intercept': calib_intercept,
    },
    'calibration_curve': {
        'prob_true': prob_true,
        'prob_pred': prob_pred,
    }
}

# ══════════════════════════════════════════════════════════════════════════════
# 3. DECISION CURVE ANALYSIS
# ══════════════════════════════════════════════════════════════════════════════

print(f"\n{'='*100}")
print(f"📊 3. DECISION CURVE ANALYSIS")
print(f"{'='*100}\n")

def decision_curve_analysis(y_true, y_pred, thresholds=None):
    """Compute net-benefit curves for decision curve analysis.

    For each threshold probability ``pt`` a case counts as "treated" when its
    predicted risk is ``>= pt``.  The model's net benefit is
    ``TP/n - FP/n * pt / (1 - pt)`` and the "treat all" reference is
    ``prevalence - (1 - prevalence) * pt / (1 - pt)``.  "Treat none" is zero
    at every threshold and is therefore not returned.

    Parameters
    ----------
    y_true : array-like of shape (n,)
        Outcomes; entries equal to 1 are counted as events and entries equal
        to 0 as non-events.
    y_pred : array-like of shape (n,)
        Predicted risks, compared against each threshold.
    thresholds : iterable of float, optional
        Threshold probabilities, each in ``[0, 1)``.  Defaults to 99 evenly
        spaced values from 0.01 to 0.99.

    Returns
    -------
    thresholds
        The thresholds that were evaluated (the argument as passed in, or the
        default grid).
    net_benefit_model : list of float
        Net benefit of the model at each threshold.
    net_benefit_all : list of float
        Net benefit of treating everyone at each threshold.

    Raises
    ------
    ValueError
        If the inputs are empty or differ in length, or if a threshold lies
        outside ``[0, 1)`` (the odds ``pt / (1 - pt)`` are undefined at 1).
    """
    # Coerce to flat arrays so lists and pandas objects behave the same way
    # and the element-wise comparisons below cannot broadcast unexpectedly.
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()

    n = y_true.size
    if n == 0:
        raise ValueError("y_true and y_pred must not be empty")
    if y_pred.size != n:
        raise ValueError(
            f"y_true and y_pred must have the same length (got {n} and {y_pred.size})"
        )

    if thresholds is None:
        thresholds = np.linspace(0.01, 0.99, 99)

    # Loop-invariant quantities.
    is_event = y_true == 1
    is_non_event = y_true == 0
    prevalence = np.mean(y_true)

    net_benefit_model = []
    net_benefit_all = []

    for threshold in thresholds:
        if not 0 <= threshold < 1:
            raise ValueError(f"thresholds must lie in [0, 1); got {threshold!r}")

        # Harm-to-benefit exchange rate implied by the threshold.
        odds = threshold / (1 - threshold)

        # Model
        treated = y_pred >= threshold
        tp = np.sum(treated & is_event)
        fp = np.sum(treated & is_non_event)
        net_benefit_model.append((tp / n) - (fp / n) * odds)

        # Treat all
        net_benefit_all.append(prevalence - (1 - prevalence) * odds)

    return thresholds, net_benefit_model, net_benefit_all

thresholds_dca, nb_model, nb_all = decision_curve_analysis(y_external.values, y_pred_proba)

# Array views of the two curves for element-wise comparison.
nb_model_array = np.array(nb_model)
nb_all_array = np.array(nb_all)

# The model is "useful" at a threshold when its net benefit exceeds both
# reference strategies: treat-none (0) and treat-all.
useful_range = nb_model_array > np.maximum(nb_all_array, 0)

if not useful_range.any():
    print(f"   ⚠️  Model does not provide substantial net benefit over default strategies")
else:
    # Report the first and last useful thresholds.
    _useful_thresholds = thresholds_dca[useful_range]
    optimal_range_start = _useful_thresholds[0]
    optimal_range_end = _useful_thresholds[-1]
    print(f"   📈 Net Benefit Analysis:")
    print(f"      Model provides clinical utility for thresholds: {optimal_range_start:.3f} - {optimal_range_end:.3f}")
    print(f"      Max Net Benefit: {nb_model_array.max():.4f}")
    print(f"      ✅ Model outperforms 'treat all' and 'treat none' strategies")

dca_results = dict(
    thresholds=thresholds_dca,
    net_benefit_model=nb_model,
    net_benefit_all=nb_all,
)

# ══════════════════════════════════════════════════════════════════════════════
# 4. VISUALIZATION
# ══════════════════════════════════════════════════════════════════════════════

print(f"\n{'='*100}")
print(f"📊 Generating visualizations...\n")

# One 20x12 inch canvas holding a 3x3 grid of panels (A-I); every panel below
# is placed through `gs`.
fig = plt.figure(figsize=(20, 12), dpi=300)
gs = fig.add_gridspec(3, 3, hspace=0.35, wspace=0.3)

# Plot 1: ROC Curve with CI
# Panel A (row 0, col 0).  The curve is recomputed here from the external
# predictions; the AUC and its confidence limits shown in the legend are the
# values already held in auc_delong / ci_lower_delong / ci_upper_delong.
ax1 = fig.add_subplot(gs[0, 0])

fpr, tpr, _ = roc_curve(y_external, y_pred_proba)
ax1.plot(fpr, tpr, linewidth=3, color=COLORS['primary'], 
         label=f'AUC = {auc_delong:.3f} (95% CI: {ci_lower_delong:.3f}-{ci_upper_delong:.3f})')
# Diagonal = chance-level discrimination.
ax1.plot([0, 1], [0, 1], 'k--', linewidth=2, label='Random')
# Shade the area under the ROC curve.
ax1.fill_between(fpr, tpr, alpha=0.2, color=COLORS['primary'])

ax1.set_xlabel('False Positive Rate', fontsize=12, fontweight='bold')
ax1.set_ylabel('True Positive Rate', fontsize=12, fontweight='bold')
ax1.set_title('A. ROC Curve with 95% CI', fontsize=13, fontweight='bold', loc='left')
ax1.legend(fontsize=10, loc='lower right')
ax1.grid(alpha=0.3)

# Plot 2: Calibration Curve
# Panel B (row 0, col 1).  Plots the binned observed rate (prob_true) against
# the binned mean prediction (prob_pred); both arrays are taken as-is from
# earlier in this step.
ax2 = fig.add_subplot(gs[0, 1])

# Diagonal = predictions equal observed rates.
ax2.plot([0, 1], [0, 1], 'k--', linewidth=2, label='Perfect calibration')
ax2.plot(prob_pred, prob_true, marker='o', linewidth=3, markersize=10,
         color=COLORS['secondary'], label=f'Model (slope={calib_slope:.2f})')

ax2.set_xlabel('Predicted Probability', fontsize=12, fontweight='bold')
ax2.set_ylabel('Observed Probability', fontsize=12, fontweight='bold')
ax2.set_title(f'B. Calibration Plot (HL p={hl_p:.3f})', fontsize=13, fontweight='bold', loc='left')
ax2.legend(fontsize=10, loc='upper left')
ax2.grid(alpha=0.3)
ax2.set_xlim([0, 1])
ax2.set_ylim([0, 1])

# Plot 3: Decision Curve Analysis
# Panel C (row 0, col 2): model net benefit against the two default
# strategies, with the region where the model beats both shaded.
ax3 = fig.add_subplot(gs[0, 2])

# Model curve
ax3.plot(thresholds_dca, nb_model, linewidth=3.5, color=COLORS['primary'], 
         label='Model', zorder=3)

# Treat-all reference curve
ax3.plot(thresholds_dca, nb_all, linewidth=2.5, linestyle='--', 
         color=COLORS['secondary'], label='Treat all', alpha=0.8, zorder=2)

# Treat-none reference (net benefit is 0 at every threshold)
ax3.axhline(y=0, color='gray', linestyle='--', linewidth=2.5, 
            label='Treat none', alpha=0.7, zorder=1)

# Shade only the thresholds where the model beats both references.  Passing
# the mask through `where=` (instead of boolean-indexing x and y) keeps
# separate useful intervals separate; indexing would join them into a single
# polygon and shade the gap between them as well.
useful_mask = (nb_model_array > 0) & (nb_model_array > nb_all_array)
if np.any(useful_mask):
    ax3.fill_between(thresholds_dca, 0, nb_model_array, where=useful_mask,
                     alpha=0.2, color=COLORS['primary'], label='Net benefit region')

ax3.set_xlabel('Threshold Probability', fontsize=12, fontweight='bold')
ax3.set_ylabel('Net Benefit', fontsize=12, fontweight='bold')
ax3.set_title('C. Decision Curve Analysis', fontsize=13, fontweight='bold', loc='left')
ax3.legend(fontsize=9, loc='upper right')
ax3.grid(alpha=0.3)
ax3.set_xlim([0, 1])
# Upper limit: 10% headroom above the peak, but never below 0.4.
ax3.set_ylim([-0.05, max(nb_model_array.max() * 1.1, 0.4)])  # Dynamic y-limit

# Mark and annotate the threshold with the highest model net benefit.
max_idx = np.argmax(nb_model_array)
max_threshold = thresholds_dca[max_idx]
max_nb = nb_model_array[max_idx]
ax3.plot(max_threshold, max_nb, 'r*', markersize=15, zorder=4)
ax3.annotate(f'Peak: {max_nb:.3f}\nat {max_threshold:.2f}', 
             xy=(max_threshold, max_nb), xytext=(max_threshold + 0.15, max_nb - 0.05),
             fontsize=9, fontweight='bold',
             bbox=dict(boxstyle='round,pad=0.5', facecolor='yellow', alpha=0.7),
             arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0', lw=2))

# Plot 4: Bootstrap AUC Distribution
# Panel D (row 1, col 0): histogram of the bootstrap AUC replicates with the
# replicate mean (red) and the bootstrap confidence limits (green).
ax4 = fig.add_subplot(gs[1, 0])

ax4.hist(bootstrap_aucs, bins=30, color=COLORS['primary'], alpha=0.7, edgecolor='black')
ax4.axvline(x=np.mean(bootstrap_aucs), color='red', linestyle='--', linewidth=2, 
           label=f'Mean = {np.mean(bootstrap_aucs):.3f}')
# Only the lower limit carries a legend label so "95% CI" is listed once.
ax4.axvline(x=ci_lower_boot, color='green', linestyle='--', linewidth=2, label=f'95% CI')
ax4.axvline(x=ci_upper_boot, color='green', linestyle='--', linewidth=2)

ax4.set_xlabel('AUC', fontsize=12, fontweight='bold')
ax4.set_ylabel('Frequency', fontsize=12, fontweight='bold')
ax4.set_title('D. Bootstrap AUC Distribution', fontsize=13, fontweight='bold', loc='left')
ax4.legend(fontsize=10)
ax4.grid(alpha=0.3, axis='y')

# Plot 5: Predicted Probability Distribution
# Panel E (row 1, col 1): overlaid histograms of the predicted risk, split by
# observed outcome (0 vs 1).  Each group is binned independently into 30 bins.
ax5 = fig.add_subplot(gs[1, 1])

ax5.hist(y_pred_proba[y_external == 0], bins=30, alpha=0.6, color='blue', 
        label=f'Survivors (n={np.sum(y_external==0)})', edgecolor='black')
ax5.hist(y_pred_proba[y_external == 1], bins=30, alpha=0.6, color='red', 
        label=f'Deaths (n={np.sum(y_external==1)})', edgecolor='black')

ax5.set_xlabel('Predicted Probability', fontsize=12, fontweight='bold')
ax5.set_ylabel('Frequency', fontsize=12, fontweight='bold')
ax5.set_title('E. Predicted Probability Distribution', fontsize=13, fontweight='bold', loc='left')
ax5.legend(fontsize=10)
ax5.grid(alpha=0.3, axis='y')

# Plot 6: Calibration by Decile
# Panel F (row 1, col 2): observed event rate (bars) versus mean predicted
# risk (line) within quantile groups of the predicted risk.
ax6 = fig.add_subplot(gs[1, 2])

# Ten quantile groups are requested; duplicates='drop' merges groups whose
# edges coincide, so fewer than ten groups may come back.
deciles = pd.qcut(y_pred_proba, q=10, duplicates='drop')
calib_decile = pd.DataFrame({
    'predicted': y_pred_proba,
    'actual': y_external,
    'decile': deciles
})

# Per group: mean predicted risk and observed event rate.
decile_stats = calib_decile.groupby('decile').agg({
    'predicted': 'mean',
    'actual': 'mean'
}).reset_index()

x_pos = np.arange(len(decile_stats))
ax6.bar(x_pos, decile_stats['actual'], alpha=0.7, color=COLORS['secondary'], 
       label='Observed', edgecolor='black')
ax6.plot(x_pos, decile_stats['predicted'], marker='o', linewidth=2.5, markersize=8,
        color='red', label='Predicted')

ax6.set_xlabel('Risk Decile', fontsize=12, fontweight='bold')
ax6.set_ylabel('Mortality Rate', fontsize=12, fontweight='bold')
ax6.set_title('F. Calibration by Decile', fontsize=13, fontweight='bold', loc='left')
ax6.set_xticks(x_pos)
# Tick labels run 1..(number of groups actually produced).
ax6.set_xticklabels(range(1, len(decile_stats) + 1))
ax6.legend(fontsize=10)
ax6.grid(alpha=0.3, axis='y')

# Plot 7: Brier Score Components
# Panel G (row 2, col 0): horizontal bars for three bin-based components and
# the overall Brier score.
ax7 = fig.add_subplot(gs[2, 0])

# Brier score decomposition
# uncertainty : prevalence * (1 - prevalence)
# resolution  : mean squared gap between the binned observed rate and the prevalence
# reliability : mean squared gap between the binned prediction and the binned observed rate
# NOTE(review): both means are plain (unweighted) averages over the bins in
# prob_true / prob_pred, not weighted by bin size, so the three components
# need not add up to brier_score exactly - confirm this is intended.
uncertainty = prevalence * (1 - prevalence)
resolution = np.mean((prob_true - prevalence) ** 2)
reliability = np.mean((prob_pred - prob_true) ** 2)

components = ['Uncertainty', 'Resolution', 'Reliability', 'Brier Score']
values = [uncertainty, resolution, reliability, brier_score]
colors_comp = [COLORS['primary'], COLORS['secondary'], COLORS['sig'], 'gray']

bars = ax7.barh(components, values, color=colors_comp, alpha=0.7, edgecolor='black', linewidth=1.5)

# Print each value just to the right of its bar.
for i, (bar, val) in enumerate(zip(bars, values)):
    ax7.text(val + 0.005, i, f'{val:.4f}', va='center', fontweight='bold', fontsize=10)

ax7.set_xlabel('Value', fontsize=12, fontweight='bold')
ax7.set_title('G. Brier Score Decomposition', fontsize=13, fontweight='bold', loc='left')
ax7.grid(alpha=0.3, axis='x')

# Plot 8: Performance Metrics Summary
# Panel H (row 2, col 1): bar chart of headline metrics.  The first four are
# read from the FINAL_MODEL record; the scaled Brier score is the value
# computed earlier in this step.
ax8 = fig.add_subplot(gs[2, 1])

metrics = ['AUC', 'Sensitivity', 'Specificity', 'Precision', 'Brier\n(scaled)']
values_metrics = [
    FINAL_MODEL['external_auc'],
    FINAL_MODEL['sensitivity'],
    FINAL_MODEL['specificity'],
    FINAL_MODEL['precision'],
    brier_scaled
]

bars = ax8.bar(range(len(metrics)), values_metrics, color=COLORS['primary'], 
              alpha=0.7, edgecolor='black', linewidth=1.5)

# Value label centred just above each bar.
for bar, val in zip(bars, values_metrics):
    height = bar.get_height()
    ax8.text(bar.get_x() + bar.get_width()/2., height + 0.02,
            f'{val:.3f}', ha='center', va='bottom', fontweight='bold', fontsize=11)

ax8.set_ylabel('Score', fontsize=12, fontweight='bold')
ax8.set_title('H. Performance Metrics', fontsize=13, fontweight='bold', loc='left')
ax8.set_xticks(range(len(metrics)))
ax8.set_xticklabels(metrics, fontsize=10)
ax8.set_ylim([0, 1.0])
# Dashed reference line at 0.7.
ax8.axhline(y=0.7, color='orange', linestyle='--', linewidth=1.5, alpha=0.5)
ax8.grid(alpha=0.3, axis='y')

# Plot 9: Calibration Quality Summary
# Panel I (row 2, col 2): traffic-light bars for four calibration indicators.
ax9 = fig.add_subplot(gs[2, 2])

calib_metrics = ['Slope', 'Intercept\n(abs)', 'HL p-value', 'Brier']
calib_values = [calib_slope, abs(calib_intercept), hl_p, brier_score]
calib_targets = [1.0, 0.0, 0.05, 0.15]
calib_colors = []

# Slope: use the same bands as the printed classification for this step
# (0.8-1.2 good, 0.7-1.3 moderate).  The earlier generic +/-0.1 / +/-0.2 rule
# coloured e.g. a slope of 0.85 orange although it is reported as "Good".
if 0.8 <= calib_slope <= 1.2:
    calib_colors.append('green')
elif 0.7 <= calib_slope <= 1.3:
    calib_colors.append('orange')
else:
    calib_colors.append('red')

# Intercept: |intercept| < 0.1 good, < 0.2 moderate (matches the printed bands).
if abs(calib_intercept) < 0.1:
    calib_colors.append('green')
elif abs(calib_intercept) < 0.2:
    calib_colors.append('orange')
else:
    calib_colors.append('red')

# Hosmer-Lemeshow: green when the test does not reject at the 5% level.
calib_colors.append('green' if calib_values[2] > 0.05 else 'red')
# Brier: < 0.15 green, < 0.25 orange, otherwise red.
calib_colors.append('green' if calib_values[3] < 0.15 else 'orange' if calib_values[3] < 0.25 else 'red')

bars = ax9.bar(range(len(calib_metrics)), calib_values, color=calib_colors, 
              alpha=0.7, edgecolor='black', linewidth=1.5)

# Value label centred just above each bar.
for bar, val in zip(bars, calib_values):
    height = bar.get_height()
    ax9.text(bar.get_x() + bar.get_width()/2., height + 0.01,
            f'{val:.3f}', ha='center', va='bottom', fontweight='bold', fontsize=10)

ax9.set_ylabel('Value', fontsize=12, fontweight='bold')
ax9.set_title('I. Calibration Quality Indicators', fontsize=13, fontweight='bold', loc='left')
ax9.set_xticks(range(len(calib_metrics)))
ax9.set_xticklabels(calib_metrics, fontsize=10)
ax9.grid(alpha=0.3, axis='y')

# Two-line figure title: model identity on the first line, headline numbers
# on the second.
_fig11_title = (
    f'Figure 11. Model Evaluation & Calibration: {FINAL_MODEL["algorithm"]} (Tier {FINAL_MODEL["tier"]}, {FINAL_MODEL["n_features"]} features)\n'
    f'External AUC = {auc_delong:.3f} (95% CI: {ci_lower_delong:.3f}-{ci_upper_delong:.3f}) | Max Net Benefit = {nb_model_array.max():.3f}'
)
fig.suptitle(_fig11_title, fontsize=16, fontweight='bold', y=0.98)

# Write the figure out through the project helper, then display it.
save_figure(fig, 'step15_fig11_calibration_evaluation')
plt.show()

print("✅ Figure 11 saved")

# ══════════════════════════════════════════════════════════════════════════════
# SAVE RESULTS
# ══════════════════════════════════════════════════════════════════════════════

# One (label, value) pair per row of the exported summary table; keeping each
# label next to its value makes it hard for the two columns to drift apart.
_summary_rows = [
    ('External AUC', auc_delong),
    ('AUC 95% CI Lower (DeLong)', ci_lower_delong),
    ('AUC 95% CI Upper (DeLong)', ci_upper_delong),
    ('AUC SE (DeLong)', se_delong),
    ('AUC 95% CI Lower (Bootstrap)', ci_lower_boot),
    ('AUC 95% CI Upper (Bootstrap)', ci_upper_boot),
    ('AUC SE (Bootstrap)', se_boot),
    ('Brier Score', brier_score),
    ('Scaled Brier Score', brier_scaled),
    ('Calibration Slope', calib_slope),
    ('Calibration Intercept', calib_intercept),
    ('Hosmer-Lemeshow Chi-square', hl_stat),
    ('Hosmer-Lemeshow p-value', hl_p),
    ('Max Net Benefit', nb_model_array.max()),
    ('Sensitivity', FINAL_MODEL['sensitivity']),
    ('Specificity', FINAL_MODEL['specificity']),
    ('Precision', FINAL_MODEL['precision']),
    ('F1-Score', FINAL_MODEL['f1_score']),
]
_summary_metrics, _summary_values = zip(*_summary_rows)

calibration_summary = pd.DataFrame({
    'Metric': list(_summary_metrics),
    'Value': list(_summary_values),
})

save_csv(calibration_summary, 'step15_calibration_summary')

# Everything produced by this step, gathered for reuse by later steps and
# written to disk through the project helper.
CALIBRATION_RESULTS = dict(
    ci_results=ci_results,
    calibration_results=calibration_results,
    dca_results=dca_results,
    summary_table=calibration_summary,
    predictions=y_pred_proba,
)

save_pickle(CALIBRATION_RESULTS, 'step15_calibration_results')

# Headline numbers for the run log entry of step "15".
_step15_log_entry = dict(
    analysis="Model evaluation and calibration",
    auc=auc_delong,
    auc_ci_lower=ci_lower_delong,
    auc_ci_upper=ci_upper_delong,
    brier_score=brier_score,
    calibration_slope=calib_slope,
    hosmer_lemeshow_p=hl_p,
    max_net_benefit=float(nb_model_array.max()),
)
append_runlog("15", _step15_log_entry)

print("\n💾 Stored: CALIBRATION_RESULTS")
print("\n" + "="*100)
print("✅ STEP 15 COMPLETE — CALIBRATION & EVALUATION")
print("="*100)

# The net-benefit line used to carry an unconditional ✅.  Show it only when
# the model beats both default strategies (treat none = 0, treat all) at one
# or more thresholds, the same criterion used in the decision-curve section.
_dca_is_useful = bool(np.any((nb_model_array > 0) & (nb_model_array > nb_all_array)))

print(f"\n📊 KEY FINDINGS:")
print(f"   AUC:                {auc_delong:.4f} (95% CI: {ci_lower_delong:.4f}-{ci_upper_delong:.4f})")
print(f"   Max Net Benefit:    {nb_model_array.max():.4f} {'✅' if _dca_is_useful else '⚠️'}")
print(f"   Calibration Slope:  {calib_slope:.4f} {'✅' if 0.8 <= calib_slope <= 1.2 else '⚠️'}")
print(f"   HL p-value:         {hl_p:.4f} {'✅' if hl_p > 0.05 else '⚠️'}")
print(f"   Brier Score:        {brier_score:.4f}")
print(f"\n🎯 NEXT: Step 16 — SHAP Interpretability & Platt Calibration")
print("="*100 + "\n")

In [None]:
# ═══════════════════════════════════════════════════════════════════════════════
# STEP 16A: PLATT CALIBRATION + SHAP — REPORTABLE MODEL
# TRIPOD: 10e (Calibration), 11 (Validation), 15b (Model explanation)
# ═══════════════════════════════════════════════════════════════════════════════
# PHASE A: REPORTABLE DEVELOPMENT
# - Model trained on TRAIN (n=380)
# - Calibrated on TRAIN using 5-fold cross-validation
# - Evaluated on TEST (n=96) → Internal validation metrics
# - Evaluated on EXTERNAL (n=354) → Primary external validation metrics
# ═══════════════════════════════════════════════════════════════════════════════

from sklearn.linear_model import LogisticRegression
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import roc_auc_score, brier_score_loss, roc_curve
from sklearn.calibration import calibration_curve
from scipy import stats
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import shap
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

print("\n" + "="*100)
print("STEP 16A: PLATT CALIBRATION + SHAP — REPORTABLE MODEL")
print("="*100)
# Stamp the run with the current UTC time, formatted like the Step 0 header;
# the previous hard-coded literal reported the same moment on every run.
print(f"UTC: {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"User: zainzampawala786-sudo")
# NOTE(review): the sample sizes below are literals; the actual sizes are
# printed from the loaded data in the "Dataset Summary" that follows.
print(f"\nPHASE A: REPORTABLE DEVELOPMENT")
print(f"   Model trained on TRAIN (n=380)")
print(f"   Calibrated on TRAIN (5-fold CV)")
print(f"   Internal validation on TEST (n=96)")
print(f"   Primary external validation (n=354)\n")

# ══════════════════════════════════════════════════════════════════════════════
# 1. LOAD DATA & MODEL
# ══════════════════════════════════════════════════════════════════════════════

print(f"{'='*100}")
print(f"1. DATA & MODEL SETUP")
print(f"{'='*100}\n")

# Work on copies of the cleaned splits held in CLEAN_FEATURE_DATA.
X_train_clean, y_train = (
    CLEAN_FEATURE_DATA[key].copy() for key in ("X_train_clean", "y_train")
)
X_test_clean, y_test = (
    CLEAN_FEATURE_DATA[key].copy() for key in ("X_test_clean", "y_test")
)
X_external, y_external = (
    CLEAN_FEATURE_DATA[key].copy() for key in ("X_external_clean", "y_external")
)

# Shared artefacts produced by earlier steps.
FINAL_MODEL = EXTERNAL_VALIDATION_RESULTS['final_model']
TRAINED_MODELS = TRAINED_MODELS_3TIER
COLORS = DISTRIBUTION_DATA["colors_enhanced"]

# Size, event count and event rate of each split (same layout for all three).
print("   Dataset Summary:")
for _split_label, _split_X, _split_y in (
    ("Training:", X_train_clean, y_train),
    ("Internal Test:", X_test_clean, y_test),
    ("External:", X_external, y_external),
):
    _split_events = (_split_y == 1).sum()
    print(f"      {_split_label:<19}{len(_split_X):>4} samples, {_split_events:>3} events ({_split_events/len(_split_y)*100:.1f}%)")
print(f"\n   Final Model: {FINAL_MODEL['algorithm']} (Tier {FINAL_MODEL['tier']}, {FINAL_MODEL['n_features']} features)")

# Look up the fitted estimator and its scaler for the selected tier/algorithm.
final_tier_key = f"tier_{FINAL_MODEL['tier']}"
_final_entry = TRAINED_MODELS[final_tier_key]['models'][FINAL_MODEL['algorithm']]
final_model_obj = _final_entry['model']
final_scaler = _final_entry['scaler']
final_features = FINAL_MODEL['features']

print(f"\n   Feature Set ({len(final_features)} features):")
for i, feat in enumerate(final_features, 1):
    print(f"      {i:2d}. {feat}")

# ══════════════════════════════════════════════════════════════════════════════
# 2. PREPARE DATASETS
# ══════════════════════════════════════════════════════════════════════════════

print(f"\n{'='*100}")
print("2. PREPARING DATASETS")
print(f"{'='*100}\n")

# Restrict every split to the final model's feature set.
X_train_final = X_train_clean[final_features].copy()
X_test_final = X_test_clean[final_features].copy()
X_external_final = X_external[final_features].copy()

# Without a stored scaler the raw values are used as-is; otherwise the
# already-fitted scaler is applied to all three splits (transform only).
if final_scaler is None:
    X_train_processed = X_train_final.values
    X_test_processed = X_test_final.values
    X_external_processed = X_external_final.values
    print("   No scaling required")
else:
    X_train_processed = final_scaler.transform(X_train_final)
    X_test_processed = final_scaler.transform(X_test_final)
    X_external_processed = final_scaler.transform(X_external_final)
    print("   ✓ Scaling applied (StandardScaler)")

print("\n   ✓ Datasets prepared:")
print(f"      Training:   {X_train_processed.shape}")
print(f"      Test:       {X_test_processed.shape}")
print(f"      External:   {X_external_processed.shape}")

# ══════════════════════════════════════════════════════════════════════════════
# 3. GET RAW (UNCALIBRATED) PREDICTIONS
# ══════════════════════════════════════════════════════════════════════════════

print(f"\n{'='*100}")
print(f"3. RAW (UNCALIBRATED) PREDICTIONS")
print(f"{'='*100}\n")


def _raw_event_scores(model, X):
    """Return the model's uncalibrated event scores for ``X``, bounded to [0, 1].

    - 'Elastic Net' final model: ``predict`` output clipped to [0, 1].
    - Estimators exposing ``predict_proba``: probability of class index 1.
    - Otherwise: ``decision_function`` margins passed through the logistic
      function.  The margins are unbounded, and the Brier score computed
      below expects probabilities; the squashing is monotone, so the AUC is
      the same as for the raw margins.
    """
    if FINAL_MODEL['algorithm'] == 'Elastic Net':
        return np.clip(model.predict(X), 0, 1)
    if hasattr(model, 'predict_proba'):
        return model.predict_proba(X)[:, 1]
    margins = np.asarray(model.decision_function(X), dtype=float)
    return 1.0 / (1.0 + np.exp(-margins))


# Get predictions
y_train_raw = _raw_event_scores(final_model_obj, X_train_processed)
y_test_raw = _raw_event_scores(final_model_obj, X_test_processed)
y_external_raw = _raw_event_scores(final_model_obj, X_external_processed)

print(f"   ✓ Raw predictions obtained:")
print(f"      Training:   {len(y_train_raw)} predictions")
print(f"      Test:       {len(y_test_raw)} predictions")
print(f"      External:   {len(y_external_raw)} predictions")

# Raw performance (discrimination and overall accuracy before calibration)
auc_train_raw = roc_auc_score(y_train, y_train_raw)
auc_test_raw = roc_auc_score(y_test, y_test_raw)
auc_external_raw = roc_auc_score(y_external, y_external_raw)

brier_train_raw = brier_score_loss(y_train, y_train_raw)
brier_test_raw = brier_score_loss(y_test, y_test_raw)
brier_external_raw = brier_score_loss(y_external, y_external_raw)

print(f"\n   Raw Performance (Before Calibration):")
print(f"      {'Dataset':<20} {'AUC':>8} {'Brier':>8}")
print(f"      {'-'*38}")
print(f"      {'Training':<20} {auc_train_raw:>8.4f} {brier_train_raw:>8.4f}")
print(f"      {'Internal Test':<20} {auc_test_raw:>8.4f} {brier_test_raw:>8.4f}")
print(f"      {'External':<20} {auc_external_raw:>8.4f} {brier_external_raw:>8.4f}")

# ══════════════════════════════════════════════════════════════════════════════
# 4. PLATT CALIBRATION (5-FOLD CV ON TRAINING SET)
# ══════════════════════════════════════════════════════════════════════════════

print(f"\n{'='*100}")
print(f"4. PLATT CALIBRATION (5-FOLD CV ON TRAINING SET)")
print(f"{'='*100}\n")

print(f"   Fitting CalibratedClassifierCV on training set (n={len(X_train_processed)})...")
print(f"      Method: Platt scaling (sigmoid)")
print(f"      Cross-validation: 5-fold")

# Create calibrated classifier: sigmoid (Platt) calibration with 5-fold
# cross-validation on the training data.
calibrated_model = CalibratedClassifierCV(
    estimator=final_model_obj,
    method='sigmoid',
    cv=5,
    n_jobs=-1
)

# Fit on training set
calibrated_model.fit(X_train_processed, y_train)

print(f"\n   ✓ Calibration complete!")
print(f"      Number of calibrators: {len(calibrated_model.calibrated_classifiers_)}")

# Extract Platt parameters (from first fold as representative).
# scikit-learn's sigmoid calibrator stores its fit as `a_` and `b_` and
# predicts 1 / (1 + exp(a_ * f + b_)); on the logit scale that is a slope of
# -a_ and an intercept of -b_.  It has no `coef_` / `intercept_`, which is why
# the earlier lookup never found anything to report.  The `coef_` branch is
# kept as a fallback for calibrators that do expose those attributes.
platt_lr = calibrated_model.calibrated_classifiers_[0].calibrators[0]
if hasattr(platt_lr, 'a_') and hasattr(platt_lr, 'b_'):
    platt_slope = -float(platt_lr.a_)
    platt_intercept = -float(platt_lr.b_)
elif hasattr(platt_lr, 'coef_') and hasattr(platt_lr, 'intercept_'):
    platt_slope = platt_lr.coef_[0][0]
    platt_intercept = platt_lr.intercept_[0]
else:
    platt_slope = None
    platt_intercept = None

if platt_slope is not None:
    print(f"      Representative Platt parameters (fold 1):")
    print(f"         Slope:     {platt_slope:.4f}")
    print(f"         Intercept: {platt_intercept:.4f}")

# ══════════════════════════════════════════════════════════════════════════════
# 5. CALIBRATED PREDICTIONS
# ══════════════════════════════════════════════════════════════════════════════

print(f"\n{'='*100}")
print("5. CALIBRATED PREDICTIONS")
print(f"{'='*100}\n")

# Calibrated event probabilities (second predict_proba column) for the three
# splits, in the order train, test, external.
y_train_calib, y_test_calib, y_external_calib = (
    calibrated_model.predict_proba(_features)[:, 1]
    for _features in (X_train_processed, X_test_processed, X_external_processed)
)

print("   ✓ Calibrated predictions obtained")

# Discrimination (AUC) and overall accuracy (Brier) after calibration.
auc_train_calib = roc_auc_score(y_train, y_train_calib)
auc_test_calib = roc_auc_score(y_test, y_test_calib)
auc_external_calib = roc_auc_score(y_external, y_external_calib)

brier_train_calib = brier_score_loss(y_train, y_train_calib)
brier_test_calib = brier_score_loss(y_test, y_test_calib)
brier_external_calib = brier_score_loss(y_external, y_external_calib)

# One table row per split: calibrated metrics plus the change versus raw.
print("\n   Calibrated Performance:")
print(f"      {'Dataset':<20} {'AUC':>8} {'Brier':>8} {'ΔAUC':>8} {'ΔBrier':>8}")
print(f"      {'-'*58}")
for _row_label, _auc_cal, _brier_cal, _auc_raw, _brier_raw in (
    ('Training', auc_train_calib, brier_train_calib, auc_train_raw, brier_train_raw),
    ('Internal Test', auc_test_calib, brier_test_calib, auc_test_raw, brier_test_raw),
    ('External', auc_external_calib, brier_external_calib, auc_external_raw, brier_external_raw),
):
    print(f"      {_row_label:<20} {_auc_cal:>8.4f} {_brier_cal:>8.4f} {(_auc_cal-_auc_raw):>+8.4f} {(_brier_cal-_brier_raw):>+8.4f}")

# ══════════════════════════════════════════════════════════════════════════════
# 6. DETAILED CALIBRATION METRICS
# ══════════════════════════════════════════════════════════════════════════════

print(f"\n{'='*100}")
print("6. DETAILED CALIBRATION METRICS")
print(f"{'='*100}\n")

def hosmer_lemeshow_test(y_true, y_pred, n_bins=10):
    """Hosmer-Lemeshow goodness-of-fit test on fixed-width probability bins.

    Predictions are grouped into ``n_bins`` equal-width intervals of [0, 1]
    (not quantile groups).  Empty intervals are dropped, and the statistic is
    ``sum((O - E)^2 / (E * (1 - E / n)))`` over the remaining groups, where
    ``O`` is the observed event count, ``E`` the sum of predicted
    probabilities and ``n`` the group size.  It is referred to a chi-square
    distribution with ``groups - 2`` degrees of freedom.

    Parameters
    ----------
    y_true : array-like of shape (n,)
        Observed binary outcomes (0/1).
    y_pred : array-like of shape (n,)
        Predicted probabilities.
    n_bins : int, default 10
        Number of equal-width intervals.

    Returns
    -------
    hl_statistic : float
        The Hosmer-Lemeshow chi-square statistic.
    p_value : float
        Upper-tail chi-square probability; ``nan`` when fewer than three
        groups are populated (``df < 1``), since the test is then undefined.
    df : int
        Degrees of freedom (populated groups minus 2).
    """
    # Accept lists and pandas objects as well as arrays.
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()

    # Map every prediction to the interval whose left edge it has reached;
    # the clip keeps p == 1.0 (and anything out of range) in an end interval.
    bins = np.linspace(0, 1, n_bins + 1)
    bin_indices = np.clip(np.digitize(y_pred, bins[:-1]) - 1, 0, n_bins - 1)

    # Per-interval size, observed events and expected events.
    total = np.bincount(bin_indices, minlength=n_bins).astype(float)
    observed = np.bincount(bin_indices, weights=y_true, minlength=n_bins)
    expected = np.bincount(bin_indices, weights=y_pred, minlength=n_bins)

    populated = total > 0
    observed = observed[populated]
    expected = expected[populated]
    total = total[populated]

    # The 1e-10 keeps the ratio finite when a group's variance term is zero.
    hl_statistic = np.sum((observed - expected) ** 2 / (expected * (1 - expected / total) + 1e-10))
    df = len(observed) - 2

    if df < 1:
        p_value = float('nan')
    else:
        # Survival function rather than 1 - cdf: the latter rounds to exactly
        # 0.0 once the statistic is large.
        p_value = stats.chi2.sf(hl_statistic, df)

    return hl_statistic, p_value, df

def expected_calibration_error(y_true, y_pred, n_bins=10):
    """Expected Calibration Error over equal-width probability bins.

    Each prediction is assigned to one of ``n_bins`` equal-width intervals of
    [0, 1].  For every populated interval the absolute gap between the
    observed event rate and the mean prediction is weighted by the interval's
    size; the weighted gaps are summed and divided by the number of samples.
    """
    left_edges = np.linspace(0, 1, n_bins + 1)[:-1]
    bucket = np.clip(np.digitize(y_pred, left_edges) - 1, 0, n_bins - 1)

    weighted_gap = 0
    # np.unique yields the populated buckets in ascending order.
    for bucket_id in np.unique(bucket):
        members = bucket == bucket_id
        observed_rate = np.mean(y_true[members])
        mean_prediction = np.mean(y_pred[members])
        weighted_gap += np.sum(members) * np.abs(observed_rate - mean_prediction)

    return weighted_gap / len(y_true)

def calibration_slope_intercept(y_true, y_pred):
    """Calibration slope and intercept from a logistic recalibration fit.

    Fits an unpenalised logistic regression of ``y_true`` on the logit of
    ``y_pred`` and returns its coefficient (slope) and intercept. Both are
    estimated jointly in a single fit.

    Parameters
    ----------
    y_true : array-like of 0/1 outcomes.
    y_pred : array-like of predicted probabilities.

    Returns
    -------
    (slope, intercept) : tuple of floats
    """
    eps = 1e-10
    # Clip symmetrically so probabilities of exactly 0 or 1 give a finite logit;
    # without this a 0 produces -inf and the fit below is handed non-finite input.
    prob = np.clip(np.asarray(y_pred, dtype=float), eps, 1 - eps)
    logit = np.log(prob / (1 - prob))
    lr = LogisticRegression(penalty=None, max_iter=1000)
    lr.fit(logit.reshape(-1, 1), y_true)
    return lr.coef_[0][0], lr.intercept_[0]

# Calibration metrics for both cohorts, computed on the calibrated predictions.
print("   Computing calibration metrics...\n")

# Internal test cohort
_y_test_arr = y_test.values
hl_test_stat, hl_test_p, hl_test_df = hosmer_lemeshow_test(_y_test_arr, y_test_calib)
ece_test = expected_calibration_error(_y_test_arr, y_test_calib)
slope_test, intercept_test = calibration_slope_intercept(_y_test_arr, y_test_calib)
# Reliability-curve points use 10 quantile bins.
prob_true_test, prob_pred_test = calibration_curve(
    y_test, y_test_calib, n_bins=10, strategy='quantile'
)

# External cohort
_y_external_arr = y_external.values
hl_ext_stat, hl_ext_p, hl_ext_df = hosmer_lemeshow_test(_y_external_arr, y_external_calib)
ece_ext = expected_calibration_error(_y_external_arr, y_external_calib)
slope_ext, intercept_ext = calibration_slope_intercept(_y_external_arr, y_external_calib)
prob_true_ext, prob_pred_ext = calibration_curve(
    y_external, y_external_calib, n_bins=10, strategy='quantile'
)

# Each table row is (label, value, passed); "passed" encodes the acceptance
# threshold applied to that metric and selects the status mark.
_calibration_tables = (
    (
        f"   CALIBRATION METRICS — INTERNAL TEST (n={len(y_test)}):",
        (
            ('AUC', auc_test_calib, auc_test_calib >= 0.70),
            ('Brier Score', brier_test_calib, brier_test_calib <= 0.20),
            ('ECE', ece_test, ece_test <= 0.10),
            ('Hosmer-Lemeshow p-value', hl_test_p, hl_test_p > 0.05),
            ('Calibration Slope', slope_test, 0.8 <= slope_test <= 1.2),
            ('Calibration Intercept', intercept_test, abs(intercept_test) <= 0.2),
        ),
    ),
    (
        f"\n   CALIBRATION METRICS — EXTERNAL (n={len(y_external)}) [PRIMARY VALIDATION]:",
        (
            ('AUC', auc_external_calib, auc_external_calib >= 0.70),
            ('Brier Score', brier_external_calib, brier_external_calib <= 0.20),
            ('ECE', ece_ext, ece_ext <= 0.10),
            ('Hosmer-Lemeshow p-value', hl_ext_p, hl_ext_p > 0.05),
            ('Calibration Slope', slope_ext, 0.8 <= slope_ext <= 1.2),
            ('Calibration Intercept', intercept_ext, abs(intercept_ext) <= 0.2),
        ),
    ),
)

for _heading, _rows in _calibration_tables:
    print(_heading)
    print(f"      {'Metric':<30} {'Value':>12} {'Status'}")
    print(f"      {'-'*50}")
    for _metric, _value, _passed in _rows:
        print(f"      {_metric:<30} {_value:>12.4f} {'✓' if _passed else 'X'}")

# ══════════════════════════════════════════════════════════════════════════════
# 7. SHAP ANALYSIS — INTERNAL TEST
# ══════════════════════════════════════════════════════════════════════════════

# Section banner.
print(f"\n{'='*100}")
print(f"7. SHAP ANALYSIS — INTERNAL TEST")
print(f"{'='*100}\n")

# NOTE(review): the reported n is len(X_test_final), while the SHAP values below
# are computed on X_test_processed — presumably the same rows; confirm.
print(f"   Computing SHAP values for internal test set (n={len(X_test_final)})...")
# This estimate is printed unconditionally, including when the tree branch runs.
print(f"   Estimated time: ~10-15 minutes (KernelExplainer for SVM)")

# Create SHAP explainer
if FINAL_MODEL['algorithm'] in ['Random Forest', 'Gradient Boosting', 'XGBoost']:
    # Listed tree-based algorithms: explain the fitted model with TreeExplainer.
    explainer_test = shap.TreeExplainer(final_model_obj)
    shap_values_test = explainer_test.shap_values(X_test_processed)
    if isinstance(shap_values_test, list):
        # A list result is indexed at 1 (presumably the positive class — confirm).
        shap_values_test = shap_values_test[1]
else:
    # Use KernelExplainer for SVM
    # Background set: at most 100 rows sampled from the test matrix itself.
    background_test = shap.sample(X_test_processed, min(100, len(X_test_processed)))
    
    def model_predict_test(X):
        """Return column 1 of ``final_model_obj.predict_proba`` for the rows in ``X``."""
        return final_model_obj.predict_proba(X)[:, 1]
    
    explainer_test = shap.KernelExplainer(model_predict_test, background_test)
    shap_values_test = explainer_test.shap_values(X_test_processed)

# Ensure 2D
# A list is reduced to its element 1 (or its only element); an array with more
# than two dimensions is sliced at index 1 of the last axis when that axis has
# length 2, otherwise squeezed.
if isinstance(shap_values_test, list):
    shap_values_test = shap_values_test[1] if len(shap_values_test) > 1 else shap_values_test[0]
if shap_values_test.ndim > 2:
    shap_values_test = shap_values_test[:, :, 1] if shap_values_test.shape[2] == 2 else shap_values_test.squeeze()

print(f"\n   ✓ SHAP values computed")
print(f"      Shape: {shap_values_test.shape}")

# Rank features by mean absolute SHAP value on the internal test set (largest first).
_mean_abs_shap_test = np.abs(shap_values_test).mean(axis=0)
shap_importance_test = pd.DataFrame(
    {'Feature': final_features, 'Mean_SHAP': _mean_abs_shap_test}
).sort_values(by='Mean_SHAP', ascending=False)

print("\n   Top 10 Features (Internal Test):\n")
print(f"      {'Rank':<6} {'Feature':<35} {'Mean |SHAP|':>12}")
print("      " + "-" * 55)
_top10_test = shap_importance_test.head(10)
for _rank, (_feature, _score) in enumerate(
    zip(_top10_test['Feature'], _top10_test['Mean_SHAP']), start=1
):
    print(f"      {_rank:<6} {_feature:<35} {_score:>12.4f}")

# ══════════════════════════════════════════════════════════════════════════════
# 8. SHAP ANALYSIS — EXTERNAL
# ══════════════════════════════════════════════════════════════════════════════

# Section banner.
print(f"\n{'='*100}")
print(f"8. SHAP ANALYSIS — EXTERNAL [PRIMARY VALIDATION]")
print(f"{'='*100}\n")

# NOTE(review): the reported n is len(X_external_final), while the SHAP values
# below are computed on X_external_processed — presumably the same rows; confirm.
print(f"   Computing SHAP values for external set (n={len(X_external_final)})...")
# This estimate is printed unconditionally, including when the tree branch runs.
print(f"   Estimated time: ~40-60 minutes (KernelExplainer for SVM)")

# Create SHAP explainer for external
if FINAL_MODEL['algorithm'] in ['Random Forest', 'Gradient Boosting', 'XGBoost']:
    # Listed tree-based algorithms: explain the fitted model with TreeExplainer.
    explainer_ext = shap.TreeExplainer(final_model_obj)
    shap_values_ext = explainer_ext.shap_values(X_external_processed)
    if isinstance(shap_values_ext, list):
        # A list result is indexed at 1 (presumably the positive class — confirm).
        shap_values_ext = shap_values_ext[1]
else:
    # Background set: 100 rows requested from the external matrix itself
    # (no min() guard here, unlike the internal-test section).
    background_ext = shap.sample(X_external_processed, 100)
    
    def model_predict_ext(X):
        """Return column 1 of ``final_model_obj.predict_proba`` for the rows in ``X``."""
        return final_model_obj.predict_proba(X)[:, 1]
    
    explainer_ext = shap.KernelExplainer(model_predict_ext, background_ext)
    shap_values_ext = explainer_ext.shap_values(X_external_processed)

# Ensure 2D
# A list is reduced to its element 1 (or its only element); an array with more
# than two dimensions is sliced at index 1 of the last axis when that axis has
# length 2, otherwise squeezed.
if isinstance(shap_values_ext, list):
    shap_values_ext = shap_values_ext[1] if len(shap_values_ext) > 1 else shap_values_ext[0]
if shap_values_ext.ndim > 2:
    shap_values_ext = shap_values_ext[:, :, 1] if shap_values_ext.shape[2] == 2 else shap_values_ext.squeeze()

print(f"\n   ✓ SHAP values computed")
print(f"      Shape: {shap_values_ext.shape}")

# Rank features by mean absolute SHAP value on the external set (largest first).
_mean_abs_shap_ext = np.abs(shap_values_ext).mean(axis=0)
shap_importance_ext = pd.DataFrame(
    {'Feature': final_features, 'Mean_SHAP': _mean_abs_shap_ext}
).sort_values(by='Mean_SHAP', ascending=False)

print("\n   Top 10 Features (External):\n")
print(f"      {'Rank':<6} {'Feature':<35} {'Mean |SHAP|':>12}")
print("      " + "-" * 55)
_top10_ext = shap_importance_ext.head(10)
for _rank, (_feature, _score) in enumerate(
    zip(_top10_ext['Feature'], _top10_ext['Mean_SHAP']), start=1
):
    print(f"      {_rank:<6} {_feature:<35} {_score:>12.4f}")

# ══════════════════════════════════════════════════════════════════════════════
# 9. FEATURE IMPORTANCE COMPARISON
# ══════════════════════════════════════════════════════════════════════════════

print("\n" + "=" * 100)
print("9. FEATURE IMPORTANCE COMPARISON")
print("=" * 100 + "\n")

# Join the two importance tables on feature name; rows follow the internal ranking.
importance_comparison = shap_importance_test.reset_index(drop=True).merge(
    shap_importance_ext.reset_index(drop=True),
    on='Feature',
    suffixes=('_Internal', '_External'),
)

# Agreement between cohorts, on raw values (Pearson) and on ranks (Spearman).
_imp_internal = importance_comparison['Mean_SHAP_Internal']
_imp_external = importance_comparison['Mean_SHAP_External']
corr_pearson = _imp_internal.corr(_imp_external, method='pearson')
corr_spearman = _imp_internal.corr(_imp_external, method='spearman')

print("   Feature Importance Correlation:")
print(f"      Pearson:   {corr_pearson:.4f}")
print(f"      Spearman:  {corr_spearman:.4f}")

# First threshold exceeded wins; anything else (including NaN) is "low".
_consistency_levels = (
    (0.7, "      ✓ Excellent consistency between internal and external"),
    (0.5, "      ✓ Good consistency between internal and external"),
    (0.3, "      Moderate consistency"),
)
print(next(
    (_msg for _threshold, _msg in _consistency_levels if corr_spearman > _threshold),
    "      Low consistency - features differ in importance",
))

# Overlap of the highest-ranked features between cohorts.
top5_internal = set(shap_importance_test['Feature'].head(5).values)
top5_external = set(shap_importance_ext['Feature'].head(5).values)
top10_internal = set(shap_importance_test['Feature'].head(10).values)
top10_external = set(shap_importance_ext['Feature'].head(10).values)

overlap_top5 = len(top5_internal.intersection(top5_external))
overlap_top10 = len(top10_internal.intersection(top10_external))

print("\n   Feature Overlap:")
print(f"      Top 5:  {overlap_top5}/5 ({overlap_top5/5*100:.0f}%)")
print(f"      Top 10: {overlap_top10}/10 ({overlap_top10/10*100:.0f}%)")

print("\n   Top 5 Features Comparison:\n")
print(f"      {'Rank':<6} {'Feature':<35} {'Internal':>10} {'External':>10}")
print("      " + "-" * 65)
_first5 = importance_comparison.head(5)
for _rank, (_feature, _internal_score, _external_score) in enumerate(
    zip(_first5['Feature'], _first5['Mean_SHAP_Internal'], _first5['Mean_SHAP_External']),
    start=1,
):
    print(f"      {_rank:<6} {_feature:<35} {_internal_score:>10.4f} {_external_score:>10.4f}")

# ══════════════════════════════════════════════════════════════════════════════
# 10. VISUALIZATION — FIGURE 12A (REPORTABLE MODEL)
# ══════════════════════════════════════════════════════════════════════════════

print("\n" + "=" * 100)
print("10. GENERATING FIGURE 12A — REPORTABLE MODEL")
print("=" * 100 + "\n")

# 28 x 20 inch canvas at 300 dpi, laid out on a 5-row by 4-column grid.
fig = plt.figure(dpi=300, figsize=(28, 20))
gs = fig.add_gridspec(nrows=5, ncols=4, hspace=0.45, wspace=0.35)

# ROW 1: ROC CURVES
# Panel titles report the actual cohort sizes (len of the outcome vectors)
# rather than fixed numbers, so they stay correct if the data split changes.

# Plot 1: Internal Test ROC
ax1 = fig.add_subplot(gs[0, 0])
fpr_test, tpr_test, _ = roc_curve(y_test, y_test_calib)
ax1.plot([0, 1], [0, 1], 'k--', linewidth=2, alpha=0.3, label='Chance')
ax1.plot(fpr_test, tpr_test, linewidth=3, color=COLORS['primary'], 
         label=f'Internal Test\nAUC = {auc_test_calib:.3f}')
ax1.set_xlabel('False Positive Rate', fontsize=12, fontweight='bold')
ax1.set_ylabel('True Positive Rate', fontsize=12, fontweight='bold')
ax1.set_title(f'A. ROC: Internal Test (n={len(y_test)})', fontsize=13, fontweight='bold', loc='left')
ax1.legend(fontsize=10, loc='lower right')
ax1.grid(alpha=0.3)

# Plot 2: External ROC
ax2 = fig.add_subplot(gs[0, 1])
fpr_ext, tpr_ext, _ = roc_curve(y_external, y_external_calib)
ax2.plot([0, 1], [0, 1], 'k--', linewidth=2, alpha=0.3, label='Chance')
ax2.plot(fpr_ext, tpr_ext, linewidth=3, color=COLORS['secondary'], 
         label=f'External (Primary)\nAUC = {auc_external_calib:.3f}')
ax2.set_xlabel('False Positive Rate', fontsize=12, fontweight='bold')
ax2.set_ylabel('True Positive Rate', fontsize=12, fontweight='bold')
ax2.set_title(f'B. ROC: External - Primary (n={len(y_external)})', fontsize=13, fontweight='bold', loc='left')
ax2.legend(fontsize=10, loc='lower right')
ax2.grid(alpha=0.3)

# Shared text styling for the two bar panels below.
_axis_font = {'fontsize': 12, 'fontweight': 'bold'}
_title_font = {'fontsize': 13, 'fontweight': 'bold', 'loc': 'left'}

# Panel C: calibrated AUC per cohort, each bar annotated with its value.
ax3 = fig.add_subplot(gs[0, 2])
datasets = ['Internal\nTest', 'External\n(Primary)']
aucs = [auc_test_calib, auc_external_calib]
colors = [COLORS['primary'], COLORS['secondary']]
bars = ax3.bar(datasets, aucs, color=colors, alpha=0.7, edgecolor='black', linewidth=2)
for _bar, _auc_value in zip(bars, aucs):
    _x_center = _bar.get_x() + _bar.get_width() / 2.
    ax3.text(_x_center, _bar.get_height() + 0.01, f'{_auc_value:.3f}',
             ha='center', va='bottom', fontsize=11, fontweight='bold')
ax3.set_ylabel('AUC', **_axis_font)
ax3.set_title('C. AUC Comparison', **_title_font)
ax3.set_ylim([0, 1])
ax3.grid(alpha=0.3, axis='y')

# Panel D: AUC, Brier and ECE side by side for both cohorts.
ax4 = fig.add_subplot(gs[0, 3])
metrics = ['AUC', 'Brier', 'ECE']
internal_vals = [auc_test_calib, brier_test_calib, ece_test]
external_vals = [auc_external_calib, brier_external_calib, ece_ext]
x = np.arange(len(metrics))
width = 0.35
for _shift, _values, _series, _shade in (
    (-width / 2, internal_vals, 'Internal Test', COLORS['primary']),
    (width / 2, external_vals, 'External', COLORS['secondary']),
):
    ax4.bar(x + _shift, _values, width, label=_series,
            color=_shade, alpha=0.7, edgecolor='black')
ax4.set_ylabel('Value', **_axis_font)
ax4.set_title('D. Performance Metrics', **_title_font)
ax4.set_xticks(x)
ax4.set_xticklabels(metrics)
ax4.legend(fontsize=10)
ax4.grid(alpha=0.3, axis='y')

# ROW 2: CALIBRATION CURVES
# Panels E and F share one layout: the identity line plus the binned
# reliability curve, with the Hosmer-Lemeshow p-value shown in the legend.
ax5 = fig.add_subplot(gs[1, 0])
ax6 = fig.add_subplot(gs[1, 1])

for _ax, _mean_pred, _frac_pos, _shade, _hl_p, _panel_title in (
    (ax5, prob_pred_test, prob_true_test, COLORS['primary'], hl_test_p,
     'E. Calibration: Internal Test'),
    (ax6, prob_pred_ext, prob_true_ext, COLORS['secondary'], hl_ext_p,
     'F. Calibration: External (Primary)'),
):
    _ax.plot([0, 1], [0, 1], 'k--', linewidth=2, label='Perfect')
    _ax.plot(_mean_pred, _frac_pos, marker='o', linewidth=3, markersize=10,
             color=_shade, label=f'Calibrated\nHL p={_hl_p:.3f}')
    _ax.set_xlabel('Predicted Probability', fontsize=12, fontweight='bold')
    _ax.set_ylabel('Observed Probability', fontsize=12, fontweight='bold')
    _ax.set_title(_panel_title, fontsize=13, fontweight='bold', loc='left')
    _ax.legend(fontsize=10)
    _ax.grid(alpha=0.3)
    _ax.set_xlim([0, 1])
    _ax.set_ylim([0, 1])

# Shared text styling for the two panels below.
_axis_font = {'fontsize': 12, 'fontweight': 'bold'}
_title_font = {'fontsize': 13, 'fontweight': 'bold', 'loc': 'left'}

# Panel G: HL p-value, calibration slope and ECE for both cohorts, with
# reference lines at p=0.05 and slope=1.0.
ax7 = fig.add_subplot(gs[1, 2])
metrics_calib = ['HL p', 'Slope', 'ECE']
internal_calib = [hl_test_p, slope_test, ece_test]
external_calib = [hl_ext_p, slope_ext, ece_ext]
x = np.arange(len(metrics_calib))
width = 0.35
for _shift, _values, _series, _shade in (
    (-width / 2, internal_calib, 'Internal', COLORS['primary']),
    (width / 2, external_calib, 'External', COLORS['secondary']),
):
    ax7.bar(x + _shift, _values, width, label=_series,
            color=_shade, alpha=0.7, edgecolor='black')
ax7.axhline(y=0.05, color='red', linestyle='--', linewidth=1, alpha=0.5, label='HL p=0.05')
ax7.axhline(y=1.0, color='green', linestyle='--', linewidth=1, alpha=0.5, label='Slope=1.0')
ax7.set_ylabel('Value', **_axis_font)
ax7.set_title('G. Calibration Metrics', **_title_font)
ax7.set_xticks(x)
ax7.set_xticklabels(metrics_calib)
ax7.legend(fontsize=9)
ax7.grid(alpha=0.3, axis='y')

# Panel H: calibration slope per cohort against the 0.8-1.2 band.
ax8 = fig.add_subplot(gs[1, 3])
slopes = [slope_test, slope_ext]
datasets_slope = ['Internal\nTest', 'External']
bars = ax8.bar(datasets_slope, slopes,
               color=[COLORS['primary'], COLORS['secondary']],
               alpha=0.7, edgecolor='black', linewidth=2)
ax8.axhspan(0.8, 1.2, alpha=0.2, color='green', label='Ideal range (0.8-1.2)')
ax8.axhline(y=1.0, color='green', linestyle='--', linewidth=2, label='Perfect (1.0)')
for _bar, _slope_value in zip(bars, slopes):
    _x_center = _bar.get_x() + _bar.get_width() / 2.
    ax8.text(_x_center, _bar.get_height() + 0.05, f'{_slope_value:.3f}',
             ha='center', va='bottom', fontsize=11, fontweight='bold')
ax8.set_ylabel('Calibration Slope', **_axis_font)
ax8.set_title('H. Calibration Slope', **_title_font)
ax8.set_ylim([0, max(slopes) + 0.3])
ax8.legend(fontsize=9)
ax8.grid(alpha=0.3, axis='y')

# ROW 3: SHAP FEATURE IMPORTANCE
# Panels I and J: horizontal bars of the top-N mean |SHAP| values per cohort.
# The ranked tables are reversed so the highest-ranked feature is drawn last
# (at the top of the axis).
top_n = 10
shap_top_test = shap_importance_test.head(top_n).iloc[::-1]
shap_top_ext = shap_importance_ext.head(top_n).iloc[::-1]

ax9 = fig.add_subplot(gs[2, :2])
ax10 = fig.add_subplot(gs[2, 2:])

for _ax, _ranked, _shade, _panel_title in (
    (ax9, shap_top_test, COLORS['primary'],
     'I. Feature Importance: Internal Test (SHAP)'),
    (ax10, shap_top_ext, COLORS['secondary'],
     'J. Feature Importance: External (SHAP)'),
):
    _slots = range(len(_ranked))
    _ax.barh(_slots, _ranked['Mean_SHAP'],
             color=_shade, alpha=0.7, edgecolor='black', linewidth=1.5)
    _ax.set_yticks(_slots)
    _ax.set_yticklabels(_ranked['Feature'], fontsize=11)
    _ax.set_xlabel('Mean |SHAP Value|', fontsize=12, fontweight='bold')
    _ax.set_title(_panel_title, fontsize=13, fontweight='bold', loc='left')
    _ax.grid(alpha=0.3, axis='x')

# ROW 4: SHAP BEESWARM
# Hand-built beeswarm-style panels: one jittered row of points per feature,
# x = SHAP value, colour = min-max scaled feature value.

# Plot 11: SHAP Beeswarm - Internal (top 8)
ax11 = fig.add_subplot(gs[3, :2])
# Column positions (within final_features) of the 8 highest-ranked features.
top_features_idx = [list(final_features).index(feat) for feat in shap_importance_test.head(8)['Feature'].values]
for idx, feat_idx in enumerate(top_features_idx):
    shap_vals = shap_values_test[:, feat_idx]
    # NOTE(review): colours are read from X_test_final by column position; this
    # assumes its column order matches final_features — confirm.
    feature_vals = X_test_final.iloc[:, feat_idx].values
    # Min-max scale for colouring; the 1e-10 avoids dividing by zero for a constant column.
    feature_vals_norm = (feature_vals - feature_vals.min()) / (feature_vals.max() - feature_vals.min() + 1e-10)
    # Vertical jitter drawn from the global NumPy RNG (std 0.15) around row idx.
    y_pos = idx + np.random.normal(0, 0.15, len(shap_vals))
    scatter = ax11.scatter(shap_vals, y_pos, c=feature_vals_norm, cmap='RdBu_r', 
                         s=30, alpha=0.6, edgecolors='none')
ax11.set_yticks(range(len(top_features_idx)))
ax11.set_yticklabels([final_features[i] for i in top_features_idx], fontsize=10)
ax11.set_xlabel('SHAP Value', fontsize=12, fontweight='bold')
ax11.set_title('K. SHAP Beeswarm: Internal Test (Top 8)', fontsize=13, fontweight='bold', loc='left')
ax11.axvline(x=0, color='black', linestyle='--', linewidth=1)
ax11.grid(alpha=0.3, axis='x')
# The colorbar is built from the scatter created in the last loop iteration.
cbar = plt.colorbar(scatter, ax=ax11, pad=0.01)
cbar.set_label('Feature Value\n(Low → High)', fontsize=10)

# Plot 12: SHAP Beeswarm - External (top 8)
ax12 = fig.add_subplot(gs[3, 2:])
# Column positions (within final_features) of the 8 highest-ranked external features.
top_features_idx_ext = [list(final_features).index(feat) for feat in shap_importance_ext.head(8)['Feature'].values]
for idx, feat_idx in enumerate(top_features_idx_ext):
    shap_vals = shap_values_ext[:, feat_idx]
    # NOTE(review): colours are read from X_external_final by column position; this
    # assumes its column order matches final_features — confirm.
    feature_vals = X_external_final.iloc[:, feat_idx].values
    # Min-max scale for colouring; the 1e-10 avoids dividing by zero for a constant column.
    feature_vals_norm = (feature_vals - feature_vals.min()) / (feature_vals.max() - feature_vals.min() + 1e-10)
    # Vertical jitter drawn from the global NumPy RNG (std 0.15) around row idx.
    y_pos = idx + np.random.normal(0, 0.15, len(shap_vals))
    scatter = ax12.scatter(shap_vals, y_pos, c=feature_vals_norm, cmap='RdBu_r', 
                         s=30, alpha=0.6, edgecolors='none')
ax12.set_yticks(range(len(top_features_idx_ext)))
ax12.set_yticklabels([final_features[i] for i in top_features_idx_ext], fontsize=10)
ax12.set_xlabel('SHAP Value', fontsize=12, fontweight='bold')
ax12.set_title('L. SHAP Beeswarm: External (Top 8)', fontsize=13, fontweight='bold', loc='left')
ax12.axvline(x=0, color='black', linestyle='--', linewidth=1)
ax12.grid(alpha=0.3, axis='x')
# The colorbar is built from the scatter created in the last loop iteration.
cbar = plt.colorbar(scatter, ax=ax12, pad=0.01)
cbar.set_label('Feature Value\n(Low → High)', fontsize=10)

# ROW 5: FEATURE IMPORTANCE CORRELATION & RISK DISTRIBUTION
_axis_font = {'fontsize': 12, 'fontweight': 'bold'}
_title_font = {'fontsize': 13, 'fontweight': 'bold', 'loc': 'left'}

# Panel M: internal vs external mean |SHAP| per feature, with a degree-1
# polynomial fit drawn through the points (the legend reports Spearman r).
ax13 = fig.add_subplot(gs[4, 0])
_imp_internal = importance_comparison['Mean_SHAP_Internal']
_imp_external = importance_comparison['Mean_SHAP_External']
ax13.scatter(_imp_internal, _imp_external,
             s=120, alpha=0.6, color=COLORS['primary'], edgecolors='black', linewidth=1.5)
z = np.polyfit(_imp_internal, _imp_external, 1)
p = np.poly1d(z)
x_line = np.linspace(_imp_internal.min(), _imp_internal.max(), 100)
ax13.plot(x_line, p(x_line), "r--", linewidth=2,
          label=f'Spearman r={corr_spearman:.3f}')
ax13.set_xlabel('Internal Test SHAP', **_axis_font)
ax13.set_ylabel('External SHAP', **_axis_font)
ax13.set_title('M. Feature Importance Consistency', **_title_font)
ax13.legend(fontsize=10)
ax13.grid(alpha=0.3)

# Panels N and O: calibrated risk histograms split by outcome (0 then 1).
ax14 = fig.add_subplot(gs[4, 1])
ax15 = fig.add_subplot(gs[4, 2])
for _ax, _risk, _outcome, _panel_title in (
    (ax14, y_test_calib, y_test, 'N. Risk Distribution: Internal Test'),
    (ax15, y_external_calib, y_external, 'O. Risk Distribution: External'),
):
    for _class_value, _shade, _group in ((0, 'blue', 'Survivors'), (1, 'red', 'Deaths')):
        _ax.hist(_risk[_outcome == _class_value], bins=25, alpha=0.6, color=_shade,
                 label=f'{_group} (n={np.sum(_outcome==_class_value)})', edgecolor='black')
    _ax.set_xlabel('Predicted Risk', **_axis_font)
    _ax.set_ylabel('Frequency', **_axis_font)
    _ax.set_title(_panel_title, **_title_font)
    _ax.legend(fontsize=10)
    _ax.grid(alpha=0.3, axis='y')

# Panel P: first five rows of the merged comparison table, paired bars per feature.
ax16 = fig.add_subplot(gs[4, 3])
top5_comp = importance_comparison.head(5)
x_pos = np.arange(len(top5_comp))
width = 0.35
for _shift, _column, _series, _shade in (
    (-width / 2, 'Mean_SHAP_Internal', 'Internal', COLORS['primary']),
    (width / 2, 'Mean_SHAP_External', 'External', COLORS['secondary']),
):
    ax16.barh(x_pos + _shift, top5_comp[_column], width,
              label=_series, color=_shade, alpha=0.7, edgecolor='black')
ax16.set_yticks(x_pos)
ax16.set_yticklabels(top5_comp['Feature'], fontsize=10)
ax16.set_xlabel('Mean |SHAP Value|', **_axis_font)
ax16.set_title('P. Top 5 Features: Internal vs External', **_title_font)
ax16.legend(fontsize=10)
ax16.grid(alpha=0.3, axis='x')

# Figure-level title: model description plus headline metrics for both cohorts.
_suptitle_text = (
    f'Figure 12A. REPORTABLE MODEL — Platt Calibration & SHAP Interpretability\n'
    f'{FINAL_MODEL["algorithm"]} (Tier {FINAL_MODEL["tier"]}, {FINAL_MODEL["n_features"]} features) | '
    f'Internal Test: AUC={auc_test_calib:.3f}, HL p={hl_test_p:.3f} | '
    f'External (Primary): AUC={auc_external_calib:.3f}, HL p={hl_ext_p:.3f} | '
    f'Feature Correlation: r={corr_spearman:.3f}'
)
fig.suptitle(_suptitle_text, fontsize=16, fontweight='bold', y=0.998)

# Persist via the project helper, then display.
save_figure(fig, 'step16a_fig12a_reportable_model')
plt.show()

print("   ✓ Figure 12A saved")

# ══════════════════════════════════════════════════════════════════════════════
# 11. SAVE RESULTS — PHASE A
# ══════════════════════════════════════════════════════════════════════════════

print(f"\n{'='*100}")
print(f"11. SAVING RESULTS — PHASE A (REPORTABLE MODEL)")
print(f"{'='*100}\n")

# Save comprehensive results
# Bundle of everything produced in Phase A: model objects, calibration
# parameters, performance metrics, SHAP outputs and raw/calibrated predictions.
REPORTABLE_MODEL_RESULTS = {
    'phase': 'A_REPORTABLE',
    'model_info': {
        'algorithm': FINAL_MODEL['algorithm'],
        'tier': FINAL_MODEL['tier'],
        'n_features': FINAL_MODEL['n_features'],
        'features': final_features,
        'training_size': len(X_train_clean),
        'test_size': len(X_test_clean),
        'external_size': len(X_external),
    },
    'calibrated_model': calibrated_model,
    'base_model': final_model_obj,
    'scaler': final_scaler,
    'platt_parameters': {
        'method': 'sigmoid',
        'cv_folds': 5,
        'representative_slope': platt_slope,
        'representative_intercept': platt_intercept,
    },
    'performance': {
        'internal_test': {
            'auc_raw': float(auc_test_raw),
            'auc_calibrated': float(auc_test_calib),
            'brier_raw': float(brier_test_raw),
            'brier_calibrated': float(brier_test_calib),
            'ece': float(ece_test),
            'hl_statistic': float(hl_test_stat),
            'hl_p_value': float(hl_test_p),
            'hl_df': int(hl_test_df),
            'calibration_slope': float(slope_test),
            'calibration_intercept': float(intercept_test),
        },
        'external_primary': {
            'auc_raw': float(auc_external_raw),
            'auc_calibrated': float(auc_external_calib),
            'brier_raw': float(brier_external_raw),
            'brier_calibrated': float(brier_external_calib),
            'ece': float(ece_ext),
            'hl_statistic': float(hl_ext_stat),
            'hl_p_value': float(hl_ext_p),
            'hl_df': int(hl_ext_df),
            'calibration_slope': float(slope_ext),
            'calibration_intercept': float(intercept_ext),
        },
    },
    'shap': {
        'internal_test': {
            'shap_values': shap_values_test,
            'feature_importance': shap_importance_test,
        },
        'external_primary': {
            'shap_values': shap_values_ext,
            'feature_importance': shap_importance_ext,
        },
        'importance_correlation': {
            'pearson': float(corr_pearson),
            'spearman': float(corr_spearman),
        },
        'importance_comparison': importance_comparison,
        'top5_overlap': int(overlap_top5),
        'top10_overlap': int(overlap_top10),
    },
    'predictions': {
        'internal_test_raw': y_test_raw,
        'internal_test_calibrated': y_test_calib,
        'external_raw': y_external_raw,
        'external_calibrated': y_external_calib,
    },
    'metadata': {
        # Stamp the bundle with the time of this run (same format as before)
        # instead of a fixed literal, so reruns are distinguishable.
        'timestamp': datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S') + ' UTC',
        'user': 'zainzampawala786-sudo',
        'tripod_compliance': True,
    }
}

save_pickle(REPORTABLE_MODEL_RESULTS, 'step16a_reportable_model_results')

# One record per cohort, transposed into columns for the performance table.
_performance_columns = [
    'Dataset', 'N', 'Events', 'AUC', 'Brier', 'ECE',
    'HL_p_value', 'Calibration_Slope', 'Calibration_Intercept',
]
_cohort_records = [
    ('Internal Test', len(y_test), np.sum(y_test), auc_test_calib, brier_test_calib,
     ece_test, hl_test_p, slope_test, intercept_test),
    ('External (Primary)', len(y_external), np.sum(y_external), auc_external_calib,
     brier_external_calib, ece_ext, hl_ext_p, slope_ext, intercept_ext),
]
performance_table = pd.DataFrame({
    _column: list(_values)
    for _column, _values in zip(_performance_columns, zip(*_cohort_records))
})

# Write the CSV outputs through the project helper, in a fixed order.
for _frame, _stem in (
    (performance_table, 'step16a_reportable_performance'),
    (importance_comparison, 'step16a_feature_importance_comparison'),
    (shap_importance_test, 'step16a_shap_internal_test'),
    (shap_importance_ext, 'step16a_shap_external_primary'),
):
    save_csv(_frame, _stem)

# Headline numbers for the run log, cast to plain Python scalars.
_runlog_entry = {
    "phase": "reportable_development",
    "training_size": len(X_train_clean),
    "internal_test_size": len(X_test_clean),
    "external_size": len(X_external),
    "internal_auc": float(auc_test_calib),
    "external_primary_auc": float(auc_external_calib),
    "internal_brier": float(brier_test_calib),
    "external_primary_brier": float(brier_external_calib),
    "internal_hl_p": float(hl_test_p),
    "external_primary_hl_p": float(hl_ext_p),
    "shap_correlation_spearman": float(corr_spearman),
    "top5_feature_overlap": int(overlap_top5),
    "top10_feature_overlap": int(overlap_top10),
}
append_runlog("16A", _runlog_entry)

# Confirmation lines, one per output file.
for _saved_name in (
    "step16a_reportable_model_results.pkl",
    "step16a_reportable_performance.csv",
    "step16a_feature_importance_comparison.csv",
    "step16a_shap_internal_test.csv",
    "step16a_shap_external_primary.csv",
):
    print(f"   ✓ Saved: {_saved_name}")

print("\nStored: REPORTABLE_MODEL_RESULTS")

# ══════════════════════════════════════════════════════════════════════════════
# SUMMARY
# ══════════════════════════════════════════════════════════════════════════════

def _mark(passed):
    """Return the status glyph used in the summary: '✓' when passed, else 'X'."""
    return '✓' if passed else 'X'


_rule = "=" * 100
print(f"\n{_rule}")
print("STEP 16A COMPLETE — REPORTABLE MODEL")
print(_rule)
print("\nPHASE A: REPORTABLE DEVELOPMENT")
print(f"   Training Set:              n={len(X_train_clean)} (model development + calibration)")
print(f"   Internal Test Set:         n={len(X_test_clean)} (internal validation)")
print(f"   External Set:              n={len(X_external)} (primary external validation)")

print(f"\n   MODEL: {FINAL_MODEL['algorithm']} (Tier {FINAL_MODEL['tier']}, {FINAL_MODEL['n_features']} features)")
print("   CALIBRATION: Platt scaling (5-fold CV on training)")

# Same six-line report for each cohort; AUC carries no pass/fail mark.
for _heading, _auc, _brier, _ece, _hl_p, _slope, _intercept in (
    ("\n   INTERNAL TEST PERFORMANCE:", auc_test_calib, brier_test_calib, ece_test,
     hl_test_p, slope_test, intercept_test),
    ("\n   EXTERNAL PERFORMANCE (PRIMARY VALIDATION):", auc_external_calib,
     brier_external_calib, ece_ext, hl_ext_p, slope_ext, intercept_ext),
):
    print(_heading)
    print(f"      AUC:                    {_auc:.4f}")
    print(f"      Brier Score:            {_brier:.4f} {_mark(_brier <= 0.20)}")
    print(f"      ECE:                    {_ece:.4f} {_mark(_ece <= 0.10)}")
    print(f"      HL p-value:             {_hl_p:.4f} {_mark(_hl_p > 0.05)}")
    print(f"      Calibration Slope:      {_slope:.4f} {_mark(0.8 <= _slope <= 1.2)}")
    print(f"      Calibration Intercept:  {_intercept:+.4f} {_mark(abs(_intercept) <= 0.2)}")

print("\n   SHAP ANALYSIS:")
print(f"      Feature Importance Correlation (Spearman): {corr_spearman:.4f} {_mark(corr_spearman > 0.5)}")
print(f"      Top 5 Feature Overlap:  {overlap_top5}/5 ({overlap_top5/5*100:.0f}%)")
print(f"      Top 10 Feature Overlap: {overlap_top10}/10 ({overlap_top10/10*100:.0f}%)")

print(f"\n   TOP 3 FEATURES (CONSISTENT ACROSS COHORTS):")
# Keep the internal-test ranking order for the shared features. Building the
# list from a set intersection would print them in arbitrary order, which can
# differ between runs.
_external_top3 = set(shap_importance_ext.head(3)['Feature'].values)
common_top3 = [
    feat for feat in shap_importance_test.head(3)['Feature'].values
    if feat in _external_top3
]
if common_top3:
    for i, feat in enumerate(common_top3, 1):
        print(f"      {i}. {feat}")
else:
    print(f"      (No features in both top 3)")

print(f"\nNEXT: Step 16B — Final Deployable Model")
# Report the actual combined internal size rather than a fixed number.
print(f"   Retrain SVM on ALL internal data (TRAIN+TEST = n={len(X_train_clean) + len(X_test_clean)})")
print(f"   Recalibrate via 5-fold CV")
print(f"   Confirmatory external validation")
print(f"   Export deployment bundle")
print("="*100 + "\n")

In [None]:
# ═══════════════════════════════════════════════════════════════════════════════
# STEP 16B: FINAL DEPLOYABLE MODEL — RETRAINED ON ALL INTERNAL DATA
# TRIPOD: 10e (Calibration), 11 (Validation), 15b (Model explanation)
# ═══════════════════════════════════════════════════════════════════════════════
# PHASE B: FINAL DEPLOYABLE MODEL
# - Retrain SVM on ALL internal data (TRAIN + TEST = n=476)
# - Calibrate via 5-fold CV on all internal data
# - Evaluate on EXTERNAL (n=354) → Confirmatory validation
# - Generate SHAP on final calibrated model
# - Export deployment bundle
# ═══════════════════════════════════════════════════════════════════════════════

from sklearn.svm import SVC
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import roc_auc_score, brier_score_loss, roc_curve
from sklearn.calibration import calibration_curve
from scipy import stats
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import shap
from datetime import datetime
import json
import warnings
warnings.filterwarnings('ignore')

# Step 16B banner.
# NOTE(review): the UTC stamp and the cohort sizes (n=476, n=354) printed below are
# fixed string literals; they are not derived from the clock or from the data.
print("\n" + "="*100)
print("STEP 16B: FINAL DEPLOYABLE MODEL — RETRAINED ON ALL INTERNAL DATA")
print("="*100)
print(f"UTC: 2025-10-20 01:54:11")
print(f"User: zainzampawala786-sudo")
print(f"\nPHASE B: FINAL DEPLOYABLE MODEL")
print(f"   Retrain SVM on ALL internal data (TRAIN+TEST = n=476)")
print(f"   Calibrate via 5-fold CV on all internal")
print(f"   Confirmatory external validation (n=354)\n")

# ══════════════════════════════════════════════════════════════════════════════
# 1. LOAD DATA & PHASE A RESULTS
# ══════════════════════════════════════════════════════════════════════════════

print(f"{'='*100}")
print(f"1. DATA & PHASE A RESULTS")
print(f"{'='*100}\n")

# Get data
# Work on copies so nothing in this cell mutates the shared CLEAN_FEATURE_DATA
# container (defined outside this cell — presumably by an earlier step).
X_train_clean = CLEAN_FEATURE_DATA["X_train_clean"].copy()
y_train = CLEAN_FEATURE_DATA["y_train"].copy()
X_test_clean = CLEAN_FEATURE_DATA["X_test_clean"].copy()
y_test = CLEAN_FEATURE_DATA["y_test"].copy()
X_external = CLEAN_FEATURE_DATA["X_external_clean"].copy()
y_external = CLEAN_FEATURE_DATA["y_external"].copy()

# Short aliases for results of earlier steps (all four right-hand names are
# notebook globals that are not defined in this cell).
FINAL_MODEL = EXTERNAL_VALIDATION_RESULTS['final_model']
TRAINED_MODELS = TRAINED_MODELS_3TIER
COLORS = DISTRIBUTION_DATA["colors_enhanced"]
PHASE_A_RESULTS = REPORTABLE_MODEL_RESULTS

# Sample and event counts per cohort; an event is a row whose label equals 1.
print(f"   Dataset Summary:")
print(f"      Training:           {len(X_train_clean):>4} samples, {(y_train==1).sum():>3} events ({(y_train==1).sum()/len(y_train)*100:.1f}%)")
print(f"      Test:               {len(X_test_clean):>4} samples, {(y_test==1).sum():>3} events ({(y_test==1).sum()/len(y_test)*100:.1f}%)")
print(f"      ALL INTERNAL:       {len(X_train_clean)+len(X_test_clean):>4} samples, {(y_train==1).sum()+(y_test==1).sum():>3} events ({((y_train==1).sum()+(y_test==1).sum())/(len(y_train)+len(y_test))*100:.1f}%)")
print(f"      External:           {len(X_external):>4} samples, {(y_external==1).sum():>3} events ({(y_external==1).sum()/len(y_external)*100:.1f}%)")

print(f"\n   Phase A Performance (Primary Validation):")
print(f"      Internal Test AUC:  {PHASE_A_RESULTS['performance']['internal_test']['auc_calibrated']:.4f}")
print(f"      External AUC:       {PHASE_A_RESULTS['performance']['external_primary']['auc_calibrated']:.4f}")

# Get model info
# The selected model is addressed as TRAINED_MODELS["tier_<tier>"]["models"][<algorithm>];
# that entry supplies the feature list's scaler and the fitted estimator.
final_tier_key = f"tier_{FINAL_MODEL['tier']}"
final_features = FINAL_MODEL['features']
final_scaler = TRAINED_MODELS[final_tier_key]['models'][FINAL_MODEL['algorithm']]['scaler']

# Get hyperparameters from original model
original_model = TRAINED_MODELS[final_tier_key]['models'][FINAL_MODEL['algorithm']]['model']
model_params = original_model.get_params()

print(f"\n   Model: {FINAL_MODEL['algorithm']} (Tier {FINAL_MODEL['tier']}, {FINAL_MODEL['n_features']} features)")
print(f"   Frozen Hyperparameters:")
# n_jobs / verbose / random_state are left out of the printed listing only;
# model_params itself still contains them.
for key, value in model_params.items():
    if key not in ['n_jobs', 'verbose', 'random_state']:
        print(f"      {key}: {value}")

# ══════════════════════════════════════════════════════════════════════════════
# 2. COMBINE ALL INTERNAL DATA
# ══════════════════════════════════════════════════════════════════════════════

print(f"\n{'='*100}")
print(f"2. COMBINING ALL INTERNAL DATA")
print(f"{'='*100}\n")

# Stack the internal TRAIN and TEST partitions row-wise under a fresh 0..n-1 index
X_all_internal = pd.concat([X_train_clean, X_test_clean], axis=0, ignore_index=True)
y_all_internal = pd.concat([y_train, y_test], axis=0, ignore_index=True)

# Outcome counts of the pooled cohort, computed once for the report lines below
_n_pooled = len(y_all_internal)
_n_pooled_events = (y_all_internal==1).sum()
_n_pooled_survivors = (y_all_internal==0).sum()

print(f"   ✓ Combined internal data:")
print(f"      Total samples:  {len(X_all_internal)}")
print(f"      Total events:   {_n_pooled_events} ({_n_pooled_events/_n_pooled*100:.1f}%)")
print(f"      Survivors:      {_n_pooled_survivors} ({_n_pooled_survivors/_n_pooled*100:.1f}%)")

# Restrict both cohorts to the columns used by the selected model
X_all_internal_final = X_all_internal[final_features].copy()
X_external_final = X_external[final_features].copy()

# Re-use the scaler object stored with the Phase A model (it is only applied
# here via transform, never refitted); with no scaler, fall back to raw arrays
if final_scaler is None:
    X_all_internal_processed = X_all_internal_final.values
    X_external_processed = X_external_final.values
    print(f"\n   No scaling required")
else:
    X_all_internal_processed = final_scaler.transform(X_all_internal_final)
    X_external_processed = final_scaler.transform(X_external_final)
    print(f"\n   ✓ Scaling applied (StandardScaler from Phase A)")

print(f"\n   ✓ Datasets prepared:")
for _cohort_label, _cohort_matrix in (
    ("All Internal:  ", X_all_internal_processed),
    ("External:      ", X_external_processed),
):
    print(f"      {_cohort_label}{_cohort_matrix.shape}")

# ══════════════════════════════════════════════════════════════════════════════
# 3. RETRAIN SVM ON ALL INTERNAL DATA
# ══════════════════════════════════════════════════════════════════════════════

print(f"\n{'='*100}")
print(f"3. RETRAINING SVM ON ALL INTERNAL DATA (n=476)")
print(f"{'='*100}\n")

print(f"   Retraining SVM-RBF with frozen hyperparameters...")

# Create new SVM with same hyperparameters.
# Start from the COMPLETE parameter set of the Phase A estimator so that every
# tuned setting (tol, shrinking, degree, coef0, max_iter, ...) is carried over,
# not just C / kernel / gamma / class_weight. Only the two settings this step
# relies on are forced: probability output and a fixed seed.
final_svm_params = dict(model_params)
final_svm_params.update(probability=True, random_state=42)
final_svm = SVC(**final_svm_params)

# Fit on all internal data
final_svm.fit(X_all_internal_processed, y_all_internal)

print(f"   ✓ SVM retrained on n={len(X_all_internal_processed)}")
print(f"      Support vectors: {final_svm.n_support_}")

# Get raw predictions (positive-class column of the SVM's own probability output)
y_all_internal_raw = final_svm.predict_proba(X_all_internal_processed)[:, 1]
y_external_raw_final = final_svm.predict_proba(X_external_processed)[:, 1]

# Raw performance.
# NOTE: the "All Internal" figures are computed on the same rows the model was
# just fitted on, so they are in-sample (resubstitution) estimates.
auc_internal_raw_final = roc_auc_score(y_all_internal, y_all_internal_raw)
auc_external_raw_final = roc_auc_score(y_external, y_external_raw_final)
brier_internal_raw_final = brier_score_loss(y_all_internal, y_all_internal_raw)
brier_external_raw_final = brier_score_loss(y_external, y_external_raw_final)

print(f"\n   Raw Performance (Uncalibrated):")
print(f"      {'Dataset':<20} {'AUC':>8} {'Brier':>8}")
print(f"      {'-'*38}")
print(f"      {'All Internal':<20} {auc_internal_raw_final:>8.4f} {brier_internal_raw_final:>8.4f}")
print(f"      {'External':<20} {auc_external_raw_final:>8.4f} {brier_external_raw_final:>8.4f}")

# ══════════════════════════════════════════════════════════════════════════════
# 4. CALIBRATE FINAL MODEL (5-FOLD CV)
# ══════════════════════════════════════════════════════════════════════════════

print(f"\n{'='*100}")
print(f"4. CALIBRATING FINAL MODEL (5-FOLD CV ON ALL INTERNAL)")
print(f"{'='*100}\n")

print(f"   Fitting CalibratedClassifierCV on all internal data (n={len(X_all_internal_processed)})...")
print(f"      Method: Platt scaling (sigmoid)")
print(f"      Cross-validation: 5-fold")

# Create calibrated classifier
final_calibrated_model = CalibratedClassifierCV(
    estimator=final_svm,
    method='sigmoid',
    cv=5,
    n_jobs=-1
)

# Fit on all internal data
final_calibrated_model.fit(X_all_internal_processed, y_all_internal)

print(f"\n   ✓ Calibration complete!")
print(f"      Number of calibrators: {len(final_calibrated_model.calibrated_classifiers_)}")

# Extract average Platt parameters.
# scikit-learn's sigmoid calibrator stores its fitted parameters as `a_` / `b_`
# and models p = 1 / (1 + exp(a_ * f + b_)); it has no `coef_` / `intercept_`.
# Checking only for `coef_` therefore never matched, left both lists empty and
# produced NaN averages. The values are negated so they read in the usual
# logistic convention p = sigmoid(slope * f + intercept).
platt_slopes = []
platt_intercepts = []
for cal_clf in final_calibrated_model.calibrated_classifiers_:
    platt_cal = cal_clf.calibrators[0]
    if hasattr(platt_cal, 'a_') and hasattr(platt_cal, 'b_'):
        platt_slopes.append(-float(platt_cal.a_))
        platt_intercepts.append(-float(platt_cal.b_))
    elif hasattr(platt_cal, 'coef_'):
        # Fallback for calibrators that expose a LogisticRegression-style API
        platt_slopes.append(platt_cal.coef_[0][0])
        platt_intercepts.append(platt_cal.intercept_[0])

# Guard the empty case explicitly instead of relying on np.mean([]) -> NaN
# (whose RuntimeWarning is silenced by the cell-level warnings filter).
avg_platt_slope = np.mean(platt_slopes) if platt_slopes else float('nan')
avg_platt_intercept = np.mean(platt_intercepts) if platt_intercepts else float('nan')
platt_slope_sd = np.std(platt_slopes) if platt_slopes else float('nan')
platt_intercept_sd = np.std(platt_intercepts) if platt_intercepts else float('nan')

print(f"      Average Platt parameters (across 5 folds):")
print(f"         Slope:     {avg_platt_slope:.4f} (±{platt_slope_sd:.4f})")
print(f"         Intercept: {avg_platt_intercept:.4f} (±{platt_intercept_sd:.4f})")

# ══════════════════════════════════════════════════════════════════════════════
# 5. CALIBRATED PREDICTIONS - CONFIRMATORY EXTERNAL VALIDATION
# ══════════════════════════════════════════════════════════════════════════════

print(f"\n{'='*100}")
print(f"5. CONFIRMATORY EXTERNAL VALIDATION")
print(f"{'='*100}\n")

# Positive-class probabilities from the calibrated model for both cohorts
y_all_internal_calib_final = final_calibrated_model.predict_proba(X_all_internal_processed)[:, 1]
y_external_calib_final = final_calibrated_model.predict_proba(X_external_processed)[:, 1]

print(f"   ✓ Calibrated predictions obtained")

# AUC and Brier score per cohort, grouped by cohort rather than by metric
auc_internal_calib_final = roc_auc_score(y_all_internal, y_all_internal_calib_final)
brier_internal_calib_final = brier_score_loss(y_all_internal, y_all_internal_calib_final)
auc_external_calib_final = roc_auc_score(y_external, y_external_calib_final)
brier_external_calib_final = brier_score_loss(y_external, y_external_calib_final)

# One tuple per table row: label, calibrated AUC/Brier, then the raw AUC/Brier
# they are compared against in the Δ columns
_calibrated_rows = (
    ('All Internal', auc_internal_calib_final, brier_internal_calib_final,
     auc_internal_raw_final, brier_internal_raw_final),
    ('External (Final)', auc_external_calib_final, brier_external_calib_final,
     auc_external_raw_final, brier_external_raw_final),
)

print(f"\n   Calibrated Performance:")
print(f"      {'Dataset':<20} {'AUC':>8} {'Brier':>8} {'ΔAUC':>8} {'ΔBrier':>8}")
print(f"      {'-'*58}")
for _row_label, _auc_cal, _brier_cal, _auc_raw, _brier_raw in _calibrated_rows:
    print(f"      {_row_label:<20} {_auc_cal:>8.4f} {_brier_cal:>8.4f} {(_auc_cal-_auc_raw):>+8.4f} {(_brier_cal-_brier_raw):>+8.4f}")

# ══════════════════════════════════════════════════════════════════════════════
# 6. DETAILED CALIBRATION METRICS
# ══════════════════════════════════════════════════════════════════════════════

print(f"\n{'='*100}")
print(f"6. DETAILED CALIBRATION METRICS")
print(f"{'='*100}\n")

def hosmer_lemeshow_test(y_true, y_pred, n_bins=10):
    """Hosmer-Lemeshow goodness-of-fit test on equal-width probability bins.

    Predictions are grouped into ``n_bins`` equal-width intervals over [0, 1]
    (not risk deciles); empty intervals are dropped before the statistic is
    computed.

    Args:
        y_true: Array-like of binary outcomes (0/1). Lists, NumPy arrays and
            pandas Series are all accepted.
        y_pred: Array-like of predicted probabilities, same length as ``y_true``.
        n_bins: Number of equal-width bins.

    Returns:
        Tuple ``(hl_statistic, p_value, df)`` where ``df`` is the number of
        non-empty bins minus 2. ``p_value`` is NaN when ``df`` is not positive
        (fewer than three non-empty bins), because the chi-square reference
        distribution is undefined in that case.
    """
    # Coerce to flat float arrays so boolean-mask indexing works for any array-like
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()

    # Left edges only: digitize then maps p in [edge_i, edge_{i+1}) to bin i, and
    # the clip folds p == 1.0 (and anything out of range) into the end bins.
    bins = np.linspace(0, 1, n_bins + 1)
    bin_indices = np.digitize(y_pred, bins[:-1]) - 1
    bin_indices = np.clip(bin_indices, 0, n_bins - 1)

    observed = np.zeros(n_bins)
    expected = np.zeros(n_bins)
    total = np.zeros(n_bins)

    for i in range(n_bins):
        mask = bin_indices == i
        total[i] = np.sum(mask)
        if total[i] > 0:
            observed[i] = np.sum(y_true[mask])
            expected[i] = np.sum(y_pred[mask])

    # Keep only the bins that actually contain observations
    non_empty = total > 0
    observed = observed[non_empty]
    expected = expected[non_empty]
    total = total[non_empty]

    # (O - E)^2 / (n * p_bar * (1 - p_bar)) with p_bar = E / n; the tiny constant
    # avoids division by zero when a bin's expected count is exactly 0 or n.
    hl_statistic = np.sum((observed - expected) ** 2 / (expected * (1 - expected / total) + 1e-10))
    df = len(observed) - 2

    if df > 0:
        # Survival function instead of 1 - cdf: keeps precision for large
        # statistics, where 1 - cdf rounds to exactly 0.0.
        p_value = stats.chi2.sf(hl_statistic, df)
    else:
        p_value = np.nan

    return hl_statistic, p_value, df

def expected_calibration_error(y_true, y_pred, n_bins=10):
    """Expected Calibration Error (ECE) on equal-width probability bins.

    For every non-empty bin the absolute gap between the observed event rate
    and the mean predicted probability is weighted by the bin's sample count;
    the weighted gaps are summed and divided by the total number of samples.

    Args:
        y_true: Array-like of binary outcomes (0/1). Lists, NumPy arrays and
            pandas Series are all accepted.
        y_pred: Array-like of predicted probabilities, same length as ``y_true``.
        n_bins: Number of equal-width bins over [0, 1].

    Returns:
        The ECE as a float in [0, 1].

    Raises:
        ZeroDivisionError: If the inputs are empty.
    """
    # Coerce to flat float arrays so boolean-mask indexing works for any array-like
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()

    # Left edges only: digitize maps p in [edge_i, edge_{i+1}) to bin i, and the
    # clip folds p == 1.0 (and anything out of range) into the end bins.
    bins = np.linspace(0, 1, n_bins + 1)
    bin_indices = np.digitize(y_pred, bins[:-1]) - 1
    bin_indices = np.clip(bin_indices, 0, n_bins - 1)

    ece = 0
    for i in range(n_bins):
        mask = bin_indices == i
        n_in_bin = np.sum(mask)
        if n_in_bin > 0:
            bin_acc = np.mean(y_true[mask])    # observed event rate in the bin
            bin_conf = np.mean(y_pred[mask])   # mean predicted probability in the bin
            ece += n_in_bin * np.abs(bin_acc - bin_conf)

    return ece / len(y_true)

def calibration_slope_intercept(y_true, y_pred):
    """Calibration slope and intercept.

    Fits an unpenalised logistic regression of the observed outcome on the
    logit of the predicted probability and returns its coefficient and
    intercept. Both are estimated jointly in a single fit.

    Args:
        y_true: Array-like of binary outcomes.
        y_pred: Array-like of predicted probabilities, same length as ``y_true``.

    Returns:
        Tuple ``(slope, intercept)``.
    """
    from sklearn.linear_model import LogisticRegression

    # Clip on BOTH sides before taking the logit. Guarding only the denominator
    # protects p == 1 but leaves p == 0 mapping to log(0) = -inf, which makes
    # the regression fit fail on non-finite input.
    eps = 1e-10
    prob = np.clip(np.asarray(y_pred, dtype=float).ravel(), eps, 1 - eps)
    logit = np.log(prob / (1 - prob))

    lr = LogisticRegression(penalty=None, max_iter=1000)
    lr.fit(logit.reshape(-1, 1), np.asarray(y_true).ravel())
    return lr.coef_[0][0], lr.intercept_[0]

print(f"   Computing calibration metrics...\n")

# External (Final): the three helper metrics take the labels as a plain array,
# while calibration_curve is given the label object as-is
_y_ext_array = y_external.values
hl_ext_final_stat, hl_ext_final_p, hl_ext_final_df = hosmer_lemeshow_test(_y_ext_array, y_external_calib_final)
ece_ext_final = expected_calibration_error(_y_ext_array, y_external_calib_final)
slope_ext_final, intercept_ext_final = calibration_slope_intercept(_y_ext_array, y_external_calib_final)
prob_true_ext_final, prob_pred_ext_final = calibration_curve(y_external, y_external_calib_final, n_bins=10, strategy='quantile')

# (metric label, value, passes-threshold) triples, in display order
_external_checks = [
    ('AUC', auc_external_calib_final, auc_external_calib_final >= 0.70),
    ('Brier Score', brier_external_calib_final, brier_external_calib_final <= 0.20),
    ('ECE', ece_ext_final, ece_ext_final <= 0.10),
    ('Hosmer-Lemeshow p-value', hl_ext_final_p, hl_ext_final_p > 0.05),
    ('Calibration Slope', slope_ext_final, 0.8 <= slope_ext_final <= 1.2),
    ('Calibration Intercept', intercept_ext_final, abs(intercept_ext_final) <= 0.2),
]

print(f"   CALIBRATION METRICS — EXTERNAL (CONFIRMATORY) (n={len(y_external)}):")
print(f"      {'Metric':<30} {'Value':>12} {'Status'}")
print(f"      {'-'*50}")
for _metric_label, _metric_value, _metric_ok in _external_checks:
    print(f"      {_metric_label:<30} {_metric_value:>12.4f} {'✓' if _metric_ok else 'X'}")

# ══════════════════════════════════════════════════════════════════════════════
# 7. PHASE A vs PHASE B COMPARISON
# ══════════════════════════════════════════════════════════════════════════════

print(f"\n{'='*100}")
print(f"7. PHASE A vs PHASE B COMPARISON (EXTERNAL)")
print(f"{'='*100}\n")

# Get Phase A external results
phase_a_external = PHASE_A_RESULTS['performance']['external_primary']
phase_a_auc = phase_a_external['auc_calibrated']
phase_a_brier = phase_a_external['brier_calibrated']
phase_a_hl_p = phase_a_external['hl_p_value']
phase_a_slope = phase_a_external['calibration_slope']
phase_a_intercept = phase_a_external['calibration_intercept']

# Training sizes are read from the data instead of being typed in as literals,
# so the table stays correct if the cohorts change.
# NOTE(review): assumes the Phase A model was fitted on X_train_clean only — confirm.
n_train_phase_a = len(X_train_clean)
n_train_phase_b = len(X_all_internal)

# Side-by-side table of external-cohort metrics; every cell is pre-formatted text
comparison_df = pd.DataFrame({
    'Metric': ['Training Size', 'AUC', 'Brier Score', 'HL p-value', 'Calibration Slope', 'Calibration Intercept'],
    'Phase A (Primary)': [
        str(n_train_phase_a),
        f'{phase_a_auc:.4f}',
        f'{phase_a_brier:.4f}',
        f'{phase_a_hl_p:.4f}',
        f'{phase_a_slope:.4f}',
        f'{phase_a_intercept:.4f}'
    ],
    'Phase B (Final)': [
        str(n_train_phase_b),
        f'{auc_external_calib_final:.4f}',
        f'{brier_external_calib_final:.4f}',
        f'{hl_ext_final_p:.4f}',
        f'{slope_ext_final:.4f}',
        f'{intercept_ext_final:.4f}'
    ],
    'Change': [
        f'{n_train_phase_b - n_train_phase_a:+d}',
        f'{(auc_external_calib_final - phase_a_auc):+.4f}',
        f'{(brier_external_calib_final - phase_a_brier):+.4f}',
        f'{(hl_ext_final_p - phase_a_hl_p):+.4f}',
        f'{(slope_ext_final - phase_a_slope):+.4f}',
        f'{(intercept_ext_final - phase_a_intercept):+.4f}'
    ]
})

print(comparison_df.to_string(index=False))

print(f"\n   Performance Summary:")
if auc_external_calib_final > phase_a_auc:
    print(f"      ✓ AUC improved by {(auc_external_calib_final - phase_a_auc):.4f}")
else:
    print(f"      AUC changed by {(auc_external_calib_final - phase_a_auc):+.4f}")

# A larger Hosmer-Lemeshow p-value is reported here as "improved" calibration
if hl_ext_final_p > phase_a_hl_p:
    print(f"      ✓ Calibration improved (HL p: {phase_a_hl_p:.4f} → {hl_ext_final_p:.4f})")
else:
    print(f"      Calibration: HL p {phase_a_hl_p:.4f} → {hl_ext_final_p:.4f}")

# Slope is judged by its distance from the ideal value 1.0
if abs(slope_ext_final - 1.0) < abs(phase_a_slope - 1.0):
    print(f"      ✓ Slope closer to ideal (1.0): {phase_a_slope:.4f} → {slope_ext_final:.4f}")
else:
    print(f"      Slope: {phase_a_slope:.4f} → {slope_ext_final:.4f}")

# ══════════════════════════════════════════════════════════════════════════════
# 8. SHAP ANALYSIS — FINAL MODEL (EXTERNAL)
# ══════════════════════════════════════════════════════════════════════════════

print(f"\n{'='*100}")
print(f"8. SHAP ANALYSIS — FINAL MODEL (EXTERNAL)")
print(f"{'='*100}\n")

print(f"   Computing SHAP values for external set (n={len(X_external_final)})...")
print(f"   Estimated time: ~40-60 minutes (KernelExplainer for SVM)")

# Create SHAP explainer for final model
# Background distribution: 100 rows sampled from the processed EXTERNAL matrix.
background_ext_final = shap.sample(X_external_processed, 100)

# Scalar-output wrapper handed to KernelExplainer: positive-class probability.
# NOTE(review): this wraps final_svm (the base SVM), not final_calibrated_model,
# so the SHAP values below explain the base SVM's probability output.
def model_predict_ext_final(X):
    return final_svm.predict_proba(X)[:, 1]

explainer_ext_final = shap.KernelExplainer(model_predict_ext_final, background_ext_final)
shap_values_ext_final = explainer_ext_final.shap_values(X_external_processed)

# Ensure 2D
# Defensive normalisation of the explainer output: a list is reduced to its
# second element when it has more than one, and a 3-D array to the slice at
# index 1 of the last axis when that axis has length 2 (otherwise squeezed).
if isinstance(shap_values_ext_final, list):
    shap_values_ext_final = shap_values_ext_final[1] if len(shap_values_ext_final) > 1 else shap_values_ext_final[0]
if shap_values_ext_final.ndim > 2:
    shap_values_ext_final = shap_values_ext_final[:, :, 1] if shap_values_ext_final.shape[2] == 2 else shap_values_ext_final.squeeze()

print(f"\n   ✓ SHAP values computed")
print(f"      Shape: {shap_values_ext_final.shape}")

# Feature importance
# Global importance = mean absolute SHAP value per feature column, sorted descending.
shap_importance_ext_final = pd.DataFrame({
    'Feature': final_features,
    'Mean_SHAP': np.abs(shap_values_ext_final).mean(axis=0)
}).sort_values('Mean_SHAP', ascending=False)

print(f"\n   Top 10 Features (External - Final Model):\n")
print(f"      {'Rank':<6} {'Feature':<35} {'Mean |SHAP|':>12}")
print(f"      {'-'*55}")
# idx is the 1-based display rank; i (the DataFrame index label) is unused.
for idx, (i, row) in enumerate(shap_importance_ext_final.head(10).iterrows(), 1):
    print(f"      {idx:<6} {row['Feature']:<35} {row['Mean_SHAP']:>12.4f}")

# Compare with Phase A
phase_a_shap = PHASE_A_RESULTS['shap']['external_primary']['feature_importance']

# Default (inner) merge on 'Feature'; row order follows the left frame, i.e. the
# Phase A importance table.
shap_comparison = pd.merge(
    phase_a_shap[['Feature', 'Mean_SHAP']].rename(columns={'Mean_SHAP': 'Phase_A'}),
    shap_importance_ext_final[['Feature', 'Mean_SHAP']].rename(columns={'Mean_SHAP': 'Phase_B'}),
    on='Feature'
)

# Rank correlation of the two importance vectors across the merged features
corr_phase_ab = shap_comparison['Phase_A'].corr(shap_comparison['Phase_B'], method='spearman')

print(f"\n   Phase A vs Phase B SHAP Correlation:")
print(f"      Spearman: {corr_phase_ab:.4f}")
# Verbal labels: > 0.9 excellent, > 0.7 good, anything else "moderate"
if corr_phase_ab > 0.9:
    print(f"      ✓ Excellent consistency - feature importance unchanged")
elif corr_phase_ab > 0.7:
    print(f"      ✓ Good consistency")
else:
    print(f"      Moderate consistency")

# ══════════════════════════════════════════════════════════════════════════════
# 9. VISUALIZATION — FIGURE 12B (FINAL MODEL)
# ══════════════════════════════════════════════════════════════════════════════

print(f"\n{'='*100}")
print(f"9. GENERATING FIGURE 12B — FINAL DEPLOYABLE MODEL")
print(f"{'='*100}\n")

# One 4x4 grid: row 1 = four Phase A/B comparison panels, row 2 = two SHAP bar
# panels (two columns each), row 3 = one full-width beeswarm, row 4 = four panels.
fig = plt.figure(figsize=(28, 16), dpi=300)
gs = fig.add_gridspec(4, 4, hspace=0.40, wspace=0.35)

# ROW 1: EXTERNAL VALIDATION - PHASE COMPARISON

# Plot 1: External ROC - Phase A vs Phase B
ax1 = fig.add_subplot(gs[0, 0])
fpr_ext_final, tpr_ext_final, _ = roc_curve(y_external, y_external_calib_final)
# Despite its name, fpr_ext_a holds the (fpr, tpr) pair for Phase A:
# element 0 is the FPR array and element 1 the TPR array.
fpr_ext_a = roc_curve(y_external, PHASE_A_RESULTS['predictions']['external_calibrated'])[0:2]
ax1.plot([0, 1], [0, 1], 'k--', linewidth=2, alpha=0.3, label='Chance')
ax1.plot(fpr_ext_a[0], fpr_ext_a[1], linewidth=3, color=COLORS['primary'], alpha=0.6,
         label=f'Phase A (n=380)\nAUC = {phase_a_auc:.3f}')
ax1.plot(fpr_ext_final, tpr_ext_final, linewidth=3, color=COLORS['secondary'],
         label=f'Phase B (n=476)\nAUC = {auc_external_calib_final:.3f}')
ax1.set_xlabel('False Positive Rate', fontsize=12, fontweight='bold')
ax1.set_ylabel('True Positive Rate', fontsize=12, fontweight='bold')
ax1.set_title('A. External ROC: Phase A vs Phase B', fontsize=13, fontweight='bold', loc='left')
ax1.legend(fontsize=10, loc='lower right')
ax1.grid(alpha=0.3)

# Plot 2: Calibration Curves - Phase A vs Phase B
# Both curves use 10 quantile bins (the Phase B curve was computed earlier with
# the same settings).
ax2 = fig.add_subplot(gs[0, 1])
prob_true_ext_a, prob_pred_ext_a = calibration_curve(y_external, PHASE_A_RESULTS['predictions']['external_calibrated'], n_bins=10, strategy='quantile')
ax2.plot([0, 1], [0, 1], 'k--', linewidth=2, label='Perfect')
ax2.plot(prob_pred_ext_a, prob_true_ext_a, marker='o', linewidth=3, markersize=8,
         color=COLORS['primary'], alpha=0.6, label=f'Phase A\nHL p={phase_a_hl_p:.3f}')
ax2.plot(prob_pred_ext_final, prob_true_ext_final, marker='s', linewidth=3, markersize=8,
         color=COLORS['secondary'], label=f'Phase B\nHL p={hl_ext_final_p:.3f}')
ax2.set_xlabel('Predicted Probability', fontsize=12, fontweight='bold')
ax2.set_ylabel('Observed Probability', fontsize=12, fontweight='bold')
ax2.set_title('B. External Calibration: Phase A vs Phase B', fontsize=13, fontweight='bold', loc='left')
ax2.legend(fontsize=10)
ax2.grid(alpha=0.3)
ax2.set_xlim([0, 1])
ax2.set_ylim([0, 1])

# Plot 3: Performance Improvement
# Grouped bars; the three metrics share one y-axis.
ax3 = fig.add_subplot(gs[0, 2])
metrics = ['AUC', 'Brier', 'HL p']
phase_a_vals = [phase_a_auc, phase_a_brier, phase_a_hl_p]
phase_b_vals = [auc_external_calib_final, brier_external_calib_final, hl_ext_final_p]
x = np.arange(len(metrics))
width = 0.35
ax3.bar(x - width/2, phase_a_vals, width, label='Phase A (n=380)', 
        color=COLORS['primary'], alpha=0.7, edgecolor='black')
ax3.bar(x + width/2, phase_b_vals, width, label='Phase B (n=476)', 
        color=COLORS['secondary'], alpha=0.7, edgecolor='black')
ax3.set_ylabel('Value', fontsize=12, fontweight='bold')
ax3.set_title('C. External Performance: Phase A vs B', fontsize=13, fontweight='bold', loc='left')
ax3.set_xticks(x)
ax3.set_xticklabels(metrics)
ax3.legend(fontsize=10)
ax3.grid(alpha=0.3, axis='y')

# Plot 4: Calibration Slope Comparison
# Shaded band marks 0.8-1.2, the same range used for the ✓/X status elsewhere.
ax4 = fig.add_subplot(gs[0, 3])
slopes = [phase_a_slope, slope_ext_final]
datasets = ['Phase A', 'Phase B']
bars = ax4.bar(datasets, slopes, color=[COLORS['primary'], COLORS['secondary']], 
               alpha=0.7, edgecolor='black', linewidth=2)
ax4.axhspan(0.8, 1.2, alpha=0.2, color='green', label='Ideal range')
ax4.axhline(y=1.0, color='green', linestyle='--', linewidth=2, label='Perfect')
# Value label just above each bar
for bar, slope in zip(bars, slopes):
    height = bar.get_height()
    ax4.text(bar.get_x() + bar.get_width()/2., height + 0.05,
             f'{slope:.3f}', ha='center', va='bottom', fontsize=11, fontweight='bold')
ax4.set_ylabel('Calibration Slope', fontsize=12, fontweight='bold')
ax4.set_title('D. Calibration Slope Comparison', fontsize=13, fontweight='bold', loc='left')
ax4.set_ylim([0, max(slopes) + 0.3])
ax4.legend(fontsize=9)
ax4.grid(alpha=0.3, axis='y')

# ROW 2: SHAP FEATURE IMPORTANCE - FINAL MODEL

# Plot 5: SHAP - External (Final Model)
ax5 = fig.add_subplot(gs[1, :2])
top_n = 10
# Reversed so the most important feature ends up at the top of the horizontal bars
shap_top_ext_final = shap_importance_ext_final.head(top_n).iloc[::-1]
ax5.barh(range(len(shap_top_ext_final)), shap_top_ext_final['Mean_SHAP'], 
        color=COLORS['secondary'], alpha=0.7, edgecolor='black', linewidth=1.5)
ax5.set_yticks(range(len(shap_top_ext_final)))
ax5.set_yticklabels(shap_top_ext_final['Feature'], fontsize=11)
ax5.set_xlabel('Mean |SHAP Value|', fontsize=12, fontweight='bold')
ax5.set_title('E. Feature Importance: Final Model (External)', fontsize=13, fontweight='bold', loc='left')
ax5.grid(alpha=0.3, axis='x')

# Plot 6: SHAP Comparison - Phase A vs Phase B
# Takes the first five rows of the merged table, i.e. in the Phase A table's row
# order (presumably sorted by Phase A importance — verify against Step 16A).
ax6 = fig.add_subplot(gs[1, 2:])
top5_shap_comp = shap_comparison.head(5)
x_pos = np.arange(len(top5_shap_comp))
width = 0.35
ax6.barh(x_pos - width/2, top5_shap_comp['Phase_A'], width, 
         label='Phase A', color=COLORS['primary'], alpha=0.7, edgecolor='black')
ax6.barh(x_pos + width/2, top5_shap_comp['Phase_B'], width, 
         label='Phase B', color=COLORS['secondary'], alpha=0.7, edgecolor='black')
ax6.set_yticks(x_pos)
ax6.set_yticklabels(top5_shap_comp['Feature'], fontsize=10)
ax6.set_xlabel('Mean |SHAP Value|', fontsize=12, fontweight='bold')
ax6.set_title(f'F. Top 5 Features: Phase A vs B (r={corr_phase_ab:.3f})', fontsize=13, fontweight='bold', loc='left')
ax6.legend(fontsize=10)
ax6.grid(alpha=0.3, axis='x')

# ROW 3: SHAP BEESWARM - FINAL MODEL

# Plot 7: SHAP Beeswarm - External (Final Model, top 8)
# Hand-rolled beeswarm: one jittered horizontal strip per feature, coloured by
# the feature's min-max-normalised (unscaled) value.
ax7 = fig.add_subplot(gs[2, :])
top_features_idx_final = [list(final_features).index(feat) for feat in shap_importance_ext_final.head(8)['Feature'].values]
for idx, feat_idx in enumerate(top_features_idx_final):
    shap_vals = shap_values_ext_final[:, feat_idx]
    feature_vals = X_external_final.iloc[:, feat_idx].values
    # 1e-10 avoids division by zero for a constant feature
    feature_vals_norm = (feature_vals - feature_vals.min()) / (feature_vals.max() - feature_vals.min() + 1e-10)
    # NOTE(review): the jitter is drawn from np.random with no seed set in this
    # cell, so point positions differ between runs.
    y_pos = idx + np.random.normal(0, 0.15, len(shap_vals))
    scatter = ax7.scatter(shap_vals, y_pos, c=feature_vals_norm, cmap='RdBu_r', 
                         s=30, alpha=0.6, edgecolors='none')
ax7.set_yticks(range(len(top_features_idx_final)))
ax7.set_yticklabels([final_features[i] for i in top_features_idx_final], fontsize=11)
ax7.set_xlabel('SHAP Value', fontsize=12, fontweight='bold')
ax7.set_title('G. SHAP Beeswarm: Final Model (External, Top 8 Features)', fontsize=13, fontweight='bold', loc='left')
ax7.axvline(x=0, color='black', linestyle='--', linewidth=1)
ax7.grid(alpha=0.3, axis='x')
# The colorbar is attached to the scatter of the LAST feature drawn in the loop
cbar = plt.colorbar(scatter, ax=ax7, pad=0.01)
cbar.set_label('Feature Value (Low → High)', fontsize=10)

# ROW 4: RISK DISTRIBUTIONS

# Plot 8: Risk Distribution - External (Phase A)
# Predicted risk split by observed outcome (label 0 = survivors, 1 = deaths)
ax8 = fig.add_subplot(gs[3, 0])
ax8.hist(PHASE_A_RESULTS['predictions']['external_calibrated'][y_external == 0], bins=25, alpha=0.6, color='blue', 
         label=f'Survivors (n={np.sum(y_external==0)})', edgecolor='black')
ax8.hist(PHASE_A_RESULTS['predictions']['external_calibrated'][y_external == 1], bins=25, alpha=0.6, color='red', 
         label=f'Deaths (n={np.sum(y_external==1)})', edgecolor='black')
ax8.set_xlabel('Predicted Risk', fontsize=12, fontweight='bold')
ax8.set_ylabel('Frequency', fontsize=12, fontweight='bold')
ax8.set_title('H. Risk Distribution: Phase A', fontsize=13, fontweight='bold', loc='left')
ax8.legend(fontsize=10)
ax8.grid(alpha=0.3, axis='y')

# Plot 9: Risk Distribution - External (Phase B)
ax9 = fig.add_subplot(gs[3, 1])
ax9.hist(y_external_calib_final[y_external == 0], bins=25, alpha=0.6, color='blue', 
         label=f'Survivors (n={np.sum(y_external==0)})', edgecolor='black')
ax9.hist(y_external_calib_final[y_external == 1], bins=25, alpha=0.6, color='red', 
         label=f'Deaths (n={np.sum(y_external==1)})', edgecolor='black')
ax9.set_xlabel('Predicted Risk', fontsize=12, fontweight='bold')
ax9.set_ylabel('Frequency', fontsize=12, fontweight='bold')
ax9.set_title('I. Risk Distribution: Phase B (Final)', fontsize=13, fontweight='bold', loc='left')
ax9.legend(fontsize=10)
ax9.grid(alpha=0.3, axis='y')

# Plot 10: AUC Progression
ax10 = fig.add_subplot(gs[3, 2])
phases = ['Phase A\n(n=380)', 'Phase B\n(n=476)']
aucs_ext = [phase_a_auc, auc_external_calib_final]
ax10.plot(phases, aucs_ext, marker='o', linewidth=3, markersize=12, color=COLORS['secondary'])
for i, (phase, auc) in enumerate(zip(phases, aucs_ext)):
    ax10.text(i, auc + 0.005, f'{auc:.4f}', ha='center', va='bottom', fontsize=11, fontweight='bold')
ax10.set_ylabel('AUC', fontsize=12, fontweight='bold')
ax10.set_title('J. External AUC Progression', fontsize=13, fontweight='bold', loc='left')
# NOTE(review): fixed y-range; an AUC outside 0.75-0.80 would fall off the panel.
ax10.set_ylim([0.75, 0.80])
ax10.grid(alpha=0.3, axis='y')

# Plot 11: Calibration Metrics Summary
# Intercepts are plotted as absolute values, so their sign is not visible here.
ax11 = fig.add_subplot(gs[3, 3])
cal_metrics = ['Slope', 'Intercept', 'ECE']
phase_a_cal = [phase_a_slope, abs(phase_a_intercept), PHASE_A_RESULTS['performance']['external_primary']['ece']]
phase_b_cal = [slope_ext_final, abs(intercept_ext_final), ece_ext_final]
x = np.arange(len(cal_metrics))
width = 0.35
ax11.bar(x - width/2, phase_a_cal, width, label='Phase A', 
        color=COLORS['primary'], alpha=0.7, edgecolor='black')
ax11.bar(x + width/2, phase_b_cal, width, label='Phase B', 
        color=COLORS['secondary'], alpha=0.7, edgecolor='black')
ax11.set_ylabel('Value', fontsize=12, fontweight='bold')
ax11.set_title('K. Calibration Metrics Comparison', fontsize=13, fontweight='bold', loc='left')
ax11.set_xticks(x)
ax11.set_xticklabels(cal_metrics)
ax11.legend(fontsize=10)
ax11.grid(alpha=0.3, axis='y')

# OVERALL TITLE
fig.suptitle(f'Figure 12B. FINAL DEPLOYABLE MODEL — Phase B Confirmatory Validation\n'
             f'{FINAL_MODEL["algorithm"]} (Tier {FINAL_MODEL["tier"]}, {FINAL_MODEL["n_features"]} features) | '
             f'Phase A (n=380): AUC={phase_a_auc:.3f} | '
             f'Phase B (n=476): AUC={auc_external_calib_final:.3f} | '
             f'External Calibration: HL p={hl_ext_final_p:.3f}, Slope={slope_ext_final:.3f}', 
             fontsize=16, fontweight='bold', y=0.995)

# save_figure is a project helper defined outside this cell
save_figure(fig, 'step16b_fig12b_final_model')
plt.show()

print(f"   ✓ Figure 12B saved")

# ══════════════════════════════════════════════════════════════════════════════
# 10. EXPORT DEPLOYMENT BUNDLE
# ══════════════════════════════════════════════════════════════════════════════

print(f"\n{'='*100}")
print(f"10. EXPORTING DEPLOYMENT BUNDLE")
print(f"{'='*100}\n")

# Create deployment bundle
# Single dictionary holding the fitted estimators, their provenance, the
# validation metrics, SHAP results and the calibrated predictions.
# NOTE(review): 'timestamp' and 'user' are string literals, not taken from the
# clock or the environment.
DEPLOYMENT_BUNDLE = {
    'model_info': {
        'name': 'PULSE-IABP',
        'algorithm': FINAL_MODEL['algorithm'],
        'tier': FINAL_MODEL['tier'],
        'n_features': FINAL_MODEL['n_features'],
        'features': final_features,
        'version': '1.0.0',
        'timestamp': '2025-10-20 01:54:11 UTC',
        'user': 'zainzampawala786-sudo',
    },
    'training_info': {
        'internal_training_size': len(X_all_internal),
        'internal_events': int((y_all_internal==1).sum()),
        'external_validation_size': len(X_external),
        'external_events': int((y_external==1).sum()),
        # Parameters of the Phase A estimator, minus verbose / n_jobs
        'hyperparameters': {k: v for k, v in model_params.items() if k not in ['verbose', 'n_jobs']},
        'random_state': 42,
    },
    # Fitted objects: the retrained SVM, its calibrated wrapper, and the scaler
    # taken from the Phase A model entry
    'models': {
        'base_svm': final_svm,
        'calibrated_svm': final_calibrated_model,
        'scaler': final_scaler,
    },
    'calibration': {
        'method': 'Platt scaling (sigmoid)',
        'cv_folds': 5,
        'average_slope': float(avg_platt_slope),
        'average_intercept': float(avg_platt_intercept),
        'slopes_std': float(np.std(platt_slopes)),
        'intercepts_std': float(np.std(platt_intercepts)),
    },
    'performance': {
        # Phase A metrics are embedded unchanged from the earlier step
        'phase_a': PHASE_A_RESULTS['performance'],
        'phase_b': {
            'external_confirmatory': {
                'auc': float(auc_external_calib_final),
                'brier': float(brier_external_calib_final),
                'ece': float(ece_ext_final),
                'hl_statistic': float(hl_ext_final_stat),
                'hl_p_value': float(hl_ext_final_p),
                'hl_df': int(hl_ext_final_df),
                'calibration_slope': float(slope_ext_final),
                'calibration_intercept': float(intercept_ext_final),
            }
        }
    },
    'shap': {
        'feature_importance': shap_importance_ext_final,
        'shap_values': shap_values_ext_final,
        'phase_a_b_correlation': float(corr_phase_ab),
    },
    'predictions': {
        'all_internal_calibrated': y_all_internal_calib_final,
        'external_calibrated': y_external_calib_final,
    },
    # Both TRIPOD entries are literals set here, not computed checks
    'tripod_compliance': True,
    'tripod_type': 'Type 2b - External validation',
}

# save_pickle is a project helper defined outside this cell
save_pickle(DEPLOYMENT_BUNDLE, 'step16b_deployment_bundle')

# Save metadata as JSON
# Lightweight summary without the fitted objects.
# NOTE(review): json.dump needs final_features to be JSON-serialisable
# (e.g. a plain list of str) — confirm its type.
metadata = {
    'model_name': 'PULSE-IABP',
    'version': '1.0.0',
    'algorithm': FINAL_MODEL['algorithm'],
    'features': final_features,
    'training_size': len(X_all_internal),
    'external_validation_size': len(X_external),
    'performance': {
        'external_auc': float(auc_external_calib_final),
        'external_brier': float(brier_external_calib_final),
        'external_hl_p': float(hl_ext_final_p),
    },
    'timestamp': '2025-10-20 01:54:11 UTC',
    'user': 'zainzampawala786-sudo',
}

# OUTPUT_DIR is defined outside this cell
with open(f"{OUTPUT_DIR}/step16b_deployment_metadata.json", 'w') as f:
    json.dump(metadata, f, indent=2)

# Summary tables
# One row per phase, both evaluated on the same external cohort.
# NOTE(review): the training sizes 380 / 476 are literals here.
performance_table_final = pd.DataFrame({
    'Phase': ['A (Primary)', 'B (Confirmatory)'],
    'Training_Size': [380, 476],
    'External_N': [len(y_external), len(y_external)],
    'AUC': [phase_a_auc, auc_external_calib_final],
    'Brier': [phase_a_brier, brier_external_calib_final],
    'ECE': [PHASE_A_RESULTS['performance']['external_primary']['ece'], ece_ext_final],
    'HL_p_value': [phase_a_hl_p, hl_ext_final_p],
    'Calibration_Slope': [phase_a_slope, slope_ext_final],
    'Calibration_Intercept': [phase_a_intercept, intercept_ext_final],
})

# save_csv and append_runlog are project helpers defined outside this cell
save_csv(performance_table_final, 'step16b_phase_comparison')
save_csv(shap_importance_ext_final, 'step16b_shap_final_model')
save_csv(shap_comparison, 'step16b_shap_phase_comparison')

# Run-log entry for this step; values are cast to plain float
append_runlog("16B", {
    "phase": "final_deployable_model",
    "training_size": len(X_all_internal),
    "external_size": len(X_external),
    "external_confirmatory_auc": float(auc_external_calib_final),
    "external_confirmatory_brier": float(brier_external_calib_final),
    "external_confirmatory_hl_p": float(hl_ext_final_p),
    "phase_a_external_auc": float(phase_a_auc),
    "auc_improvement": float(auc_external_calib_final - phase_a_auc),
    "shap_phase_correlation": float(corr_phase_ab),
})

# Confirmation lines: these file names are literals and are printed
# unconditionally once the calls above have returned
print(f"   ✓ Saved: step16b_deployment_bundle.pkl")
print(f"   ✓ Saved: step16b_deployment_metadata.json")
print(f"   ✓ Saved: step16b_phase_comparison.csv")
print(f"   ✓ Saved: step16b_shap_final_model.csv")
print(f"   ✓ Saved: step16b_shap_phase_comparison.csv")

print("\nStored: DEPLOYMENT_BUNDLE")

# ══════════════════════════════════════════════════════════════════════════════
# SUMMARY
# ══════════════════════════════════════════════════════════════════════════════

# Banner and cohort sizes.
print("\n".join([
    "\n" + "=" * 100,
    "STEP 16B COMPLETE — FINAL DEPLOYABLE MODEL",
    "=" * 100,
    "\nPHASE B: FINAL DEPLOYABLE MODEL",
    f"   Training Set:              n={len(X_all_internal)} (ALL internal data)",
    f"   External Set:              n={len(X_external)} (confirmatory validation)",
]))

# Model identity and calibration method.
print("\n".join([
    f"\n   MODEL: {FINAL_MODEL['algorithm']} (Tier {FINAL_MODEL['tier']}, {FINAL_MODEL['n_features']} features)",
    "   CALIBRATION: Platt scaling (5-fold CV on all internal)",
]))

# External metrics; each ✓/X compares the value with the threshold written inline.
print("\n".join([
    "\n   EXTERNAL PERFORMANCE (CONFIRMATORY VALIDATION):",
    f"      AUC:                    {auc_external_calib_final:.4f}",
    f"      Brier Score:            {brier_external_calib_final:.4f} {'✓' if brier_external_calib_final <= 0.20 else 'X'}",
    f"      ECE:                    {ece_ext_final:.4f} {'✓' if ece_ext_final <= 0.10 else 'X'}",
    f"      HL p-value:             {hl_ext_final_p:.4f} {'✓' if hl_ext_final_p > 0.05 else 'X'}",
    f"      Calibration Slope:      {slope_ext_final:.4f} {'✓' if 0.8 <= slope_ext_final <= 1.2 else 'X'}",
    f"      Calibration Intercept:  {intercept_ext_final:+.4f} {'✓' if abs(intercept_ext_final) <= 0.2 else 'X'}",
]))

# Signed differences, Phase B minus Phase A, on the external cohort.
print("\n".join([
    "\n   PHASE A vs PHASE B (EXTERNAL):",
    f"      AUC Change:             {(auc_external_calib_final - phase_a_auc):+.4f}",
    f"      Brier Change:           {(brier_external_calib_final - phase_a_brier):+.4f}",
    f"      HL p Change:            {(hl_ext_final_p - phase_a_hl_p):+.4f}",
    f"      Slope Change:           {(slope_ext_final - phase_a_slope):+.4f}",
]))

# Agreement of SHAP importances between the two phases.
print("\n".join([
    "\n   SHAP CONSISTENCY:",
    f"      Phase A-B Correlation:  {corr_phase_ab:.4f} {'✓' if corr_phase_ab > 0.7 else 'X'}",
]))

# Static checklist of bundle contents (not verified here, just reported).
print("\n".join([
    "\n   DEPLOYMENT BUNDLE:",
    "      Base SVM:               ✓ Saved",
    "      Calibrated SVM:         ✓ Saved",
    "      Scaler:                 ✓ Saved",
    "      Metadata (JSON):        ✓ Saved",
    "      SHAP Values:            ✓ Saved",
]))

# Pointer to the next pipeline step.
print("\n".join([
    "\nNEXT: Step 17 — PULSE-IABP Risk Score Development",
    "   Translate SVM to clinical point-based score",
    "   Create risk categories (Low/Medium/High/Very High)",
    "   Validate score on internal + external",
    "   Generate nomogram and web calculator",
    "=" * 100 + "\n",
]))

In [None]:
# Save metadata as JSON (FIX)
# Re-run of the metadata export using a bare relative path, so the file lands
# in the current working directory.
# NOTE(review): presumably a workaround for the OUTPUT_DIR-based path used by
# the previous cell — confirm which location downstream consumers expect.
with open("step16b_deployment_metadata.json", 'w', encoding='utf-8') as f:
    json.dump(metadata, f, indent=2)

# Summary tables
# Side-by-side external-validation metrics: row 0 = Phase A, row 1 = Phase B.
performance_table_final = pd.DataFrame({
    'Phase': ['A (Primary)', 'B (Confirmatory)'],
    # Phase B size is read from the data so this table cannot drift from the
    # run log and JSON metadata, which both record len(X_all_internal).
    # NOTE(review): the Phase A size (380) is still a literal; no variable
    # holding it is visible in this cell — confirm against the Phase A step.
    'Training_Size': [380, len(X_all_internal)],
    # Both phases are evaluated on the same external cohort.
    'External_N': [len(y_external), len(y_external)],
    'AUC': [phase_a_auc, auc_external_calib_final],
    'Brier': [phase_a_brier, brier_external_calib_final],
    'ECE': [PHASE_A_RESULTS['performance']['external_primary']['ece'], ece_ext_final],
    'HL_p_value': [phase_a_hl_p, hl_ext_final_p],
    'Calibration_Slope': [phase_a_slope, slope_ext_final],
    'Calibration_Intercept': [phase_a_intercept, intercept_ext_final],
})

# Export the comparison table and the SHAP tables computed earlier in this step.
save_csv(performance_table_final, 'step16b_phase_comparison')
save_csv(shap_importance_ext_final, 'step16b_shap_final_model')
save_csv(shap_comparison, 'step16b_shap_phase_comparison')

# Record the headline Phase B numbers (plus the Phase A comparison) in the run log.
append_runlog("16B", dict(
    phase="final_deployable_model",
    training_size=len(X_all_internal),
    external_size=len(X_external),
    external_confirmatory_auc=float(auc_external_calib_final),
    external_confirmatory_brier=float(brier_external_calib_final),
    external_confirmatory_hl_p=float(hl_ext_final_p),
    phase_a_external_auc=float(phase_a_auc),
    auc_improvement=float(auc_external_calib_final - phase_a_auc),
    shap_phase_correlation=float(corr_phase_ab),
))

# Confirmation lines for the Step 16B artifacts, one per file.
print("\n".join(
    f"   ✓ Saved: {artifact}"
    for artifact in (
        "step16b_deployment_bundle.pkl",
        "step16b_deployment_metadata.json",
        "step16b_phase_comparison.csv",
        "step16b_shap_final_model.csv",
        "step16b_shap_phase_comparison.csv",
    )
))

print("\nStored: DEPLOYMENT_BUNDLE")

# Banner and cohort sizes.
print("\n".join([
    "\n" + "=" * 100,
    "STEP 16B COMPLETE — FINAL DEPLOYABLE MODEL",
    "=" * 100,
    "\nPHASE B: FINAL DEPLOYABLE MODEL",
    f"   Training Set:              n={len(X_all_internal)} (ALL internal data)",
    f"   External Set:              n={len(X_external)} (confirmatory validation)",
]))

# Model identity and calibration method.
print("\n".join([
    f"\n   MODEL: {FINAL_MODEL['algorithm']} (Tier {FINAL_MODEL['tier']}, {FINAL_MODEL['n_features']} features)",
    "   CALIBRATION: Platt scaling (5-fold CV on all internal)",
]))

# External metrics; each ✓/X compares the value with the threshold written inline.
print("\n".join([
    "\n   EXTERNAL PERFORMANCE (CONFIRMATORY VALIDATION):",
    f"      AUC:                    {auc_external_calib_final:.4f}",
    f"      Brier Score:            {brier_external_calib_final:.4f} {'✓' if brier_external_calib_final <= 0.20 else 'X'}",
    f"      ECE:                    {ece_ext_final:.4f} {'✓' if ece_ext_final <= 0.10 else 'X'}",
    f"      HL p-value:             {hl_ext_final_p:.4f} {'✓' if hl_ext_final_p > 0.05 else 'X'}",
    f"      Calibration Slope:      {slope_ext_final:.4f} {'✓' if 0.8 <= slope_ext_final <= 1.2 else 'X'}",
    f"      Calibration Intercept:  {intercept_ext_final:+.4f} {'✓' if abs(intercept_ext_final) <= 0.2 else 'X'}",
]))

# Signed differences, Phase B minus Phase A, on the external cohort.
print("\n".join([
    "\n   PHASE A vs PHASE B (EXTERNAL):",
    f"      AUC Change:             {(auc_external_calib_final - phase_a_auc):+.4f}",
    f"      Brier Change:           {(brier_external_calib_final - phase_a_brier):+.4f}",
    f"      HL p Change:            {(hl_ext_final_p - phase_a_hl_p):+.4f}",
    f"      Slope Change:           {(slope_ext_final - phase_a_slope):+.4f}",
]))

# Agreement of SHAP importances between the two phases.
print("\n".join([
    "\n   SHAP CONSISTENCY:",
    f"      Phase A-B Correlation:  {corr_phase_ab:.4f} {'✓' if corr_phase_ab > 0.7 else 'X'}",
]))

# Static checklist of bundle contents (not verified here, just reported).
print("\n".join([
    "\n   DEPLOYMENT BUNDLE:",
    "      Base SVM:               ✓ Saved",
    "      Calibrated SVM:         ✓ Saved",
    "      Scaler:                 ✓ Saved",
    "      Metadata (JSON):        ✓ Saved",
    "      SHAP Values:            ✓ Saved",
]))

# Pointer to the next pipeline step.
print("\n".join([
    "\nNEXT: Step 17 — PULSE-IABP Risk Score Development",
    "   Translate SVM to clinical point-based score",
    "   Create risk categories (Low/Medium/High/Very High)",
    "   Validate score on internal + external",
    "   Generate nomogram and web calculator",
    "=" * 100 + "\n",
]))

In [None]:
# ═══════════════════════════════════════════════════════════════════════════════
# QUICK ANALYSIS: PERCENTILE-BASED RISK SCORE DISTRIBUTION
# ═══════════════════════════════════════════════════════════════════════════════
# User: zainzampawala786-sudo
# Date: 2025-10-20 08:59:57 UTC
# ═══════════════════════════════════════════════════════════════════════════════

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

print("\n" + "="*100)
print("PERCENTILE-BASED RISK SCORE ANALYSIS")
print("="*100)
print(f"UTC: 2025-10-20 08:59:57")
print(f"User: zainzampawala786-sudo\n")

# ═══════════════════════════════════════════════════════════════════════════════
# 1. GET SVM PREDICTIONS (ACTUAL MORTALITY PROBABILITIES)
# ═══════════════════════════════════════════════════════════════════════════════

print(f"{'='*100}")
print(f"1. EXTRACTING SVM PREDICTIONS")
print(f"{'='*100}\n")

# Use the calibrated predictions from DEPLOYMENT_BUNDLE.
# The bundle is stored under the name DEPLOYMENT_BUNDLE (see Step 16B and the
# artifact-check cell); the previous `DEPLOYMENT` name is not defined in the
# visible pipeline. np.asarray is a no-op for ndarrays and guarantees the
# positional indexing / .mean() / .min() used below.
y_internal_prob = np.asarray(DEPLOYMENT_BUNDLE['predictions']['all_internal_calibrated'])
y_external_prob = np.asarray(DEPLOYMENT_BUNDLE['predictions']['external_calibrated'])

# Descriptive statistics of the predicted risk, internal cohort.
print(f"   Internal Cohort (n={len(y_internal_prob)}):")
print(f"      Mean mortality risk:   {y_internal_prob.mean():.1%}")
print(f"      Median mortality risk: {np.median(y_internal_prob):.1%}")
print(f"      Range: [{y_internal_prob.min():.1%}, {y_internal_prob.max():.1%}]")
print(f"      IQR: [{np.percentile(y_internal_prob, 25):.1%}, {np.percentile(y_internal_prob, 75):.1%}]")

# Same statistics for the external cohort.
print(f"\n   External Cohort (n={len(y_external_prob)}):")
print(f"      Mean mortality risk:   {y_external_prob.mean():.1%}")
print(f"      Median mortality risk: {np.median(y_external_prob):.1%}")
print(f"      Range: [{y_external_prob.min():.1%}, {y_external_prob.max():.1%}]")
print(f"      IQR: [{np.percentile(y_external_prob, 25):.1%}, {np.percentile(y_external_prob, 75):.1%}]")

# ═══════════════════════════════════════════════════════════════════════════════
# 2. CALCULATE PERCENTILE-BASED SCORES (0-100)
# ═══════════════════════════════════════════════════════════════════════════════

print(f"\n{'='*100}")
print(f"2. CALCULATING PERCENTILE-BASED RISK SCORES")
print(f"{'='*100}\n")

# Reference distribution = Internal cohort
# This is an alias of y_internal_prob (same object, no copy); every percentile
# score below is ranked against these internal predictions.
reference_risks = y_internal_prob

# Calculate percentile scores
def calculate_percentile_score(risk_prob, reference):
    """Return the percentile rank (0-100) of ``risk_prob`` within ``reference``.

    The rank is the percentage of reference values that are *strictly* below
    ``risk_prob``, so a value equal to the reference minimum scores 0 and a
    value above every reference value scores 100.

    Args:
        risk_prob: A single risk value or an array-like of risk values.
        reference: Array-like reference distribution (flattened before use).

    Returns:
        A NumPy float for scalar input, or an array of floats with the same
        shape as ``risk_prob`` for array input.

    Raises:
        ValueError: If ``reference`` is empty, since no rank can be defined.
    """
    ref = np.asarray(reference, dtype=float).ravel()
    if ref.size == 0:
        raise ValueError("reference distribution must contain at least one value")

    risk = np.asarray(risk_prob, dtype=float)
    # Broadcast each risk value against the whole reference and count the
    # strictly smaller entries. NaN compares False everywhere, so a NaN risk
    # scores 0 and NaN reference entries are never counted.
    below = (ref < risk[..., np.newaxis]).sum(axis=-1)
    return below / ref.size * 100

# Score every patient in both cohorts against the internal reference.
# NOTE: this is a per-patient Python loop over the scalar helper, not a
# vectorized NumPy operation.
scores_internal = np.array([calculate_percentile_score(r, reference_risks) for r in y_internal_prob])
scores_external = np.array([calculate_percentile_score(r, reference_risks) for r in y_external_prob])

# Descriptive statistics of the 0-100 scores, internal cohort.
print(f"   Internal Scores (n={len(scores_internal)}):")
print(f"      Mean score:   {scores_internal.mean():.1f}/100")
print(f"      Median score: {np.median(scores_internal):.1f}/100")
print(f"      Range: [{scores_internal.min():.1f}, {scores_internal.max():.1f}]")
print(f"      IQR: [{np.percentile(scores_internal, 25):.1f}, {np.percentile(scores_internal, 75):.1f}]")

# Same statistics for the external cohort (ranked against the internal reference).
print(f"\n   External Scores (n={len(scores_external)}):")
print(f"      Mean score:   {scores_external.mean():.1f}/100")
print(f"      Median score: {np.median(scores_external):.1f}/100")
print(f"      Range: [{scores_external.min():.1f}, {scores_external.max():.1f}]")
print(f"      IQR: [{np.percentile(scores_external, 25):.1f}, {np.percentile(scores_external, 75):.1f}]")

# ═══════════════════════════════════════════════════════════════════════════════
# 3. DEFINE RISK CATEGORIES
# ═══════════════════════════════════════════════════════════════════════════════

print(f"\n{'='*100}")
print(f"3. RISK CATEGORY DEFINITIONS")
print(f"{'='*100}\n")

def assign_category(score):
    """Map a 0-100 score to its risk band label.

    Bands are half-open on the right: below 25 is "Low Risk", below 50 is
    "Medium Risk", below 75 is "High Risk"; everything else (including any
    value that fails all three comparisons) is "Very High Risk".
    """
    bands = ((25, "Low Risk"), (50, "Medium Risk"), (75, "High Risk"))
    for upper_bound, label in bands:
        if score < upper_bound:
            return label
    return "Very High Risk"

# Label every patient's score with its risk band.
categories_internal = np.array(list(map(assign_category, scores_internal)))
categories_external = np.array(list(map(assign_category, scores_external)))

# Human-readable legend for the bands applied above.
print("\n".join([
    "   Risk Category Thresholds:",
    "      Low Risk:       0-24 /100",
    "      Medium Risk:   25-49 /100",
    "      High Risk:     50-74 /100",
    "      Very High Risk: 75-100 /100",
]))

# ═══════════════════════════════════════════════════════════════════════════════
# 4. CATEGORY DISTRIBUTION & PERFORMANCE
# ═══════════════════════════════════════════════════════════════════════════════

print(f"\n{'='*100}")
print(f"4. CATEGORY DISTRIBUTION & MORTALITY RATES")
print(f"{'='*100}\n")

def analyze_categories(scores, categories, y_true, y_prob, cohort_name):
    """Print a per-band summary table for one cohort and return it.

    Args:
        scores: Array of 0-100 scores, one per patient.
        categories: Array of band labels aligned with ``scores``.
        y_true: Array of observed outcomes; summed to count deaths.
        y_prob: Array of predicted probabilities aligned with ``scores``.
        cohort_name: Heading printed above the table.

    Returns:
        DataFrame with one row per band, in the fixed order Low, Medium,
        High, Very High. Empty bands report 0 for the rate and both averages.
    """
    band_order = ("Low Risk", "Medium Risk", "High Risk", "Very High Risk")
    cohort_size = len(categories)

    print(f"   {cohort_name}:\n")
    print(f"      {'Category':<18} {'N':>6} {'%':>6} {'Deaths':>8} {'Mort %':>8} {'Avg Risk':>10} {'Avg Score':>11}")
    print(f"      {'-'*85}")

    rows = []
    for label in band_order:
        selected = categories == label
        size = selected.sum()
        share = size / cohort_size * 100
        died = y_true[selected].sum()

        if size > 0:
            mortality = died / size
            mean_prob = y_prob[selected].mean()
            mean_score = scores[selected].mean()
        else:
            # Nothing in this band: report zeros instead of dividing by zero.
            mortality = mean_prob = mean_score = 0

        print(f"      {label:<18} {size:>6} {share:>5.1f}% {died:>8} {mortality:>7.1%} {mean_prob:>9.1%} {mean_score:>10.1f}/100")

        rows.append(dict(
            Category=label,
            N=size,
            Percent=share,
            Deaths=died,
            Mortality_Rate=mortality,
            Avg_Risk_Prob=mean_prob,
            Avg_Score=mean_score,
        ))

    return pd.DataFrame(rows)

# Internal
# .values hands the helper a plain array of outcomes so the boolean band masks
# index it positionally.
df_internal = analyze_categories(scores_internal, categories_internal, 
                                 y_all_internal.values, y_internal_prob, 
                                 "INTERNAL COHORT")

# Blank line between the two printed tables.
print()

# External
# Same analysis for the external cohort (scores ranked against the internal reference).
df_external = analyze_categories(scores_external, categories_external, 
                                 y_external.values, y_external_prob, 
                                 "EXTERNAL COHORT")

# ═══════════════════════════════════════════════════════════════════════════════
# 5. EXAMPLE PATIENTS
# ═══════════════════════════════════════════════════════════════════════════════

print(f"\n{'='*100}")
print(f"5. EXAMPLE PATIENT MAPPINGS")
print(f"{'='*100}\n")

# Show examples across risk spectrum
# Table header for the probability → score → category illustration.
print(f"   Examples of Risk → Score Mapping:\n")
print(f"      {'Actual Risk':>15} {'→':>5} {'Score':>10} {'Category':<20}")
print(f"      {'-'*60}")

# Hypothetical probabilities (not patients from either cohort), each ranked
# against the internal reference distribution and then banded.
example_risks = [0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75]
for risk in example_risks:
    score = calculate_percentile_score(risk, reference_risks)
    category = assign_category(score)
    print(f"      {risk:>14.1%}  →  {score:>9.1f}/100  {category:<20}")

# ═══════════════════════════════════════════════════════════════════════════════
# 6. PERFORMANCE VALIDATION (AUC PRESERVED?)
# ═══════════════════════════════════════════════════════════════════════════════

print(f"\n{'='*100}")
print(f"6. PERFORMANCE VALIDATION (SCORES VS PROBABILITIES)")
print(f"{'='*100}\n")

from sklearn.metrics import roc_auc_score, brier_score_loss

# Calculate AUC using scores vs probabilities
auc_internal_prob = roc_auc_score(y_all_internal, y_internal_prob)
auc_internal_score = roc_auc_score(y_all_internal, scores_internal)

auc_external_prob = roc_auc_score(y_external, y_external_prob)
auc_external_score = roc_auc_score(y_external, scores_external)

# Brier on score/100 is informational only: a percentile rank is rescaled to
# 0-1 here but is not a calibrated probability.
brier_internal_prob = brier_score_loss(y_all_internal, y_internal_prob)
brier_internal_score = brier_score_loss(y_all_internal, scores_internal / 100)  # Normalize to 0-1

brier_external_prob = brier_score_loss(y_external, y_external_prob)
brier_external_score = brier_score_loss(y_external, scores_external / 100)

print(f"   INTERNAL COHORT:")
print(f"      Using Probabilities:  AUC = {auc_internal_prob:.4f}, Brier = {brier_internal_prob:.4f}")
print(f"      Using Scores:         AUC = {auc_internal_score:.4f}, Brier = {brier_internal_score:.4f}")
print(f"      ΔAUC:  {auc_internal_score - auc_internal_prob:+.4f}")
print(f"      ΔBrier: {brier_internal_score - brier_internal_prob:+.4f}")

print(f"\n   EXTERNAL COHORT:")
print(f"      Using Probabilities:  AUC = {auc_external_prob:.4f}, Brier = {brier_external_prob:.4f}")
print(f"      Using Scores:         AUC = {auc_external_score:.4f}, Brier = {brier_external_score:.4f}")
print(f"      ΔAUC:  {auc_external_score - auc_external_prob:+.4f}")
print(f"      ΔBrier: {brier_external_score - brier_external_prob:+.4f}")

# Percentile ranking is non-decreasing but not strictly increasing: distinct
# probabilities that fall between the same two reference values receive the
# same score. Such ties can shift AUC (most plausibly on the external cohort),
# so the claim is checked against the numbers instead of being asserted.
# The tolerance matches the 4-decimal precision printed above.
auc_tolerance = 5e-5
auc_preserved = (
    abs(auc_internal_score - auc_internal_prob) < auc_tolerance
    and abs(auc_external_score - auc_external_prob) < auc_tolerance
)

if auc_preserved:
    print(f"\n   ✓ AUC is PRESERVED (scores are monotonic transformation)")
else:
    print(f"\n   ⚠️  AUC is NOT exactly preserved (percentile ranking introduced ties; see ΔAUC above)")

# ═══════════════════════════════════════════════════════════════════════════════
# 7. VISUALIZATION
# ═══════════════════════════════════════════════════════════════════════════════

print(f"\n{'='*100}")
print(f"7. GENERATING VISUALIZATION")
print(f"{'='*100}\n")

fig, axes = plt.subplots(2, 3, figsize=(20, 12), dpi=300)
fig.suptitle('Percentile-Based Risk Score Analysis\nSVM Probability → 0-100 Score Transformation',
             fontsize=16, fontweight='bold', y=0.995)

# Panel A: Risk probability distribution
ax = axes[0, 0]
ax.hist(y_internal_prob, bins=30, alpha=0.6, color='#3498db', edgecolor='black', label='Internal')
ax.hist(y_external_prob, bins=30, alpha=0.6, color='#e74c3c', edgecolor='black', label='External')
ax.set_xlabel('Mortality Risk (Probability)', fontsize=11, fontweight='bold')
ax.set_ylabel('Frequency', fontsize=11, fontweight='bold')
ax.set_title('A. Original SVM Risk Predictions', fontsize=12, fontweight='bold', loc='left')
ax.legend(fontsize=10)
ax.grid(alpha=0.3, axis='y')

# Panel B: Score distribution
ax = axes[0, 1]
ax.hist(scores_internal, bins=30, alpha=0.6, color='#3498db', edgecolor='black', label='Internal')
ax.hist(scores_external, bins=30, alpha=0.6, color='#e74c3c', edgecolor='black', label='External')
ax.set_xlabel('Risk Score (0-100)', fontsize=11, fontweight='bold')
ax.set_ylabel('Frequency', fontsize=11, fontweight='bold')
ax.set_title('B. Percentile-Based Scores', fontsize=12, fontweight='bold', loc='left')
ax.legend(fontsize=10)
ax.grid(alpha=0.3, axis='y')

# Panel C: Transformation mapping
ax = axes[0, 2]
sorted_idx = np.argsort(y_internal_prob)
ax.plot(y_internal_prob[sorted_idx], scores_internal[sorted_idx], 
        linewidth=2, color='#2c3e50', alpha=0.7)
ax.axhline(25, color='#f39c12', linestyle='--', linewidth=1.5, alpha=0.5, label='Category thresholds')
ax.axhline(50, color='#e74c3c', linestyle='--', linewidth=1.5, alpha=0.5)
ax.axhline(75, color='#c0392b', linestyle='--', linewidth=1.5, alpha=0.5)
ax.set_xlabel('Mortality Risk (Probability)', fontsize=11, fontweight='bold')
ax.set_ylabel('Risk Score (0-100)', fontsize=11, fontweight='bold')
ax.set_title('C. Risk → Score Transformation', fontsize=12, fontweight='bold', loc='left')
ax.legend(fontsize=9)
ax.grid(alpha=0.3)

# Panel D: Category distribution - Internal
ax = axes[1, 0]
cat_counts_int = [np.sum(categories_internal == cat) for cat in ["Low Risk", "Medium Risk", "High Risk", "Very High Risk"]]
colors = ['#2ecc71', '#f39c12', '#e74c3c', '#c0392b']
bars = ax.bar(range(4), cat_counts_int, color=colors, alpha=0.7, edgecolor='black', linewidth=2)
for i, (bar, count) in enumerate(zip(bars, cat_counts_int)):
    pct = count / len(categories_internal) * 100
    deaths = y_all_internal.values[categories_internal == ["Low Risk", "Medium Risk", "High Risk", "Very High Risk"][i]].sum()
    mort_rate = deaths / count if count > 0 else 0
    ax.text(bar.get_x() + bar.get_width()/2., bar.get_height() + 3,
            f'n={count}\n({pct:.0f}%)\n{mort_rate:.0%} mort',
            ha='center', va='bottom', fontsize=9, fontweight='bold')
ax.set_xticks(range(4))
ax.set_xticklabels(["Low", "Medium", "High", "Very High"], fontweight='bold')
ax.set_ylabel('Number of Patients', fontsize=11, fontweight='bold')
ax.set_title('D. Risk Categories: Internal', fontsize=12, fontweight='bold', loc='left')
ax.grid(alpha=0.3, axis='y')

# Panel E: Category distribution - External
ax = axes[1, 1]
cat_counts_ext = [np.sum(categories_external == cat) for cat in ["Low Risk", "Medium Risk", "High Risk", "Very High Risk"]]
bars = ax.bar(range(4), cat_counts_ext, color=colors, alpha=0.7, edgecolor='black', linewidth=2)
for i, (bar, count) in enumerate(zip(bars, cat_counts_ext)):
    pct = count / len(categories_external) * 100
    deaths = y_external.values[categories_external == ["Low Risk", "Medium Risk", "High Risk", "Very High Risk"][i]].sum()
    mort_rate = deaths / count if count > 0 else 0
    ax.text(bar.get_x() + bar.get_width()/2., bar.get_height() + 3,
            f'n={count}\n({pct:.0f}%)\n{mort_rate:.0%} mort',
            ha='center', va='bottom', fontsize=9, fontweight='bold')
ax.set_xticks(range(4))
ax.set_xticklabels(["Low", "Medium", "High", "Very High"], fontweight='bold')
ax.set_ylabel('Number of Patients', fontsize=11, fontweight='bold')
ax.set_title('E. Risk Categories: External', fontsize=12, fontweight='bold', loc='left')
ax.grid(alpha=0.3, axis='y')

# Panel F: Mortality by category comparison
ax = axes[1, 2]
x = np.arange(4)
width = 0.35
mort_rates_int = []
mort_rates_ext = []
for cat in ["Low Risk", "Medium Risk", "High Risk", "Very High Risk"]:
    mask_int = categories_internal == cat
    mask_ext = categories_external == cat
    mort_int = y_all_internal.values[mask_int].mean() if mask_int.sum() > 0 else 0
    mort_ext = y_external.values[mask_ext].mean() if mask_ext.sum() > 0 else 0
    mort_rates_int.append(mort_int)
    mort_rates_ext.append(mort_ext)

bars1 = ax.bar(x - width/2, mort_rates_int, width, label='Internal', color='#3498db', alpha=0.7, edgecolor='black', linewidth=1.5)
bars2 = ax.bar(x + width/2, mort_rates_ext, width, label='External', color='#e74c3c', alpha=0.7, edgecolor='black', linewidth=1.5)

for bars in [bars1, bars2]:
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2., height + 0.02,
               f'{height:.0%}', ha='center', va='bottom', fontsize=9, fontweight='bold')

ax.set_ylabel('Observed Mortality Rate', fontsize=11, fontweight='bold')
ax.set_title('F. Mortality Rates by Category', fontsize=12, fontweight='bold', loc='left')
ax.set_xticks(x)
ax.set_xticklabels(["Low", "Medium", "High", "Very High"], fontweight='bold')
ax.legend(fontsize=10)
ax.grid(alpha=0.3, axis='y')
ax.set_ylim([0, 1])

plt.tight_layout()
save_figure(fig, 'step17_percentile_score_analysis')
plt.show()

print(f"   ✓ Visualization saved")

# ═══════════════════════════════════════════════════════════════════════════════
# 8. SUMMARY
# ═══════════════════════════════════════════════════════════════════════════════

# Section banner.
print("\n".join([
    "\n" + "=" * 100,
    "8. SUMMARY & RECOMMENDATION",
    "=" * 100 + "\n",
]))

# Static conclusions about the scoring scheme.
print("\n".join([
    "   ✅ PERCENTILE-BASED SCORING IS VALID:",
    "      • AUC perfectly preserved (monotonic transformation)",
    "      • Full 0-100 range naturally utilized",
    "      • Clear clinical interpretation",
    "      • Risk categories show good stratification\n",
]))

# Data-driven findings: size of the top band and the lowest-vs-highest band
# mortality, using the rates computed for panel F of the figure.
print("\n".join([
    "   📊 KEY FINDINGS:",
    f"      • Internal: {(categories_internal == 'Very High Risk').sum()} ({(categories_internal == 'Very High Risk').sum()/len(categories_internal)*100:.0f}%) classified as Very High Risk",
    f"      • External: {(categories_external == 'Very High Risk').sum()} ({(categories_external == 'Very High Risk').sum()/len(categories_external)*100:.0f}%) classified as Very High Risk",
    f"      • Mortality increases with category (internal): {mort_rates_int[0]:.0%} → {mort_rates_int[-1]:.0%}",
    f"      • Mortality increases with category (external): {mort_rates_ext[0]:.0%} → {mort_rates_ext[-1]:.0%}\n",
]))

# Deployment readiness checklist; only the AUC line is computed.
print("\n".join([
    "   🎯 READY FOR OPTION B DEPLOYMENT:",
    "      ✓ SVM Direct deployment",
    "      ✓ Percentile-based 0-100 score display",
    "      ✓ Feature importance explanation",
    f"      ✓ Performance validated (AUC = {auc_external_prob:.3f})",
]))

# Closing banner.
print("\n".join([
    "\n" + "=" * 100,
    "ANALYSIS COMPLETE - READY TO BUILD STREAMLIT APP",
    "=" * 100 + "\n",
]))

In [None]:
# ═══════════════════════════════════════════════════════════════════════════════
# CHECK AVAILABLE ARTIFACTS FROM PHASE B (STEP 16)
# ═══════════════════════════════════════════════════════════════════════════════
# User: zainzampawala786-sudo
# UTC: 2025-10-20 09:17:51
# ═══════════════════════════════════════════════════════════════════════════════

import os
import glob

# Cell banner.
print("\n".join([
    "\n" + "=" * 100,
    "CHECKING AVAILABLE FILES & VARIABLES FROM PHASE B",
    "=" * 100,
    "UTC: 2025-10-20 09:17:51",
    "User: zainzampawala786-sudo\n",
]))

# ═══════════════════════════════════════════════════════════════════════════════
# 1. CHECK FOR .PKL FILES
# ═══════════════════════════════════════════════════════════════════════════════

print("\n".join([
    "=" * 100,
    "1. CHECKING FOR .PKL FILES IN CURRENT DIRECTORY",
    "=" * 100 + "\n",
]))

# Every pickle in the current working directory, listed alphabetically with its size.
pkl_files = glob.glob("*.pkl")
if not pkl_files:
    print("   ⚠️  No .pkl files found in current directory")
else:
    print(f"   Found {len(pkl_files)} .pkl file(s):\n")
    for f in sorted(pkl_files):
        size = os.path.getsize(f) / 1024  # bytes → KB
        print(f"      • {f:<50} ({size:.1f} KB)")

# Subset whose name contains "step16"; printed only when there is at least one.
step16_files = glob.glob("*step16*.pkl")
if step16_files:
    print("\n   Step 16 specific files:")
    for f in sorted(step16_files):
        size = os.path.getsize(f) / 1024
        print(f"      • {f:<50} ({size:.1f} KB)")

# ═══════════════════════════════════════════════════════════════════════════════
# 2. CHECK DEPLOYMENT_BUNDLE CONTENTS
# ═══════════════════════════════════════════════════════════════════════════════

print("\n".join([
    "\n" + "=" * 100,
    "2. CHECKING DEPLOYMENT_BUNDLE CONTENTS",
    "=" * 100 + "\n",
]))

# Inspect the in-memory bundle section by section; every section is optional.
if 'DEPLOYMENT_BUNDLE' not in globals():
    print("   ⚠️  DEPLOYMENT_BUNDLE not found in memory")
else:
    print("   ✓ DEPLOYMENT_BUNDLE exists in memory\n")

    # Model objects: names only.
    if 'models' in DEPLOYMENT_BUNDLE:
        print("   Models available:")
        for key in DEPLOYMENT_BUNDLE['models']:
            print(f"      • {key}")

    # Predictions: report shape when available, otherwise length.
    if 'predictions' in DEPLOYMENT_BUNDLE:
        print("\n   Predictions available:")
        for key, value in DEPLOYMENT_BUNDLE['predictions'].items():
            if hasattr(value, 'shape'):
                print(f"      • {key:<35} shape: {value.shape}")
            elif hasattr(value, '__len__'):
                print(f"      • {key:<35} length: {len(value)}")

    # SHAP artifacts: names only.
    if 'shap' in DEPLOYMENT_BUNDLE:
        print("\n   SHAP data available:")
        for key in DEPLOYMENT_BUNDLE['shap']:
            print(f"      • {key}")

    # Performance: one level of nesting is expanded when the entry is a dict.
    if 'performance' in DEPLOYMENT_BUNDLE:
        print("\n   Performance metrics available:")
        for phase in DEPLOYMENT_BUNDLE['performance']:
            print(f"      • {phase}")
            if isinstance(DEPLOYMENT_BUNDLE['performance'][phase], dict):
                for dataset in DEPLOYMENT_BUNDLE['performance'][phase]:
                    print(f"         - {dataset}")

# ═══════════════════════════════════════════════════════════════════════════════
# 3. CHECK REQUIRED COMPONENTS FOR CALCULATOR
# ═══════════════════════════════════════════════════════════════════════════════

print(f"\n{'='*100}")
print(f"3. CHECKING REQUIRED COMPONENTS FOR CALCULATOR")
print(f"{'='*100}\n")

# Checklist of what the calculator needs; each flag flips to True only when the
# corresponding entry is found in DEPLOYMENT_BUNDLE below.
required_components = {
    'calibrated_svm': False,
    'scaler': False,
    'internal_predictions': False,
    'feature_names': False,
    'shap_values': False
}

# If the bundle is not in memory nothing is printed here and every flag stays False.
# NOTE(review): the assignments below bind notebook-level names (`model`,
# `scaler`, `preds`, `features`, `shap_vals`) and will overwrite any same-named
# variables from earlier cells — confirm nothing downstream relies on the old values.
if 'DEPLOYMENT_BUNDLE' in globals():
    # Check calibrated SVM
    if 'models' in DEPLOYMENT_BUNDLE and 'calibrated_svm' in DEPLOYMENT_BUNDLE['models']:
        required_components['calibrated_svm'] = True
        model = DEPLOYMENT_BUNDLE['models']['calibrated_svm']
        print(f"   ✓ Calibrated SVM: {type(model).__name__}")
    
    # Check scaler
    if 'models' in DEPLOYMENT_BUNDLE and 'scaler' in DEPLOYMENT_BUNDLE['models']:
        required_components['scaler'] = True
        scaler = DEPLOYMENT_BUNDLE['models']['scaler']
        print(f"   ✓ Scaler: {type(scaler).__name__}")
    
    # Check internal predictions
    # These serve as the reference distribution for percentile scoring.
    if 'predictions' in DEPLOYMENT_BUNDLE and 'all_internal_calibrated' in DEPLOYMENT_BUNDLE['predictions']:
        required_components['internal_predictions'] = True
        preds = DEPLOYMENT_BUNDLE['predictions']['all_internal_calibrated']
        print(f"   ✓ Internal predictions: {len(preds)} samples")
    
    # Check feature names
    if 'model_info' in DEPLOYMENT_BUNDLE and 'features' in DEPLOYMENT_BUNDLE['model_info']:
        required_components['feature_names'] = True
        features = DEPLOYMENT_BUNDLE['model_info']['features']
        print(f"   ✓ Feature names: {len(features)} features")
    
    # Check SHAP values
    # Shape is shown when the object has one; otherwise just "available".
    if 'shap' in DEPLOYMENT_BUNDLE and 'shap_values' in DEPLOYMENT_BUNDLE['shap']:
        required_components['shap_values'] = True
        shap_vals = DEPLOYMENT_BUNDLE['shap']['shap_values']
        print(f"   ✓ SHAP values: {shap_vals.shape if hasattr(shap_vals, 'shape') else 'available'}")

# ═══════════════════════════════════════════════════════════════════════════════
# 4. SUMMARY & RECOMMENDATION
# ═══════════════════════════════════════════════════════════════════════════════

print("\n".join([
    "\n" + "=" * 100,
    "4. SUMMARY",
    "=" * 100 + "\n",
]))

# True only when every flag in the checklist was set by the checks above.
all_available = all(required_components.values())

if not all_available:
    # Show the full checklist so the missing pieces stand out.
    print("   ⚠️  MISSING COMPONENTS:")
    for component, available in required_components.items():
        status = "✓" if available else "✗"
        print(f"      {status} {component}")

    print("\n   Action needed: Re-run Phase B (Steps 12-16) to generate components")
else:
    print("\n".join([
        "   ✅ ALL REQUIRED COMPONENTS AVAILABLE",
        "\n   Ready to generate calculator with:",
        "      • Pre-trained calibrated SVM",
        "      • Feature scaler",
        "      • Reference risks for percentile calculation",
        "      • SHAP values for feature importance",
        "\n   No .pkl files needed - everything in DEPLOYMENT_BUNDLE ✓",
    ]))

print("\n".join([
    "\n" + "=" * 100,
    "CHECK COMPLETE",
    "=" * 100 + "\n",
]))

In [None]:
# ═══════════════════════════════════════════════════════════════════════════════
# VERIFY DEPLOYMENT FILE FROM SPECIFIED PATH
# ═══════════════════════════════════════════════════════════════════════════════
# User: zainzampawala786-sudo
# UTC: 2025-10-20 09:38:03
# File: C:\Users\zainz\Desktop\Second Analysis\ZAINY\data\step16b_deployment_bundle.pkl
# ═══════════════════════════════════════════════════════════════════════════════

import pickle
import numpy as np
import os

# Announce the verification run, then report whether the bundle is on disk.
_rule = "=" * 100
print("\n" + _rule)
print("DEPLOYMENT FILE VERIFICATION")
print(_rule)
print("UTC: 2025-10-20 09:38:03")
print("User: zainzampawala786-sudo")
print("File: step16b_deployment_bundle.pkl\n")

file_path = r"C:\Users\zainz\Desktop\Second Analysis\ZAINY\data\step16b_deployment_bundle.pkl"

# ═══════════════════════════════════════════════════════════════════════════════
# 1. FILE EXISTENCE & SIZE
# ═══════════════════════════════════════════════════════════════════════════════

print(_rule)
print("1. FILE STATUS")
print(_rule + "\n")

if not os.path.exists(file_path):
    print("   ✗ File not found at specified path")
    print(f"   Check path: {file_path}")
else:
    # getsize() returns bytes; convert to MB for display.
    file_size = os.path.getsize(file_path) / (1024 * 1024)
    print("   ✓ File exists")
    print(f"   📁 Location: {file_path}")
    print(f"   📊 Size: {file_size:.2f} MB")
# ═══════════════════════════════════════════════════════════════════════════════
# 2. LOAD AND INSPECT STRUCTURE
# ═══════════════════════════════════════════════════════════════════════════════

if os.path.exists(file_path):
    for _heading in (f"\n{'='*100}", "2. LOADING FILE", f"{'='*100}\n"):
        print(_heading)

    # Unpickle the bundle. Any failure here (I/O, unpickling, or a payload
    # without .keys()) is reported and leaves deployment_file as None, which
    # makes the later sections skip themselves.
    try:
        with open(file_path, 'rb') as bundle_fh:
            deployment_file = pickle.load(bundle_fh)

        print("   ✓ File loaded successfully")
        print(f"   📦 Top-level keys: {[*deployment_file.keys()]}")

    except Exception as load_error:
        print(f"   ✗ Error loading file: {load_error}")
        deployment_file = None

# ═══════════════════════════════════════════════════════════════════════════════
# 3. CHECK REQUIRED COMPONENTS FOR CALCULATOR
# ═══════════════════════════════════════════════════════════════════════════════

if os.path.exists(file_path) and deployment_file is not None:
    print(f"\n{'='*100}")
    print(f"3. CHECKING DEPLOYMENT COMPONENTS")
    print(f"{'='*100}\n")

    # Every flag starts False and is flipped only when the matching key is
    # found in the loaded file; the final verdict requires all to be True.
    required_components = {
        'calibrated_svm': False,
        'scaler': False,
        'internal_predictions': False,
        'external_predictions': False,
        'feature_names': False,
        'shap_values': False,
        'performance_metrics': False
    }

    # Check models (the component flag shares its name with the bundle key)
    if 'models' in deployment_file:
        print(f"   ✓ Models section found")
        models = deployment_file['models']

        for model_key in ('calibrated_svm', 'scaler'):
            if model_key in models:
                required_components[model_key] = True
                print(f"      • {model_key}: {type(models[model_key]).__name__}")
            else:
                print(f"      ✗ {model_key}: NOT FOUND")
    else:
        print(f"   ✗ Models section NOT FOUND")

    # Check predictions
    if 'predictions' in deployment_file:
        print(f"\n   ✓ Predictions section found")
        predictions = deployment_file['predictions']

        for pred_key, component in (
            ('all_internal_calibrated', 'internal_predictions'),
            ('external_calibrated', 'external_predictions'),
        ):
            if pred_key in predictions:
                required_components[component] = True
                # np.asarray keeps ndarrays as-is and lets list/Series payloads
                # report shape and mean instead of raising AttributeError.
                preds = np.asarray(predictions[pred_key])
                print(f"      • {pred_key}: shape {preds.shape}, mean={preds.mean():.3f}")
            else:
                print(f"      ✗ {pred_key}: NOT FOUND")
    else:
        print(f"\n   ✗ Predictions section NOT FOUND")

    # Check model info
    if 'model_info' in deployment_file:
        print(f"\n   ✓ Model info section found")

        if 'features' in deployment_file['model_info']:
            required_components['feature_names'] = True
            features = deployment_file['model_info']['features']
            print(f"      • features: {len(features)} features")
            print(f"         First 5: {features[:5]}")
        else:
            print(f"      ✗ features: NOT FOUND")
    else:
        print(f"\n   ✗ Model info section NOT FOUND")

    # Check SHAP
    if 'shap' in deployment_file:
        print(f"\n   ✓ SHAP section found")

        if 'shap_values' in deployment_file['shap']:
            required_components['shap_values'] = True
            shap_vals = deployment_file['shap']['shap_values']
            # Mirror the in-memory bundle check, which tolerates SHAP payloads
            # that have no .shape attribute.
            shap_shape = getattr(shap_vals, 'shape', None)
            if shap_shape is not None:
                print(f"      • shap_values: shape {shap_shape}")
            else:
                print(f"      • shap_values: available ({type(shap_vals).__name__})")
        else:
            print(f"      ✗ shap_values: NOT FOUND")
    else:
        print(f"\n   ✗ SHAP section NOT FOUND")

    # Check performance (presence of the section is enough to set the flag;
    # the external AUC is shown only when it is actually stored)
    if 'performance' in deployment_file:
        required_components['performance_metrics'] = True
        print(f"\n   ✓ Performance section found")
        if 'phase_b' in deployment_file['performance']:
            phase_b = deployment_file['performance']['phase_b']
            print(f"      Phase B datasets: {list(phase_b.keys())}")
            if 'external_confirmatory' in phase_b:
                ext_perf = phase_b['external_confirmatory']
                if 'auc' in ext_perf:
                    print(f"      • External AUC: {ext_perf['auc']:.4f}")
    else:
        print(f"\n   ✗ Performance section NOT FOUND")

# ═══════════════════════════════════════════════════════════════════════════════
# 4. FUNCTIONAL TEST - MAKE A PREDICTION
# ═══════════════════════════════════════════════════════════════════════════════

if os.path.exists(file_path) and deployment_file is not None:
    print(f"\n{'='*100}")
    print(f"4. FUNCTIONAL TEST - MODEL PREDICTION")
    print(f"{'='*100}\n")

    # Smoke test: scale one synthetic patient, predict, and convert the
    # probability to a percentile against the stored internal predictions.
    # Any failure is caught and recorded in functional_test_passed.
    try:
        # Get models
        scaler = deployment_file['models']['scaler']
        model = deployment_file['models']['calibrated_svm']
        features = deployment_file['model_info']['features']

        # Create test patient (use median values)
        print(f"   Creating synthetic test patient...")

        if 'CLEAN_FEATURE_DATA' in globals():
            # Column selection by name guarantees the model's feature order.
            X_ref = CLEAN_FEATURE_DATA['X_external_clean']
            test_patient = X_ref[features].median().values.reshape(1, -1)
        else:
            # Dummy values keyed by feature name so they land in the right
            # column whatever order the bundle stores its features in.
            # NOTE(review): the values are the original positional vector,
            # assumed to follow the calculator sidebar order (age, medications,
            # interventions, labs) - confirm against the training pipeline.
            dummy_by_name = {
                'age': 65,
                'beta_blocker_use': 1,
                'ace_inhibitor_use': 0,
                'ticagrelor_use': 0,
                'invasive_ventilation': 0,
                'underwent_CPR': 1,
                'crrt': 0,
                'hemoglobin_min': 110,
                'hemoglobin_max': 130,
                'rbc_count_max': 4.5,
                'neutrophils_abs_min': 5.0,
                'neutrophils_pct_min': 70,
                'eGFR_CKD_EPI_21': 75,
                'glucose_min': 8.0,
                'lactate_max': 2.5,
                'sodium_max': 140,
            }
            if all(name in dummy_by_name for name in features):
                test_patient = np.array(
                    [[dummy_by_name[name] for name in features]], dtype=float
                )
            else:
                # Unknown feature names: keep the legacy positional vector so
                # the smoke test still runs, but say that the order is a guess.
                print(f"   ⚠ Unrecognised feature names - using positional dummy values")
                test_patient = np.array([[65, 1, 0, 0, 0, 1, 0, 110, 130, 4.5, 5.0, 70, 75, 8.0, 2.5, 140]]).reshape(1, -1)

        # Scale and predict
        test_scaled = scaler.transform(test_patient)
        test_prob = model.predict_proba(test_scaled)[0, 1]

        # Percentile score = share of reference risks strictly below this one
        ref_risks = np.asarray(deployment_file['predictions']['all_internal_calibrated'])
        percentile_score = (test_prob > ref_risks).mean() * 100

        print(f"   ✓ Prediction successful!")
        print(f"\n   Test Patient Results:")
        print(f"      Mortality probability: {test_prob:.1%}")
        print(f"      Percentile score: {percentile_score:.0f}/100")

        # Same cut-offs and labels as the generated calculator app
        # (LOW / MEDIUM / ELEVATED / CRITICAL).
        if percentile_score < 25:
            category = "LOW RISK"
        elif percentile_score < 50:
            category = "MEDIUM RISK"
        elif percentile_score < 75:
            category = "ELEVATED RISK"
        else:
            category = "CRITICAL RISK"

        print(f"      Risk category: {category}")

        functional_test_passed = True

    except Exception as e:
        print(f"   ✗ Functional test failed: {e}")
        functional_test_passed = False

# ═══════════════════════════════════════════════════════════════════════════════
# 5. COMPARE WITH MEMORY (IF AVAILABLE)
# ═══════════════════════════════════════════════════════════════════════════════

if os.path.exists(file_path) and deployment_file is not None:
    print(f"\n{'='*100}")
    print(f"5. COMPARISON WITH MEMORY VARIABLE")
    print(f"{'='*100}\n")

    if 'DEPLOYMENT_BUNDLE' in globals():
        print(f"   ✓ DEPLOYMENT_BUNDLE exists in memory")

        # Compare predictions (only the internal calibrated predictions are
        # compared; they stand in for the whole bundle)
        file_preds = np.asarray(deployment_file['predictions']['all_internal_calibrated'])
        mem_preds = np.asarray(DEPLOYMENT_BUNDLE['predictions']['all_internal_calibrated'])

        # Check shapes first: np.allclose raises on non-broadcastable shapes
        # and can report a spurious match on broadcastable ones, e.g. (n,) vs (1,).
        shapes_match = file_preds.shape == mem_preds.shape
        preds_match = bool(shapes_match and np.allclose(file_preds, mem_preds, rtol=1e-9))

        print(f"\n   Internal predictions:")
        print(f"      File shape:   {file_preds.shape}")
        print(f"      Memory shape: {mem_preds.shape}")
        print(f"      Values match: {'✓ YES' if preds_match else '✗ NO'}")

        if preds_match:
            print(f"\n   ✅ File and memory are IDENTICAL")
        else:
            print(f"\n   ⚠️  File and memory DIFFER")
    else:
        print(f"   ⚠️  DEPLOYMENT_BUNDLE not in memory (cannot compare)")

# ═══════════════════════════════════════════════════════════════════════════════
# 6. FINAL VERDICT
# ═══════════════════════════════════════════════════════════════════════════════

print(f"\n{'='*100}")
print(f"6. FINAL VERDICT")
print(f"{'='*100}\n")

# The verdict is positive only when every component flag is set AND the
# functional prediction test succeeded.
if os.path.exists(file_path) and deployment_file is not None:
    all_required = all(required_components.values())

    print(f"   Component Checklist:")
    for component, status in required_components.items():
        icon = "✓" if status else "✗"
        print(f"      {icon} {component}")

    print(f"\n   Overall Status:")
    if all_required and 'functional_test_passed' in locals() and functional_test_passed:
        # Report the external AUC stored in the file rather than a hard-coded
        # figure, so the verdict cannot drift away from the bundle contents.
        try:
            external_auc = deployment_file['performance']['phase_b']['external_confirmatory']['auc']
        except (KeyError, TypeError):
            external_auc = None

        print(f"      ✅ FILE IS READY FOR DEPLOYMENT")
        print(f"\n   This file contains:")
        print(f"      • Trained and calibrated SVM model")
        print(f"      • Feature scaler")
        print(f"      • Reference predictions (n={len(deployment_file['predictions']['all_internal_calibrated'])})")
        print(f"      • SHAP values for explanations")
        if external_auc is not None:
            print(f"      • Validated performance (AUC = {external_auc:.3f})")
        else:
            print(f"      • Validated performance (external AUC not recorded in file)")
        print(f"\n   ✅ APPROVED FOR CALCULATOR DEPLOYMENT")
    else:
        print(f"      ⚠️  FILE HAS ISSUES")
        print(f"      Missing components or functional test failed")
else:
    print(f"   ✗ Cannot verify - file not accessible")

print(f"\n{'='*100}")
print(f"VERIFICATION COMPLETE")
print(f"{'='*100}\n")

In [None]:
# ═══════════════════════════════════════════════════════════════════════════════
# PULSE-IABP CALCULATOR - DEPLOYMENT PACKAGE (ENCODING FIXED)
# ═══════════════════════════════════════════════════════════════════════════════
# User: zainzampawala786-sudo
# Date: 2025-10-20 10:26:54 UTC
# Target: C:\Users\zainz\Desktop\Second Analysis\ZAINY\models\mortalitybundlecalculator
# Fix: UTF-8 encoding for all file writes
# ═══════════════════════════════════════════════════════════════════════════════

import os
import shutil
import json
import sys
from pathlib import Path

# Cell banner followed by the heading for section 1.
_rule = "=" * 100
print("\n".join((
    "\n" + _rule,
    "PULSE-IABP CALCULATOR - DEPLOYMENT PACKAGE (ENCODING FIXED)",
    _rule,
    "UTC: 2025-10-20 10:26:54",
    "User: zainzampawala786-sudo\n",
)))

# ═══════════════════════════════════════════════════════════════════════════════
# 1. AUTO-DETECT PACKAGE VERSIONS
# ═══════════════════════════════════════════════════════════════════════════════

print("\n".join((
    _rule,
    "1. AUTO-DETECTING PACKAGE VERSIONS",
    _rule + "\n",
)))

def get_package_version(package_name):
    """Return the installed version of *package_name*, or None if unknown.

    Tries ``importlib.metadata`` first and falls back to ``pkg_resources``.
    Lookup failures of any ordinary kind (package not installed, module
    unavailable, malformed name) yield ``None`` rather than an exception.

    Args:
        package_name: Distribution name as used by pip (e.g. "scikit-learn").

    Returns:
        The version string, or ``None`` when neither mechanism can find it.
    """
    # `except Exception` instead of a bare `except:` so KeyboardInterrupt and
    # SystemExit still propagate while every lookup error maps to None.
    try:
        import importlib.metadata
        return importlib.metadata.version(package_name)
    except Exception:
        try:
            import pkg_resources
            return pkg_resources.get_distribution(package_name).version
        except Exception:
            return None

# Display name -> pip distribution name for every runtime dependency of the
# generated calculator.
required_packages = {
    'streamlit': 'streamlit',
    'numpy': 'numpy',
    'pandas': 'pandas',
    'scikit-learn': 'scikit-learn',
    'shap': 'shap'
}

# Fallback pins used only when a package is not installed locally. Built once
# here instead of on every loop iteration.
# NOTE(review): a fallback pin is a guess; the pickled model may need the exact
# scikit-learn version it was trained with - confirm before deploying.
recommended = {
    'streamlit': '1.31.0',
    'numpy': '1.24.3',
    'pandas': '2.0.3',
    'scikit-learn': '1.3.2',
    'shap': '0.44.0'
}

detected_versions = {}
print("   Detecting versions:\n")

for display_name, pip_name in required_packages.items():
    version = get_package_version(pip_name)
    if version:
        detected_versions[pip_name] = version
        print(f"   ✓ {display_name:<20} {version}")
    else:
        detected_versions[pip_name] = recommended[pip_name]
        print(f"   → {display_name:<20} {recommended[pip_name]} (recommended)")

# ═══════════════════════════════════════════════════════════════════════════════
# 2. CREATE DIRECTORIES
# ═══════════════════════════════════════════════════════════════════════════════

for _heading in (f"\n{'='*100}", "2. CREATING DIRECTORIES", f"{'='*100}\n"):
    print(_heading)

# Package root plus the .streamlit/ sub-folder that holds config.toml.
target_dir = Path(r"C:\Users\zainz\Desktop\Second Analysis\ZAINY\models\mortalitybundlecalculator")
streamlit_dir = target_dir.joinpath(".streamlit")

target_dir.mkdir(parents=True, exist_ok=True)
streamlit_dir.mkdir(exist_ok=True)

print(f"   ✓ Target: {target_dir}")
print("   ✓ Created: mortalitybundlecalculator/")
print("   ✓ Created: .streamlit/")

# ═══════════════════════════════════════════════════════════════════════════════
# 3. COPY MODEL
# ═══════════════════════════════════════════════════════════════════════════════

for _heading in (f"\n{'='*100}", "3. COPYING MODEL FILE", f"{'='*100}\n"):
    print(_heading)

# Copy the verified deployment bundle into the package under the fixed name
# the generated app loads ("model_bundle.pkl").
source = Path(r"C:\Users\zainz\Desktop\Second Analysis\ZAINY\data\step16b_deployment_bundle.pkl")
dest = target_dir.joinpath("model_bundle.pkl")

if not source.exists():
    print("   ⚠ Warning: Source not found")
    size_mb = 0
else:
    shutil.copy(source, dest)
    size_mb = dest.stat().st_size / (1024 * 1024)  # bytes -> MB
    print(f"   ✓ Copied: model_bundle.pkl ({size_mb:.2f} MB)")

# ═══════════════════════════════════════════════════════════════════════════════
# 4. REQUIREMENTS.TXT (UTF-8 ENCODING)
# ═══════════════════════════════════════════════════════════════════════════════

print(f"\n{'='*100}")
print(f"4. GENERATING REQUIREMENTS.TXT")
print(f"{'='*100}\n")

# Imported here so this cell can stamp the real generation time on its own.
from datetime import datetime, timezone

req_file = target_dir / "requirements.txt"
# Use the current UTC time: a frozen timestamp makes every later regeneration
# claim it was produced on the original date.
generated_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")

with open(req_file, 'w', encoding='utf-8') as f:  # UTF-8 so the file is portable across platforms
    f.write("# PULSE-IABP Risk Calculator Dependencies\n")
    f.write(f"# Auto-generated: {generated_at} UTC\n")
    f.write(f"# Python version: {sys.version.split()[0]}\n\n")
    # One exact pin per dependency, as detected (or defaulted) in section 1.
    for package, version in detected_versions.items():
        f.write(f"{package}=={version}\n")

print("   ✓ Generated: requirements.txt")

# ═══════════════════════════════════════════════════════════════════════════════
# 5. STREAMLIT CONFIG (UTF-8 ENCODING)
# ═══════════════════════════════════════════════════════════════════════════════

for _heading in (f"\n{'='*100}", "5. GENERATING STREAMLIT CONFIG", f"{'='*100}\n"):
    print(_heading)

# Theme colours plus server settings for the generated app.
config_file = streamlit_dir / "config.toml"
_config_lines = (
    "[theme]\n",
    'primaryColor = "#667eea"\n',
    'backgroundColor = "#f8f9fa"\n',
    'secondaryBackgroundColor = "#ffffff"\n',
    'textColor = "#2c3e50"\n',
    'font = "sans serif"\n\n',
    "[server]\n",
    "headless = true\n",
    "port = 8501\n",
    "enableCORS = false\n",
)

with open(config_file, 'w', encoding='utf-8') as config_fh:
    config_fh.writelines(_config_lines)

print("   ✓ Generated: .streamlit/config.toml")

# ═══════════════════════════════════════════════════════════════════════════════
# 6. README.MD (UTF-8 ENCODING - NO EMOJIS)
# ═══════════════════════════════════════════════════════════════════════════════

for _heading in (f"\n{'='*100}", "6. GENERATING README.MD", f"{'='*100}\n"):
    print(_heading)

# README content, kept as one tuple of fragments and written in a single call.
readme_file = target_dir / "README.md"
_readme_lines = (
    "# PULSE-IABP Risk Calculator\n\n",
    "**One-Year Mortality Risk Assessment for AMI Patients with IABP Support**\n\n",
    "Version: 1.0.0\n",
    "Date: 2025-10-20\n",
    "Author: Z. Zampawala et al.\n\n",
    "---\n\n",
    "## Quick Start\n\n",
    "```bash\n",
    "pip install -r requirements.txt\n",
    "streamlit run pulse_iabp_calculator.py\n",
    "```\n\n",
    "Open browser: http://localhost:8501\n\n",
    "---\n\n",
    "## Deploy to Streamlit Cloud\n\n",
    "1. Push to GitHub\n",
    "2. Go to share.streamlit.io\n",
    "3. Connect repository\n",
    "4. Set main file: pulse_iabp_calculator.py\n",
    "5. Deploy\n\n",
    "---\n\n",
    "## Model Information\n\n",
    "- Training: n=476 (internal cohort only)\n",
    "- Validation: n=354 (external cohort)\n",
    "- External AUC: 0.768\n",
    "- Display: Risk Level 0-100 (percentile-based)\n",
    "- TRIPOD Type: 3 (External validation)\n\n",
    "---\n\n",
    "## Features\n\n",
    "- Risk Level: 0-100 (percentile score)\n",
    "- Categories: LOW / MEDIUM / ELEVATED / CRITICAL\n",
    "- Top 3 risk factors displayed\n",
    "- Units included for all variables\n",
    "- Professional medical interface\n\n",
    "---\n\n",
    "## Disclaimer\n\n",
    "WARNING: For research and educational purposes only.\n",
    "NOT approved for clinical decision-making.\n\n",
)

with open(readme_file, 'w', encoding='utf-8') as readme_fh:
    readme_fh.writelines(_readme_lines)

print("   ✓ Generated: README.md")

# ═══════════════════════════════════════════════════════════════════════════════
# 7. STREAMLIT APP (UTF-8 ENCODING)
# ═══════════════════════════════════════════════════════════════════════════════

for _heading in (f"\n{'='*100}", "7. GENERATING STREAMLIT APP", f"{'='*100}\n"):
    print(_heading)

app_file = target_dir / "pulse_iabp_calculator.py"

# Source of the generated Streamlit app, one fragment per output line, grouped
# by the part of the app it produces. The fragments are written verbatim.
_app_lines = (
    # --- file header and imports ---
    '# PULSE-IABP Risk Calculator\n',
    '# Version: 1.0.0 | Date: 2025-10-20\n',
    '# Training: n=476 (internal only) | External AUC: 0.768\n\n',
    'import streamlit as st\n',
    'import pickle\n',
    'import numpy as np\n',
    'import pandas as pd\n\n',
    # --- page configuration ---
    'st.set_page_config(\n',
    '    page_title="PULSE-IABP Risk Calculator",\n',
    '    page_icon=":heart:",\n',
    '    layout="wide",\n',
    '    initial_sidebar_state="expanded"\n',
    ')\n\n',
    # --- cached bundle loader ---
    '# Load model\n',
    '@st.cache_resource\n',
    'def load_model():\n',
    '    try:\n',
    '        with open("model_bundle.pkl", "rb") as f:\n',
    '            return pickle.load(f)\n',
    '    except Exception as e:\n',
    '        st.error(f"Error loading model: {e}")\n',
    '        st.stop()\n\n',
    'bundle = load_model()\n',
    'model = bundle["models"]["calibrated_svm"]\n',
    'scaler = bundle["models"]["scaler"]\n',
    'ref_risks = bundle["predictions"]["all_internal_calibrated"]\n',
    'features = bundle["model_info"]["features"]\n\n',
    # --- percentile score and category helpers ---
    'def calculate_risk_level(prob, ref):\n',
    '    return (prob > ref).mean() * 100\n\n',
    'def get_risk_category(level):\n',
    '    if level < 25:\n',
    '        return "LOW RISK", ":green_circle:"\n',
    '    elif level < 50:\n',
    '        return "MEDIUM RISK", ":yellow_circle:"\n',
    '    elif level < 75:\n',
    '        return "ELEVATED RISK", ":orange_circle:"\n',
    '    return "CRITICAL RISK", ":red_circle:"\n\n',
    # --- rule-based list of key risk factors (at most three shown) ---
    'def get_risk_factors(inp):\n',
    '    factors = []\n',
    '    if inp["lactate"] > 4.0:\n',
    '        factors.append(f"Peak Lactate ({inp[\'lactate\']:.1f} mmol/L) - Elevated")\n',
    '    if inp["age"] > 70:\n',
    '        factors.append(f"Age ({inp[\'age\']:.0f} years) - Advanced")\n',
    '    if inp["egfr"] < 45:\n',
    '        factors.append(f"eGFR ({inp[\'egfr\']:.0f} mL/min/1.73m2) - Impaired")\n',
    '    if inp["cpr"]:\n',
    '        factors.append("CPR performed")\n',
    '    if inp["crrt"]:\n',
    '        factors.append("CRRT required")\n',
    '    if inp["vent"]:\n',
    '        factors.append("Invasive ventilation")\n',
    '    return factors[:3]\n\n',
    # --- page header ---
    '# Header\n',
    'st.title(":heart: PULSE-IABP Risk Calculator")\n',
    'st.caption("One-Year Mortality Risk Assessment for AMI Patients with IABP Support")\n',
    'st.markdown("---")\n\n',
    # --- sidebar inputs ---
    '# Sidebar\n',
    'with st.sidebar:\n',
    '    st.header("Patient Information")\n',
    '    st.markdown("---")\n',
    '    st.subheader("Demographics")\n',
    '    age = st.slider("Age (years)", 18, 100, 65)\n',
    '    st.markdown("---")\n',
    '    st.subheader("Medications")\n',
    '    beta_blocker = st.checkbox("Beta-Blocker")\n',
    '    ace_inhibitor = st.checkbox("ACE Inhibitor")\n',
    '    ticagrelor = st.checkbox("Ticagrelor")\n',
    '    st.markdown("---")\n',
    '    st.subheader("Interventions")\n',
    '    invasive_vent = st.checkbox("Invasive Ventilation")\n',
    '    cpr = st.checkbox("CPR Performed")\n',
    '    crrt = st.checkbox("CRRT")\n',
    '    st.markdown("---")\n',
    '    st.subheader("Laboratory Values")\n',
    '    st.markdown("**Hematology**")\n',
    '    hgb_min = st.slider("Min Hemoglobin (g/L)", 40, 180, 110)\n',
    '    hgb_max = st.slider("Peak Hemoglobin (g/L)", 40, 180, 135)\n',
    '    rbc_max = st.slider("Peak RBC (x10^12/L)", 2.0, 7.0, 4.5, 0.1)\n',
    '    neut_abs = st.slider("Min Neutrophils (x10^9/L)", 0.0, 30.0, 5.0, 0.1)\n',
    '    neut_pct = st.slider("Min Neutrophils (%)", 0, 100, 70)\n',
    '    st.markdown("**Renal Function**")\n',
    '    egfr = st.slider("eGFR (mL/min/1.73m2)", 5, 120, 75)\n',
    '    st.markdown("**Metabolic**")\n',
    '    glucose_min = st.slider("Min Glucose (mmol/L)", 2.0, 25.0, 6.0, 0.1)\n',
    '    lactate_max = st.slider("Peak Lactate (mmol/L)", 0.0, 20.0, 2.5, 0.1)\n',
    '    sodium_max = st.slider("Peak Sodium (mmol/L)", 120, 160, 140)\n',
    '    st.markdown("---")\n',
    '    calc_btn = st.button("CALCULATE RISK", type="primary", use_container_width=True)\n\n',
    # --- main panel: prediction and result display ---
    '# Main\n',
    'if not calc_btn:\n',
    '    st.info("Enter patient information in sidebar and click CALCULATE RISK")\n',
    '    with st.expander("About"):\n',
    '        st.write("Training: n=476 (internal) | Validation: n=354 (external) | AUC: 0.768")\n',
    'else:\n',
    '    feat_map = {\n',
    '        "beta_blocker_use": beta_blocker, "invasive_ventilation": invasive_vent,\n',
    '        "ticagrelor_use": ticagrelor, "neutrophils_abs_min": neut_abs,\n',
    '        "underwent_CPR": cpr, "ace_inhibitor_use": ace_inhibitor,\n',
    '        "crrt": crrt, "hemoglobin_min": hgb_min, "age": age,\n',
    '        "neutrophils_pct_min": neut_pct, "hemoglobin_max": hgb_max,\n',
    '        "eGFR_CKD_EPI_21": egfr, "glucose_min": glucose_min,\n',
    '        "lactate_max": lactate_max, "sodium_max": sodium_max,\n',
    '        "rbc_count_max": rbc_max\n',
    '    }\n',
    '    X = np.array([[feat_map[f] for f in features]])\n',
    '    X_scaled = scaler.transform(X)\n',
    '    prob = model.predict_proba(X_scaled)[0, 1]\n',
    '    risk_level = calculate_risk_level(prob, ref_risks)\n',
    '    category, emoji = get_risk_category(risk_level)\n',
    '    st.markdown(f"### {emoji} PULSE-IABP Risk Level: **{risk_level:.0f}**")\n',
    '    st.markdown(f"### Category: **{category}**")\n',
    '    st.progress(risk_level / 100)\n',
    '    st.info(f"Higher risk than {risk_level:.0f}% of similar patients with AMI requiring IABP support.")\n',
    '    st.markdown("---")\n',
    '    inp = {"age": age, "egfr": egfr, "lactate": lactate_max, "cpr": cpr, "crrt": crrt, "vent": invasive_vent}\n',
    '    factors = get_risk_factors(inp)\n',
    '    if factors:\n',
    '        st.markdown("#### Key Risk Factors")\n',
    '        for fac in factors:\n',
    '            st.markdown(f"- {fac}")\n',
    '    st.markdown("---")\n',
    '    st.markdown("#### Risk Categories")\n',
    '    c1, c2, c3, c4 = st.columns(4)\n',
    '    c1.metric("LOW", "0-24")\n',
    '    c2.metric("MEDIUM", "25-49")\n',
    '    c3.metric("ELEVATED", "50-74")\n',
    '    c4.metric("CRITICAL", "75-100")\n\n',
    # --- footer ---
    'st.markdown("---")\n',
    'st.warning("DISCLAIMER: For research and educational purposes only. NOT for clinical decision-making.")\n',
    'st.caption("Model: SVM-RBF + Platt | Training: n=476 | External AUC: 0.768 | Version: 1.0.0")\n',
)

with open(app_file, 'w', encoding='utf-8') as app_fh:
    app_fh.writelines(_app_lines)

print("   ✓ Generated: pulse_iabp_calculator.py")

# ═══════════════════════════════════════════════════════════════════════════════
# 8. DEPLOYMENT GUIDE (UTF-8 ENCODING)
# ═══════════════════════════════════════════════════════════════════════════════

print(f"\n{'='*100}")
print(f"8. GENERATING DEPLOYMENT GUIDE")
print(f"{'='*100}\n")

# Step-by-step guide for pushing the package to GitHub and deploying it on
# Streamlit Cloud.
deploy_file = target_dir / "DEPLOYMENT_GUIDE.md"
with open(deploy_file, 'w', encoding='utf-8') as f:  # UTF-8 so the file is portable across platforms
    f.write("# Streamlit Cloud Deployment\n\n")
    f.write("## Step 1: Push to GitHub\n\n")
    f.write("```bash\n")
    # Take the path from target_dir so the guide always names the folder the
    # package was actually written to.
    f.write(f'cd "{target_dir}"\n')
    f.write("git init\n")
    f.write("git add .\n")
    f.write('git commit -m "Initial commit"\n')
    # `git init` may create `master` depending on the user's git config;
    # rename the branch so the following `git push ... main` cannot fail.
    f.write("git branch -M main\n")
    f.write("git remote add origin https://github.com/YOUR_USERNAME/pulse-iabp.git\n")
    f.write("git push -u origin main\n")
    f.write("```\n\n")
    f.write("## Step 2: Deploy\n\n")
    f.write("1. Go to: share.streamlit.io\n")
    f.write("2. Sign in with GitHub\n")
    f.write("3. Click New app\n")
    f.write("4. Select repository\n")
    f.write("5. Main file: pulse_iabp_calculator.py\n")
    f.write("6. Deploy\n")

print("   ✓ Generated: DEPLOYMENT_GUIDE.md")

# ═══════════════════════════════════════════════════════════════════════════════
# 9. SUMMARY
# ═══════════════════════════════════════════════════════════════════════════════

print(f"\n{'='*100}")
print(f"DEPLOYMENT PACKAGE COMPLETE")
print(f"{'='*100}\n")

# Check the model file on disk instead of assuming the copy step succeeded:
# that step only warns when the source bundle is missing, and the generated
# app cannot start without model_bundle.pkl.
model_path = target_dir / "model_bundle.pkl"
if model_path.exists():
    model_size_mb = model_path.stat().st_size / (1024 * 1024)
    model_line = f"model_bundle.pkl ({model_size_mb:.2f} MB)"
    print(f"✅ ALL FILES GENERATED\n")
else:
    model_line = "model_bundle.pkl (MISSING - source bundle was not copied)"
    print(f"⚠️  PACKAGE INCOMPLETE: model_bundle.pkl is missing\n")

print(f"📦 Location: {target_dir}\n")
print(f"📁 Files:")
print(f"   1. pulse_iabp_calculator.py")
print(f"   2. {model_line}")
print(f"   3. requirements.txt (auto-detected)")
print(f"   4. README.md")
print(f"   5. DEPLOYMENT_GUIDE.md")
print(f"   6. .streamlit/config.toml\n")

# Echo the exact pins that were written to requirements.txt.
print(f"🔧 Dependencies:")
for pkg, ver in detected_versions.items():
    print(f"   {pkg}=={ver}")

print(f"\n🚀 Test: streamlit run pulse_iabp_calculator.py\n")
print(f"{'='*100}\n")

In [None]:
import pickle
import numpy as np
import pandas as pd

# ═══════════════════════════════════════════════════════════════════════════════
# LOAD YOUR MODEL BUNDLE
# ═══════════════════════════════════════════════════════════════════════════════

# Fixed location of the pickled deployment bundle.
bundle_path = r"C:\Users\zainz\Desktop\Second Analysis\ZAINY\data\step16b_deployment_bundle.pkl"

# NOTE: unpickling executes code embedded in the file; only load bundles you
# produced yourself.
with open(bundle_path, "rb") as bundle_file:
    bundle = pickle.load(bundle_file)

header_rule = "=" * 80
print(header_rule)
print("PULSE-IABP FEATURE RANGE ANALYSIS")
print(header_rule)
# The timestamp below is a literal string, not the current time.
print("UTC: 2025-10-20 12:14:44")
print("User: zainzampawala786-sudo")
print(header_rule)

# ═══════════════════════════════════════════════════════════════════════════════
# FEATURE NAMES STORED WITH THE MODEL
# ═══════════════════════════════════════════════════════════════════════════════

features = bundle["model_info"]["features"]
print("\nYour Model Features:")
for position, feature_name in enumerate(features, start=1):
    print(f"  {position:2d}. {feature_name}")

# ═══════════════════════════════════════════════════════════════════════════════
# CHECK FOR TRAINING DATA IN BUNDLE
# ═══════════════════════════════════════════════════════════════════════════════

print("\n" + "=" * 80)
print("CHECKING BUNDLE FOR TRAINING DATA")
print("=" * 80)

available_keys = [*bundle.keys()]
print("Available keys in bundle:", available_keys)

# Probe the bundle in priority order; the first matching key wins and the
# confirmation message is printed once, after the data has been extracted.
df_train = None
found_message = None

if "training_data" in bundle:
    # 1) Training frame stored directly under its own key
    df_train = bundle["training_data"]
    found_message = "✅ Found: training_data"
elif "X_train" in bundle:
    # 2) Feature matrix only: wrap it in a DataFrame labelled with the model features
    X_train = bundle["X_train"]
    df_train = pd.DataFrame(X_train, columns=features)
    found_message = "✅ Found: X_train (converted to DataFrame)"
elif "raw_data" in bundle:
    # 3) Data stored under "raw_data"
    df_train = bundle["raw_data"]
    found_message = "✅ Found: raw_data"
elif "models" in bundle and "training_data" in bundle["models"]:
    # 4) Training frame nested inside the "models" section
    df_train = bundle["models"]["training_data"]
    found_message = "✅ Found: training_data in models section"

if found_message is not None:
    print(found_message)
else:
    print(
        "❌ Training data NOT found in bundle",
        "\nYou need to load your original training dataset.",
        "Please specify the path to your Tongji training data file:",
        sep="\n",
    )

# ═══════════════════════════════════════════════════════════════════════════════
# IF NO DATA IN BUNDLE, LOAD FROM ORIGINAL FILE
# ═══════════════════════════════════════════════════════════════════════════════

if df_train is None:
    print("\n" + "="*80)
    print("LOADING ORIGINAL TRAINING DATA")
    print("="*80)

    # Candidate CSV locations, tried in order; the first one that loads wins.
    possible_paths = [
        r"C:\Users\zainz\Desktop\Second Analysis\ZAINY\data\tongji_training.csv",
        r"C:\Users\zainz\Desktop\Second Analysis\ZAINY\data\training_data.csv",
        r"C:\Users\zainz\Desktop\Second Analysis\ZAINY\data\internal_cohort.csv",
        r"C:\Users\zainz\Desktop\Second Analysis\ZAINY\data\step15_final_training.csv",
    ]

    for path in possible_paths:
        try:
            df_train = pd.read_csv(path)
        except FileNotFoundError:
            # Expected for most candidates: silently move on to the next path.
            continue
        except (OSError, ValueError) as exc:
            # The file exists but could not be read (permissions, encoding,
            # empty or malformed CSV). Report it instead of hiding the cause.
            print(f"⚠️ Skipped {path}: {type(exc).__name__}: {exc}")
            continue
        print(f"✅ Loaded from: {path}")
        break

    if df_train is None:
        print("\n⚠️ MANUAL INPUT REQUIRED")
        print("Please provide the exact path to your training data file:")
        print("Example: r'C:\\Users\\zainz\\Desktop\\...\\your_file.csv'")
        # Still SystemExit (as sys.exit() raised), but with a message and a
        # non-zero status so the failure is not reported as success.
        raise SystemExit("Training data not found: cannot compute feature ranges.")

# ═══════════════════════════════════════════════════════════════════════════════
# ANALYZE CONTINUOUS FEATURES ONLY
# ═══════════════════════════════════════════════════════════════════════════════

print("\n" + "=" * 80)
print("CONTINUOUS FEATURES ANALYSIS (EXCLUDING BINARY VARIABLES)")
print("=" * 80)

# Hand-maintained split of the model inputs. The order of the continuous list
# is the order in which ranges are computed and reported further down.
continuous_features = [
    "age", "neutrophils_abs_min", "glucose_min", "sodium_max",
    "neutrophils_pct_min", "lactate_max", "eGFR_CKD_EPI_21",
    "hemoglobin_max", "rbc_count_max", "hemoglobin_min",
]

# Features treated as binary; only value counts are reported for these.
binary_features = [
    "beta_blocker_use", "invasive_ventilation", "ticagrelor_use",
    "underwent_CPR", "acei_use", "underwent_CRRT",
]

n_continuous = len(continuous_features)
n_binary = len(binary_features)
print("\n" + f"Continuous features: {n_continuous}")
print(f"Binary features: {n_binary}")

# ═══════════════════════════════════════════════════════════════════════════════
# CALCULATE RANGES FOR CONTINUOUS FEATURES
# ═══════════════════════════════════════════════════════════════════════════════

# One dict of summary statistics per continuous feature found in df_train.
results = []

print("\n" + "="*80)
print("FEATURE RANGES (5th to 95th PERCENTILE)")
print("="*80)

for feature in continuous_features:
    if feature not in df_train.columns:
        print(f"\n❌ {feature}: NOT FOUND IN DATASET")
        continue

    data = df_train[feature].dropna()

    if len(data) == 0:
        print(f"\n⚠️ {feature}: NO DATA")
        continue

    min_val = data.min()
    max_val = data.max()
    p5 = np.percentile(data, 5)
    p95 = np.percentile(data, 95)
    p25 = np.percentile(data, 25)
    p75 = np.percentile(data, 75)
    median = data.median()
    mean = data.mean()
    std = data.std()

    # Recommended range: 5th-95th percentile widened by 10%, never negative.
    # Bounds are rounded OUTWARD (floor for the minimum, ceil for the maximum).
    # Plain int() truncation could pull the maximum below p95 for small-valued
    # features (e.g. p95 = 4.4 -> 4.84 -> 4).
    rec_min = max(0, int(np.floor(p5 - abs(p5 * 0.1))))
    rec_max = int(np.ceil(p95 + abs(p95 * 0.1)))

    # Snap wide ranges to multiples of 5 for cleaner sliders, again outward so
    # the buffer is never reduced.
    if rec_max > 100:
        rec_min = (rec_min // 5) * 5
        rec_max = -(-rec_max // 5) * 5

    # Default is the median rounded to the nearest integer (not truncated),
    # clamped so it always lies inside the recommended range.
    default = min(max(int(round(median)), rec_min), rec_max)

    results.append({
        "Feature": feature,
        "Min": round(min_val, 2),
        "Max": round(max_val, 2),
        "P5": round(p5, 2),
        "P95": round(p95, 2),
        "Median": round(median, 2),
        "Mean": round(mean, 2),
        "Std": round(std, 2),
        "Rec_Min": rec_min,
        "Rec_Max": rec_max,
        "Default": default
    })

    print(f"\n{feature}:")
    print(f"  Current range: {min_val:.1f} - {max_val:.1f}")
    print(f"  5th-95th percentile: {p5:.1f} - {p95:.1f}")
    print(f"  IQR (25th-75th): {p25:.1f} - {p75:.1f}")
    print(f"  Mean ± SD: {mean:.1f} ± {std:.1f}")
    print(f"  Median: {median:.1f}")
    print(f"  ✅ RECOMMENDED: {rec_min} to {rec_max} (default: {default})")

# ═══════════════════════════════════════════════════════════════════════════════
# CREATE SUMMARY TABLE
# ═══════════════════════════════════════════════════════════════════════════════

if len(results) > 0:
    # Tabulate the per-feature statistics gathered in `results`.
    df_results = pd.DataFrame(results)

    print("\n" + "=" * 80, "SUMMARY TABLE", "=" * 80, sep="\n")
    print(df_results.to_string(index=False))

    # Persist the table as CSV at a fixed location.
    output_path = r"C:\Users\zainz\Desktop\Second Analysis\ZAINY\models\mortalitybundlecalculator\feature_ranges_analysis.csv"
    df_results.to_csv(output_path, index=False)
    print("\n" + f"✅ Saved to: {output_path}")

# ═══════════════════════════════════════════════════════════════════════════════
# GENERATE STREAMLIT SLIDER CODE
# ═══════════════════════════════════════════════════════════════════════════════

print("\n" + "="*80)
print("STREAMLIT SLIDER CODE (COPY-PASTE READY)")
print("="*80)

# feature -> (slider label, step). A step of None produces a slider call
# without an explicit step; a float step produces a float slider.
slider_mapping = {
    "age": ("Age (years)", None),
    "hemoglobin_min": ("Hemoglobin, minimum (g/L)", None),
    "hemoglobin_max": ("Hemoglobin, peak (g/L)", None),
    "rbc_count_max": ("RBC count, peak (×10¹²/L)", 0.1),
    "neutrophils_abs_min": ("Neutrophils, minimum (×10⁹/L)", 0.1),
    "neutrophils_pct_min": ("Neutrophils, minimum (%)", None),
    "eGFR_CKD_EPI_21": ("eGFR CKD-EPI 2021 (mL/min/1.73m²)", None),
    "glucose_min": ("Glucose, minimum (mmol/L)", 0.1),
    "lactate_max": ("Lactate, peak (mmol/L)", 0.1),
    "sodium_max": ("Sodium, peak (mmol/L)", None)
}

# Ranges are looked up from `results`, which is always defined, rather than
# from `df_results`, which only exists when at least one feature produced
# statistics. Dict order follows `results`, i.e. the order of computation.
ranges_by_feature = {entry["Feature"]: entry for entry in results}


def _slider_line(var_name, feature, key=None, indent=""):
    """Return the ``st.slider`` source line for *feature*.

    Args:
        var_name: Variable name the generated line assigns to.
        feature: Feature whose range and label are used.
        key: Widget key; defaults to the feature name.
        indent: Leading whitespace for the generated line.

    Returns:
        The generated source line, or None when the feature has no computed
        range or no entry in ``slider_mapping``.
    """
    entry = ranges_by_feature.get(feature)
    if entry is None or feature not in slider_mapping:
        return None

    label, step = slider_mapping[feature]
    bounds = (entry["Rec_Min"], entry["Rec_Max"], entry["Default"])
    if isinstance(step, float):
        # st.slider requires min_value, max_value, value and step to share one
        # numeric type; integer bounds with a 0.1 step are rejected at runtime,
        # so emit the bounds as floats whenever the step is a float.
        bounds = tuple(float(bound) for bound in bounds)

    args = ", ".join(f"{bound}" for bound in bounds)
    if step:
        args += f", {step}"
    widget_key = feature if key is None else key
    return f'{indent}{var_name} = st.slider("{label}", {args}, key="{widget_key}")'


def _print_column(wanted, make_var_name, indent="    "):
    """Print the sliders for *wanted* features in ``results`` order.

    Emits a ``pass`` placeholder when none of the features has a range, so the
    generated ``with colN:`` block is still valid Python.
    """
    candidates = (
        _slider_line(make_var_name(name), name, indent=indent)
        for name in ranges_by_feature
        if name in wanted
    )
    lines = [line for line in candidates if line is not None]
    if not lines:
        lines = [f"{indent}pass  # no range computed for: {', '.join(wanted)}"]
    for line in lines:
        print(line)


print("\n# PATIENT DEMOGRAPHICS")
print("st.markdown('<div class=\"section-header\">PATIENT DEMOGRAPHICS</div>', unsafe_allow_html=True)")

age_line = _slider_line("age", "age", key="age")
if age_line is not None:
    print(age_line)
    print()

print("\n# HEMATOLOGY")
print("st.markdown('<div class=\"section-header\">HEMATOLOGY</div>', unsafe_allow_html=True)")
print("col1, col2 = st.columns(2)")
print("with col1:")
# Variable name is the feature name itself.
_print_column(["hemoglobin_min", "hemoglobin_max", "rbc_count_max"], lambda name: name)

print("with col2:")
# Variable name is "neut_" plus everything after the first underscore,
# e.g. neutrophils_abs_min -> neut_abs_min.
_print_column(
    ["neutrophils_abs_min", "neutrophils_pct_min"],
    lambda name: "neut_" + "_".join(name.split("_")[1:]),
)

print("\n# RENAL FUNCTION")
print("st.markdown('<div class=\"section-header\">RENAL FUNCTION</div>', unsafe_allow_html=True)")

for feature in ranges_by_feature:
    if "eGFR" in feature:
        egfr_line = _slider_line("egfr", feature, key="egfr")
        if egfr_line is not None:
            print(egfr_line)

print("\n# METABOLIC & ELECTROLYTES")
print("st.markdown('<div class=\"section-header\">METABOLIC & ELECTROLYTES</div>', unsafe_allow_html=True)")
print("col1, col2, col3 = st.columns(3)")

metab_features = ["glucose_min", "lactate_max", "sodium_max"]
for column_number, feature in enumerate(metab_features, start=1):
    print(f"with col{column_number}:")

    # Variable name is the first two underscore-separated parts of the feature.
    var_name = "_".join(feature.split("_")[:2])
    metab_line = _slider_line(var_name, feature, indent="    ")
    if metab_line is None:
        # Keep the generated `with` block valid when the feature has no range.
        metab_line = f"    pass  # no range computed for: {feature}"
    print(metab_line)

# ═══════════════════════════════════════════════════════════════════════════════
# BINARY FEATURES SUMMARY
# ═══════════════════════════════════════════════════════════════════════════════

print("\n" + "="*80)
print("BINARY FEATURES (NO RANGE NEEDED)")
print("="*80)

# Report how often each binary feature equals 1 ("Yes") or 0 ("No") among the
# non-missing rows of df_train.
for feature in binary_features:
    if feature not in df_train.columns:
        # Same reporting as the continuous-feature loop, instead of a silent skip.
        print(f"\n❌ {feature}: NOT FOUND IN DATASET")
        continue

    data = df_train[feature].dropna()
    total = len(data)
    if total == 0:
        # Avoid dividing by zero (which would print nan percentages).
        print(f"\n⚠️ {feature}: NO DATA")
        continue

    count_yes = int((data == 1).sum())
    count_no = int((data == 0).sum())
    count_other = total - count_yes - count_no
    pct_yes = (count_yes / total) * 100
    # Computed from count_no so the percentage always matches the count shown,
    # even when the column holds values other than 0 and 1.
    pct_no = (count_no / total) * 100

    print(f"\n{feature}:")
    print(f"  Yes: {count_yes} ({pct_yes:.1f}%)")
    print(f"  No: {count_no} ({pct_no:.1f}%)")
    if count_other:
        print(f"  Other values: {count_other} ({(count_other / total) * 100:.1f}%)")

print("\n" + "="*80)
print("ANALYSIS COMPLETE")
print("="*80)
# The timestamp below is a literal string, not the current time.
print(f"UTC: 2025-10-20 12:14:44")
print(f"User: zainzampawala786-sudo")