In [None]:
import glob
import os

from google.colab import drive

# Mount Google Drive so the project tree under MyDrive is reachable.
drive.mount('/content/drive')

# Project layout: one root, a folder for final model bundles, and one output
# folder per "day" of the project.
BASE = "/content/drive/MyDrive/heartriskx"
MODEL_DIR = f"{BASE}/models/final"
OUT_DIRS = [
    f"{BASE}/outputs/{sub}"
    for sub in ("day6", "day7", "day8", "day9", "day10", "day11/day11_shap")
]

# Create every folder up front; exist_ok makes the cell safe to re-run.
for folder in (BASE, MODEL_DIR, *OUT_DIRS):
    os.makedirs(folder, exist_ok=True)

# Sanity check only: report which bundle descriptors are present and warn
# (without failing) when any of the three expected ones is absent.
expected = {"heart2020_bundle.json", "cardio_bundle.json", "uci_bundle.json"}
present = {os.path.basename(path) for path in glob.glob(f"{MODEL_DIR}/*_bundle.json")}
print("MODEL_DIR:", MODEL_DIR)
print("Bundles found:", present)
missing = expected - present
if missing:
    print("‚ö†Ô∏è Missing bundles in", MODEL_DIR, "‚Üí", missing)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
MODEL_DIR: /content/drive/MyDrive/heartriskx/models/final
Bundles found: {'cardio_bundle.json', 'heart2020_bundle.json', 'uci_bundle.json'}


In [None]:
import os, textwrap

APP_PATH = "/content/drive/MyDrive/heartriskx/app.py"

APP_CODE = r"""
import os, json, joblib, numpy as np, pandas as pd
import streamlit as st

# ---------- Config ----------
# Every path below hangs off this single project root on the mounted Drive.
_PROJECT_ROOT = "/content/drive/MyDrive/heartriskx"

# Folder that holds the *_bundle.json descriptors listed in BUNDLES.
MODEL_DIR = _PROJECT_ROOT + "/models/final"

# Folders scanned (recursively) by the Visuals tab for saved images.
OUT_DIRS = [
    f"{_PROJECT_ROOT}/outputs/{sub}"
    for sub in ("day6", "day7", "day8", "day9", "day10", "day11/day11_shap")
]

# Sidebar label -> bundle file name inside MODEL_DIR. Insertion order is the
# order shown in the sidebar selectbox.
BUNDLES = dict(
    Heart2020="heart2020_bundle.json",
    Cardio="cardio_bundle.json",
    UCI="uci_bundle.json",
)

# Page title and wide layout for the whole app.
# NOTE(review): Streamlit has historically required set_page_config to run
# before any other st.* UI call - keep it above the sidebar/tab code.
st.set_page_config(page_title="HeartRiskX", layout="wide")

# ---------- Utils ----------
@st.cache_resource(show_spinner=False)
def load_bundle(bundle_path):
    # Load one model bundle and return (pipeline, threshold, raw_metadata).
    #
    # bundle_path: path to a JSON descriptor that must contain "pipeline_path"
    #              (location of the joblib-serialised pipeline) and may contain
    #              "threshold" (decision cut-off; 0.5 when absent or null).
    # Raises KeyError when "pipeline_path" is missing; file/JSON/joblib errors
    # propagate unchanged so the caller can report them.
    # Cached with st.cache_resource, so each bundle_path is loaded once.
    with open(bundle_path, "r", encoding="utf-8") as f:
        info = json.load(f)

    if "pipeline_path" not in info:
        raise KeyError(f"'pipeline_path' missing from bundle {bundle_path}")

    # SECURITY: joblib.load unpickles the file and can execute arbitrary code.
    # Only point bundles at pipeline files you produced yourself.
    pipe = joblib.load(info["pipeline_path"])

    # A JSON null (or a numeric string) would otherwise reach f"{thr:.3f}" and
    # numpy comparisons as a non-float; normalise it here.
    thr = info.get("threshold")
    thr = 0.5 if thr is None else float(thr)
    return pipe, thr, info

def get_cols_by_role(prep):
    # Collect the raw input columns of a fitted ColumnTransformer-like object,
    # split by the transformer names "num" and "cat".
    #
    # prep: object exposing transformers_ as (name, transformer, columns) triples.
    # Returns (num_cols, cat_cols) as two lists, or (None, None) when the
    # columns cannot be read (no transformers_ attribute, malformed triples,
    # or a column spec that is not iterable such as a slice).
    num_cols, cat_cols = [], []
    try:
        for name, _transformer, cols in prep.transformers_:
            if name not in ("num", "cat"):
                continue
            # A single column may be given as a bare string; list("age") would
            # wrongly split it into characters, so wrap it instead.
            names = [cols] if isinstance(cols, str) else list(cols)
            if name == "num":
                num_cols += names
            else:
                cat_cols += names
        return num_cols, cat_cols
    except Exception:
        # Deliberate best-effort: callers treat (None, None) as "schema
        # unknown" and skip alignment instead of failing.
        return None, None

def expected_raw_cols(prep):
    # Ordered raw schema expected by the preprocessor: numeric columns first,
    # then categorical ones. Returns None when the schema is unknown.
    nc, cc = get_cols_by_role(prep)
    if nc is None: return None
    # Dedupe keeping first-seen order. A column listed under both roles would
    # otherwise appear twice and produce duplicate labels when the frame is
    # reindexed to this schema.
    return list(dict.fromkeys([*nc, *cc]))

def align_like_pipeline(pipe, X):
    # Return a copy of X shaped like the raw schema of the pipeline's "prep"
    # step: missing columns are added with defaults, numeric columns are
    # coerced to numbers (unparseable -> 0.0), categorical columns become
    # strings (missing -> "missing"), and columns are put in schema order.
    # When there is no "prep" step or its schema is unknown, X is copied as-is.
    prep = pipe.named_steps.get("prep")
    exp = expected_raw_cols(prep) if prep is not None else None
    if exp is None:
        return X.copy()

    num_cols, cat_cols = get_cols_by_role(prep)
    aligned = X.copy()

    # Numeric role first, categorical second.
    for col in num_cols:
        if col not in aligned.columns:
            aligned[col] = 0.0
        aligned[col] = pd.to_numeric(aligned[col], errors="coerce").fillna(0.0)

    for col in cat_cols:
        if col not in aligned.columns:
            aligned[col] = "missing"
        aligned[col] = aligned[col].astype("string").fillna("missing")

    # Drops columns the pipeline does not know and fixes the column order.
    return aligned.reindex(columns=exp)

def predict_with_threshold(pipe, thr, X_df):
    # Score X_df with the pipeline and return a copy of the ORIGINAL input
    # columns plus two new ones:
    #   risk_proba - second column of predict_proba (index 1)
    #   risk_label - 1 where risk_proba >= thr, else 0
    positive_proba = pipe.predict_proba(align_like_pipeline(pipe, X_df))[:, 1]
    labels = (positive_proba >= thr).astype(int)
    return X_df.assign(risk_proba=positive_proba, risk_label=labels)

# ---------- Sidebar ----------
# Sidebar shows the model folder, lets the user pick a bundle, and loads it.
st.sidebar.title("HeartRiskX")
st.sidebar.code(MODEL_DIR)

# The selectbox offers the BUNDLES keys; the chosen key maps to a file name
# that is resolved inside MODEL_DIR.
choice = st.sidebar.selectbox("Choose dataset/model", list(BUNDLES.keys()))
bundle_path = os.path.join(MODEL_DIR, BUNDLES[choice])

# pipe / thr / meta are used by the tabs below. Any failure while loading (or
# while formatting the threshold) is shown in the sidebar and the script run
# is halted with st.stop(), so no tab is rendered without a model.
try:
    pipe, thr, meta = load_bundle(bundle_path)
    st.sidebar.success(f"Loaded {os.path.basename(bundle_path)} | thr={thr:.3f}")
except Exception as e:
    st.sidebar.error(f"Failed to load bundle: {e}")
    st.stop()

# ---------- Tabs ----------
# Four top-level tabs; st.tabs returns one container per label, in label
# order, and each `with tab_x:` section below renders into its container.
tab_pred, tab_batch, tab_visuals, tab_about = st.tabs(
    ["üîÆ Single Predict", "üì¶ Batch CSV", "üñºÔ∏è Visuals", "‚ÑπÔ∏è About"]
)

# ---------- Single Predict ----------
# Score one record typed as JSON. The text area is pre-filled with an example
# record for the selected dataset.
with tab_pred:
    st.header(f"Single Prediction ‚Äî {choice}")
    # Example records keyed by the same names as BUNDLES.
    examples = {
        "Cardio": {
            "age": 17505, "gender": 2, "height": 170, "weight": 70,
            "ap_hi": 120, "ap_lo": 80, "cholesterol": 1, "gluc": 1,
            "smoke": 0, "alco": 0, "active": 1
        },
        "Heart2020": {
            "BMI": 27.5, "Smoking": "No", "AlcoholDrinking": "No", "Stroke": "No",
            "PhysicalHealth": 0.0, "MentalHealth": 0.0, "DiffWalking": "No", "Sex": "Female",
            "AgeCategory": "60-64", "Race": "White", "Diabetic": "No",
            "PhysicalActivity": "Yes", "GenHealth": "Good", "SleepTime": 7.0,
            "Asthma": "No", "KidneyDisease": "No", "SkinCancer": "No"
        },
        "UCI": {
            "age": 54, "sex": 1, "cp": 3, "trestbps": 130, "chol": 246, "fbs": 0,
            "restecg": 1, "thalach": 150, "exang": 0, "oldpeak": 1.0, "slope": 2, "ca": 0, "thal": 3
        }
    }
    example = json.dumps(examples.get(choice, {}), indent=2)
    txt = st.text_area("Record JSON", value=example, height=240)
    if st.button("Predict"):
        try:
            rec = json.loads(txt)
            # Only a non-empty JSON object is a usable record. A list, scalar
            # or {} would yield a frame with none of the expected columns, and
            # alignment would then silently score a row made of defaults only.
            if not isinstance(rec, dict) or not rec:
                raise ValueError(
                    "Record JSON must be a non-empty JSON object of column/value pairs, "
                    f"got {type(rec).__name__}."
                )
            X = pd.DataFrame([rec])
            out = predict_with_threshold(pipe, thr, X)
            st.success("Done.")
            st.write(out)
            st.metric("Risk probability", f"{float(out['risk_proba'].iloc[0]):.3f}")
            st.metric("Risk label", int(out['risk_label'].iloc[0]))
        except Exception as e:
            # Covers invalid JSON, the validation above, and pipeline errors.
            st.error(f"Prediction failed: {e}")

# ---------- Batch ----------
# Score every row of an uploaded CSV and offer the result as a download.
with tab_batch:
    st.header(f"Batch CSV ‚Äî {choice}")
    uploaded = st.file_uploader("Upload CSV with RAW training columns", type=["csv"])
    # The run button is only rendered once a file has been uploaded
    # (short-circuit: st.button is not called while `uploaded` is None).
    run_clicked = uploaded is not None and st.button("Run batch prediction")
    if run_clicked:
        try:
            scored = predict_with_threshold(pipe, thr, pd.read_csv(uploaded))
            # Preview the first 20 rows; the download carries all of them.
            st.dataframe(scored.head(20))
            csv_bytes = scored.to_csv(index=False).encode("utf-8")
            st.download_button(
                "Download predictions.csv",
                csv_bytes,
                file_name="predictions.csv",
                mime="text/csv",
            )
        except Exception as e:
            st.error(f"Batch failed: {e}")

# ---------- Visuals ----------
# Gallery of every image found (recursively) under the OUT_DIRS folders,
# sorted by full path and laid out three per row.
with tab_visuals:
    st.header("Saved Visuals")
    image_exts = {".png", ".jpg", ".jpeg", ".gif"}
    found = sorted(
        os.path.join(folder, fname)
        for out_dir in OUT_DIRS
        if os.path.isdir(out_dir)
        for folder, _subdirs, fnames in os.walk(out_dir)
        for fname in fnames
        if os.path.splitext(fname)[1].lower() in image_exts
    )
    if found:
        per_row = 3
        for start in range(0, len(found), per_row):
            row = st.columns(per_row)
            # zip stops at the shorter side, so a short last row leaves the
            # remaining columns empty.
            for slot, path in zip(row, found[start:start + per_row]):
                label = os.path.basename(path)
                try:
                    slot.image(path, caption=label, use_container_width=True)
                except Exception as e:
                    # One unreadable image must not take down the whole tab.
                    slot.warning(f"Could not render {label}: {e}")
    else:
        st.info("No plots found. Run Day 6‚Äì11 to generate SHAP/ROC/PR/PDP images.")

# ---------- About ----------
# Static description of the app. The two spaces before each \n are a Markdown
# hard line break, so each sentence group renders on its own line.
with tab_about:
    st.header("About")
    st.markdown(
        "**HeartRiskX** ‚Äî Multi-dataset heart risk demo (Heart2020, Cardio, UCI).  \n"
        "Uses saved sklearn pipelines + thresholds. This app aligns incoming raw columns "
        "to the pipeline‚Äôs expected schema so you can score JSON or CSV quickly.  \n"
        "The Visuals tab auto-loads your PNGs from Day 6‚Äì11 outputs."
    )

"""

# Write the Streamlit app to Drive. APP_CODE contains non-ASCII characters
# (emoji, dashes), so pin the encoding instead of relying on the locale
# default, which open() uses when no encoding is given.
# textwrap.dedent is kept as a safeguard in case the source is ever indented.
with open(APP_PATH, "w", encoding="utf-8") as f:
    f.write(textwrap.dedent(APP_CODE))

print("‚úÖ Wrote", APP_PATH)


‚úÖ Wrote /content/drive/MyDrive/heartriskx/app.py


In [None]:
# ---- Install Cloudflare tunnel binary (Colab-safe) ----
import os, subprocess, sys, textwrap

# Download the static linux binary and put it on PATH
!curl -L https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64 \
  -o /usr/local/bin/cloudflared
!chmod +x /usr/local/bin/cloudflared
!cloudflared --version


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0 39.3M    0  1413    0     0   7186      0  1:35:35 --:--:--  1:35:35  7186
curl: (23) Failure writing output to destination
cloudflared version 2025.10.0 (built 2025-10-14-19:01 UTC)


In [None]:
import os, subprocess, time, re

APP   = "/content/drive/MyDrive/heartriskx/app.py"
PORT  = 8501

if not os.path.exists(APP):
    raise FileNotFoundError(f"{APP} not found. Run Cell B to write the file first.")

# Clean only our own processes (do NOT kill 'node')
for p in ("cloudflared", "streamlit"):
    try: subprocess.call(["pkill", "-f", p])
    except: pass

# Install (fast if already cached)
!pip -q install -U streamlit cloudflared

os.environ["STREAMLIT_BROWSER_GATHERUSAGESTATS"] = "false"

# Start Streamlit
st_proc = subprocess.Popen(
    ["streamlit","run",APP,"--server.port",str(PORT),"--server.address","0.0.0.0","--server.headless","true"],
    stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
)

time.sleep(3)

# Start Cloudflare tunnel
cf_proc = subprocess.Popen(
    ["cloudflared","tunnel","--url",f"http://localhost:{PORT}","--no-autoupdate"],
    stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
)

public_url = None
deadline = time.time() + 90
while time.time() < deadline:
    line = cf_proc.stdout.readline()
    if not line: time.sleep(0.2); continue
    m = re.search(r"(https://[a-zA-Z0-9\-]+\.trycloudflare\.com)", line)
    if m: public_url = m.group(1); break

if not public_url:
    print("‚ùå Could not obtain public URL. Check tunnel logs above.")
else:
    print("\nüåê PUBLIC URL:", public_url)
    print("Tip: A first 502 is normal‚Äîwait ~10‚Äì15s and refresh once.")



üåê PUBLIC URL: https://boulevard-prince-lime-rachel.trycloudflare.com
Tip: A first 502 is normal‚Äîwait ~10‚Äì15s and refresh once.
