In [1]:
# NB21 — repo root
from pathlib import Path
def find_repo_root(start: Path, must_have=("data","artifacts"), max_depth: int = 6) -> Path:
    """Walk upward from ``start`` until a directory holding every marker is found.

    Args:
        start: Directory to begin the search from.
        must_have: Entry names that must all exist directly inside the root.
        max_depth: Number of directories to inspect: ``start`` itself plus up
            to ``max_depth - 1`` ancestors. Defaults to the original limit of 6.

    Returns:
        The resolved path of the first inspected directory containing all markers.

    Raises:
        FileNotFoundError: If none of the inspected directories qualifies. The
            message names the markers, the depth and the starting directory.
    """
    def has_markers(folder: Path) -> bool:
        return all((folder / marker).exists() for marker in must_have)

    origin = start.resolve()
    cur = origin
    for _ in range(max_depth):
        if has_markers(cur):
            return cur
        if cur.parent == cur:
            # Reached the filesystem root; climbing further would re-test the same path.
            break
        cur = cur.parent
    # Fallback kept from the original: checks the *unresolved* parent of a
    # "notebooks" directory, which can differ from the resolved walk above
    # (presumably for symlinked checkouts — TODO confirm it is still needed).
    if start.name.lower() == "notebooks" and has_markers(start.parent):
        return start.parent.resolve()
    raise FileNotFoundError(
        f"Repo root not found: no directory containing {list(must_have)} "
        f"within {max_depth} level(s) starting at {origin}"
    )
# Resolve the repository root once, starting from the kernel's working directory;
# later cells build every output path from ROOT.
start_dir = Path.cwd()
ROOT = find_repo_root(start_dir)
print("Repo root →", ROOT)


Repo root → C:\.projects\stock-direction-ml\stock-direction-ml\notebooks


In [2]:
# NB21 — write app/lib_monitor.py
from textwrap import dedent
code = dedent("""
# app/lib_monitor.py
from __future__ import annotations
import json
from pathlib import Path
from typing import Dict, Any, Tuple

def _load_json(p: Path) -> Dict[str, Any]:
    '''Best-effort read of a JSON object from ``p``.

    Returns an empty dict when the file is missing or unreadable, when its
    content is not valid JSON/UTF-8, or when the top-level JSON value is not
    an object, so callers can always use dict methods on the result.
    '''
    try:
        payload = json.loads(p.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file; ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return {}
    return payload if isinstance(payload, dict) else {}

def load_monitoring(root: Path) -> Tuple[dict, dict, dict]:
    '''Load the three monitoring artifacts stored under ``root / "artifacts"``.

    Returns:
        ``(monitor, paper, backtest)`` dicts read from monitor_snapshot.json,
        paper_trade.json and backtest_summary.json. Any artifact that cannot
        be loaded is returned as ``{}``.
    '''
    art = root / "artifacts"
    monitor   = _load_json(art / "monitor_snapshot.json")
    paper     = _load_json(art / "paper_trade.json")
    backtest  = _load_json(art / "backtest_summary.json")
    return monitor, paper, backtest

def _get(d: dict, *keys, default=None):
    '''Follow ``keys`` through nested dicts; return ``default`` as soon as a level is missing.'''
    cur = d
    for k in keys:
        if not isinstance(cur, dict) or k not in cur: return default
        cur = cur[k]
    return cur

def _first_present(d: dict, *paths):
    '''Return the first non-None value found at any of ``paths`` (each a tuple of keys).

    Unlike chaining lookups with ``or``, a present-but-falsy value such as
    ``0``, ``0.0`` or ``False`` is returned instead of being skipped.
    '''
    for path in paths:
        value = _get(d, *path)
        if value is not None:
            return value
    return None

def summarize_monitor(m: dict) -> dict:
    '''Flatten a monitor snapshot into the KPI/drift fields shown in the app.

    Each field is read from its primary location, falling back to the
    alternate location when the primary is absent or None. Missing fields
    come back as None.
    '''
    return {
        "updated_at":        _first_present(m, ("updated_at",), ("timestamp",)),
        "window_days":       _first_present(m, ("window_days",), ("config", "window_days")),
        "trades_60d":        _first_present(m, ("kpis", "num_trades_60d"), ("num_trades_60d",)),
        "winrate_60d":       _first_present(m, ("kpis", "winrate_60d"), ("winrate_60d",)),
        "avg_prob_60d":      _first_present(m, ("kpis", "avg_prob_60d"), ("avg_prob_60d",)),
        "avg_ret_60d":       _first_present(m, ("kpis", "avg_ret_60d"), ("avg_ret_60d",)),
        "psi_max":           _max_numeric(_get(m, "drift", "psi_by_feature")),
        "ks_max":            _max_numeric(_get(m, "drift", "ks_by_feature")),
    }

def summarize_paper(p: dict) -> dict:
    '''Extract period, final equity and trade count from a paper-trade artifact.

    ``num_trades`` uses the explicit count when present (including 0) and
    otherwise the length of the ``trades`` list, if there is one.
    '''
    num_trades = _get(p, "num_trades")
    if num_trades is None:
        trades = _get(p, "trades")
        num_trades = len(trades) if isinstance(trades, list) else None
    return {
        "start":      _first_present(p, ("start",), ("period", "start")),
        "end":        _first_present(p, ("end",), ("period", "end")),
        "final_equity": _first_present(p, ("final_equity",), ("equity", "final")),
        "num_trades": num_trades,
    }

def summarize_backtest(b: dict) -> dict:
    '''Extract fee, AUC, threshold and parity flag from a backtest summary.

    ``parity_ok`` is True when any of ``parity_ok``, ``parity.ok`` or
    ``matched`` is truthy, False when at least one is present but none is
    truthy, and None when the artifact carries no parity information at all,
    so callers can tell "failed" apart from "not reported".
    '''
    parity_flags = [_get(b, "parity_ok"), _get(b, "parity", "ok"), _get(b, "matched")]
    reported = [flag for flag in parity_flags if flag is not None]
    return {
        "fee_bps":        _get(b, "fee_bps"),
        "auc":            _first_present(b, ("metrics", "auc"), ("auc",)),
        "threshold_tau":  _first_present(b, ("threshold", "tau"), ("tau",)),
        "parity_ok":      any(bool(flag) for flag in reported) if reported else None,
    }

def _max_numeric(d: dict | None):
    '''Return the largest numeric value in ``d`` as a float, or None if there is none.

    Entries that cannot be converted to float (None, non-numeric strings, ...)
    and NaN entries are skipped individually, so one bad value no longer hides
    the whole metric and the result does not depend on key order.
    '''
    if not isinstance(d, dict): return None
    vals = []
    for raw in d.values():
        try:
            num = float(raw)
        except (TypeError, ValueError, OverflowError):
            continue
        if num == num:  # NaN is the only float that is not equal to itself
            vals.append(num)
    return max(vals) if vals else None

def promotion_checks(mon: dict, back: dict, min_winrate: float = 0.52, max_psi: float = 0.2) -> dict:
    '''Evaluate heuristic promotion rules against summarized monitoring data.

    A rule is only evaluated when its metric is present (not None).

    Args:
        mon: Summarized monitor dict; reads ``winrate_60d`` and ``psi_max``.
        back: Summarized backtest dict; reads ``parity_ok``.
        min_winrate: Minimum acceptable 60-day win rate.
        max_psi: Maximum acceptable PSI drift value.

    Returns:
        ``{"status": ..., "rules": [...]}`` where status is "PASS" when every
        evaluated rule passed, "HOLD" when any failed, and "UNKNOWN" when no
        rule could be evaluated. Each rule entry has "rule", "value" and
        "pass". A metric that is present but not numeric fails its rule
        rather than raising.
    '''
    def as_number(value):
        try:
            return float(value)
        except (TypeError, ValueError):
            return None

    mon = mon or {}
    back = back or {}
    rules = []

    # R1: win rate must reach the floor (if we have it)
    wr = mon.get("winrate_60d")
    if wr is not None:
        wr_num = as_number(wr)
        rules.append({"rule": f"winrate_60d >= {min_winrate}", "value": wr,
                      "pass": wr_num is not None and wr_num >= min_winrate})

    # R2: drift must stay at or below the PSI ceiling
    psi = mon.get("psi_max")
    if psi is not None:
        psi_num = as_number(psi)
        rules.append({"rule": f"psi_max <= {max_psi}", "value": psi,
                      "pass": psi_num is not None and psi_num <= max_psi})

    # R3: backtest parity ok (if available)
    parity = back.get("parity_ok")
    if parity is not None:
        rules.append({"rule": "backtest parity ok", "value": parity, "pass": bool(parity)})

    if not rules:
        status = "UNKNOWN"
    elif all(rule["pass"] for rule in rules):
        status = "PASS"
    else:
        status = "HOLD"
    return {"status": status, "rules": rules}
""").lstrip()
# Persist the generated module under ROOT/app. The directory is expected to
# exist already; write_text raises if it does not.
out = ROOT.joinpath("app", "lib_monitor.py")
out.write_text(code, encoding="utf-8")
print("Wrote:", out.resolve())


Wrote: C:\.projects\stock-direction-ml\stock-direction-ml\notebooks\app\lib_monitor.py


In [3]:
# NB21 — overwrite app/streamlit_app.py with Monitoring tab
from textwrap import dedent
code = dedent("""
# app/streamlit_app.py — model & monitoring tabs
import numpy as np, pandas as pd, matplotlib.pyplot as plt, streamlit as st

from app.config import ROOT, DEFAULT_TAU
from app.lib_artifacts import load_artifacts
from app.lib_fetch import load_repo_df, fetch_equity_df, fetch_crypto_df
from app.lib_features import make_dataset
from app.lib_eval import predict_proba, metrics_all, tau_sweep
from app.lib_monitor import load_monitoring, summarize_monitor, summarize_paper, summarize_backtest, promotion_checks

# Page setup: wide layout, plus the title rendered above both tabs.
st.set_page_config(layout="wide", page_title="Direction Classifier")
st.title("📈 Direction Classifier — Any Ticker (Equities & Crypto)")

# ---- Sidebar: data source ----
# Produces `df`, `ticker` and `asset_class` for the rest of the script, either
# from the repo data file or from a Yahoo fetch.
with st.sidebar:
    st.header("Data source")
    src = st.radio("Choose", ["Repo file","Fetch (Yahoo)"], index=0)
    asset_class = st.selectbox("Asset class", ["equity","crypto"], index=0)

    if src == "Repo file":
        df = load_repo_df()
        if "ticker" in df.columns:
            ticks = sorted(df["ticker"].dropna().unique().tolist())
            # Pre-select the ticker with the most rows.
            default = df["ticker"].value_counts().idxmax()
            ticker = st.selectbox("Ticker", ticks, index=ticks.index(default))
            df = df.loc[df["ticker"]==ticker].copy()
            st.caption(f"Ticker: **{ticker}**  •  Rows: {len(df)}")
        else:
            ticker = None
            st.caption("No 'ticker' column; using all rows.")
        if "date" in df.columns:
            dmin, dmax = df["date"].min(), df["date"].max()
            picked = st.date_input("Date range", value=(dmin.date(), dmax.date()),
                                   min_value=dmin.date(), max_value=dmax.date())
            # A range picker yields fewer than two dates while the user is
            # mid-selection; unpacking it blindly would raise.
            if not isinstance(picked, (tuple, list)) or len(picked) != 2:
                st.info("Select both a start and an end date.")
                st.stop()
            start, end = picked
            df = df.loc[df["date"].dt.date.between(start, end)].copy()
    else:
        ticker = st.text_input("Ticker", value=("AAPL" if asset_class=="equity" else "BTC-USD"))
        dates = st.date_input("Fetch range (UTC)", value=(pd.to_datetime("2023-01-01").date(), pd.Timestamp.today().date()))
        btn = st.button("Fetch data")
        picked = tuple(dates) if isinstance(dates, (tuple, list)) else (dates,)
        # Identifies what was fetched, so a cached frame is reused only for the same request.
        request = (asset_class, ticker, picked)
        if btn:
            if len(picked) != 2:
                st.error("Select both a start and an end date before fetching.")
                st.stop()
            try:
                if asset_class=="equity":
                    df = fetch_equity_df(ticker, picked[0], picked[1])
                else:
                    df = fetch_crypto_df(ticker, picked[0], picked[1])
                st.success(f"Fetched {len(df)} rows for {ticker}")
            except Exception as e:
                st.error(f"Fetch failed: {e}"); st.stop()
            # The button is True only on the rerun triggered by the click, so
            # keep the result for reruns caused by other widgets (e.g. the τ slider).
            st.session_state["_fetched_data"] = (request, df)
        elif "_fetched_data" in st.session_state and st.session_state["_fetched_data"][0] == request:
            df = st.session_state["_fetched_data"][1]
            st.caption(f"Using {len(df)} previously fetched rows for {ticker}")
        else:
            st.stop()

# ---- Artifacts (with crypto folder support for tau_map) ----
# Anything other than "equity" is loaded from the crypto artifact set.
artifact_class = "equity" if asset_class=="equity" else "crypto"
try:
    feature_list, scaler, model, tau_art, tau_map = load_artifacts(asset_class=artifact_class)
except Exception:
    st.warning("Artifacts not found for selected asset class; falling back to equity artifacts.")
    feature_list, scaler, model, tau_art, tau_map = load_artifacts("equity")

# Slider default: with a ticker, the per-ticker entry in tau_map wins, then the
# artifact-level tau, then DEFAULT_TAU; without one, tau_art or DEFAULT_TAU.
if ticker:
    fallback_tau = DEFAULT_TAU if tau_art is None else tau_art
    default_tau = float(tau_map.get(ticker, fallback_tau))
else:
    default_tau = tau_art or DEFAULT_TAU

# ---- Dataset & predictions ----
# Build the feature matrix, halt if nothing usable is left, then scale and
# score. Probabilities are clipped away from exactly 0 and 1.
X, y, retn, idx, used_cols = make_dataset(df, feature_list)
if len(X)==0:
    st.error("No usable rows after feature alignment/NA drop.")
    st.stop()
Xs = scaler.transform(X)
raw_proba = predict_proba(model, Xs)
p = np.clip(raw_proba, 1e-6, 1-1e-6)

# ---- Tabs ----
tab_model, tab_monitor = st.tabs(["🔮 Model", "🛡️ Monitoring & Promotion"])

with tab_model:
    # Threshold and fee controls are rendered in the sidebar, not in the tab body.
    with st.sidebar:
        tau     = st.slider("Decision threshold (τ)", 0.00, 1.00, value=float(round(default_tau,2)), step=0.01)
        fee_bps = st.number_input("Fee (bps) per position flip", value=5, min_value=0, max_value=100, step=1)

    # Metrics (only when labels line up one-to-one with the predictions)
    c1,c2,c3,c4 = st.columns(4)
    if y is not None and len(y)==len(p):
        m = metrics_all(y, p)
        c1.metric("ROC AUC", f"{m['auc']:.3f}" if np.isfinite(m['auc']) else "n/a")
        c2.metric("PR AUC",  f"{m['ap']:.3f}" if np.isfinite(m['ap']) else "n/a")
        c3.metric("Brier",    f"{m['brier']:.4f}" if np.isfinite(m['brier']) else "n/a")
        c4.metric("Log Loss", f"{m['logloss']:.4f}" if np.isfinite(m['logloss']) else "n/a")
    else:
        for c in (c1,c2,c3,c4): c.metric("—","—")
        st.info("Labels not available; showing predictions/equity only.")

    # Equity vs B&H
    sig = (p >= tau).astype(int)               # 1 = in position, 0 = flat
    flips = np.zeros_like(sig)
    if len(flips)>1: flips[1:] = (sig[1:] != sig[:-1]).astype(int)   # 1 wherever the signal changes
    fee = flips * (fee_bps/10000.0)            # bps -> fraction, charged on each flip
    eq  = np.cumprod(1 + (retn*sig - fee))
    bh  = np.cumprod(1 + retn)

    dates_axis = (df.iloc[idx]["date"].values if "date" in df.columns else df.index.values)
    st.subheader("Equity Curve vs. Buy & Hold")
    fig, ax = plt.subplots()
    ax.plot(dates_axis, bh,  label="Buy & Hold")
    ax.plot(dates_axis, eq,  label=f"Strategy (τ={tau:.2f}, fee={fee_bps}bps)")
    ax.set_xlabel("Date" if "date" in df.columns else "Index"); ax.set_ylabel("Equity (×)")
    ax.legend(); st.pyplot(fig)
    # pyplot keeps every figure alive until it is closed; the script reruns on
    # each interaction, so release the figure once it has been rendered.
    plt.close(fig)

    # τ-sweep
    with st.expander("τ-sweep (F1 & Final Equity)"):
        if y is not None and len(y)==len(p):
            grid, f1s, finals = tau_sweep(y, p, retn, fee_bps=fee_bps)
            best_f1_tau = float(grid[int(np.nanargmax(f1s))])
            best_eq_tau = float(grid[int(np.nanargmax(finals))])
            st.write({"best_f1_tau":best_f1_tau, "best_final_equity_tau":best_eq_tau})
            f, axf = plt.subplots()
            axf.plot(grid, f1s, label="F1 vs τ")
            axf.set_xlabel("τ"); axf.set_ylabel("F1"); axf.legend()
            st.pyplot(f)
            plt.close(f)   # same reason as above: avoid accumulating figures across reruns
        else:
            st.info("Labels not available; τ-sweep (F1) disabled.")

    # Tail & CSV
    pred_df = pd.DataFrame({"date": dates_axis, "proba": p, "signal": sig})
    if "close" in df.columns: pred_df["close"] = df.iloc[idx]["close"].values
    st.subheader("Latest predictions (tail)")
    st.dataframe(pred_df.tail(12))   # tail() already copes with frames shorter than 12 rows
    st.download_button("Download predictions CSV",
        data=pred_df.to_csv(index=False).encode("utf-8"),
        file_name="predictions.csv", mime="text/csv")

import json  # serialises the raw artifacts for the download buttons below

def _fmt_metric(value, spec):
    # Format numeric KPIs with the given format spec; anything else shows as "n/a".
    return format(value, spec) if isinstance(value, (int, float)) else "n/a"

with tab_monitor:
    st.subheader("Monitoring snapshot & Promotion readiness")
    mon_raw, paper_raw, back_raw = load_monitoring(ROOT)
    # An empty raw artifact yields an empty summary, so every tile below falls back to "n/a".
    mon = summarize_monitor(mon_raw) if mon_raw else {}
    pap = summarize_paper(paper_raw) if paper_raw else {}
    bak = summarize_backtest(back_raw) if back_raw else {}

    c1,c2,c3,c4 = st.columns(4)
    c1.metric("Winrate (60d)", _fmt_metric(mon.get('winrate_60d'), ".2%"))
    c2.metric("Trades (60d)",  f"{mon.get('trades_60d')}" if mon.get('trades_60d') is not None else "n/a")
    c3.metric("PSI max",       _fmt_metric(mon.get('psi_max'), ".3f"))
    c4.metric("KS max",        _fmt_metric(mon.get('ks_max'), ".3f"))

    c5,c6,c7 = st.columns(3)
    c5.metric("Paper final equity", _fmt_metric(pap.get('final_equity'), ".3f"))
    c6.metric("Backtest AUC",       _fmt_metric(bak.get('auc'), ".3f"))
    c7.metric("Backtest parity",    "OK" if bak.get('parity_ok') else ("n/a" if bak.get('parity_ok') is None else "FAIL"))

    checks = promotion_checks(mon, bak)
    st.markdown(f"**Promotion status:** {'🟢 PASS' if checks['status']=='PASS' else ('🟡 UNKNOWN' if checks['status']=='UNKNOWN' else '🟠 HOLD')}")
    if checks["rules"]:
        st.write(pd.DataFrame(checks["rules"]))

    with st.expander("Raw artifacts"):
        artifact_files = (
            ("monitor_snapshot.json", mon_raw),
            ("paper_trade.json", paper_raw),
            ("backtest_summary.json", back_raw),
        )
        for col, (fname, raw) in zip(st.columns(3), artifact_files):
            # str(dict) produces a Python repr (single quotes, True/None), which is
            # not valid JSON; json.dumps keeps the download loadable as .json.
            payload = json.dumps(raw, indent=2).encode('utf-8') if raw else b''
            col.download_button(fname, data=payload, file_name=fname, mime="application/json")

    if not mon_raw:  st.info("No artifacts/monitor_snapshot.json found — NB11 creates it.")
    if not paper_raw: st.info("No artifacts/paper_trade.json found — NB10/NB14 create it.")
    if not back_raw:  st.info("No artifacts/backtest_summary.json found — NB7 writes it.")

st.caption("Monitoring shows 60d KPIs, drift, backtest parity, and a promotion recommendation.")
""").lstrip()
# Overwrite the Streamlit entry point under ROOT/app with the generated source.
out = ROOT.joinpath("app", "streamlit_app.py")
out.write_text(code, encoding="utf-8")
print("Wrote:", out.resolve())


Wrote: C:\.projects\stock-direction-ml\stock-direction-ml\notebooks\app\streamlit_app.py


In [4]:
# NB21 — syntax check
# Byte-compile each generated file without executing it; compile() raises
# SyntaxError (tagged with the file path) on the first malformed file.
for rel_path in ("app/lib_monitor.py", "app/streamlit_app.py"):
    target = ROOT / rel_path
    compile(target.read_text(encoding="utf-8"), str(target), "exec")
print("Syntax OK")


Syntax OK


In [5]:
# NB21 — presence check for monitoring artifacts
# The three files the Monitoring tab reads, relative to ROOT.
arts = [
    "artifacts/monitor_snapshot.json",
    "artifacts/paper_trade.json",
    "artifacts/backtest_summary.json",
]
for rel_path in arts:
    present = (ROOT / rel_path).exists()
    print(f"{rel_path:35s}:", present)


artifacts/monitor_snapshot.json    : True
artifacts/paper_trade.json         : True
artifacts/backtest_summary.json    : True


In [6]:
# Closing reminder: how to launch the app and where to find the new tab.
for line in (
    "Run the app with the new Monitoring tab:",
    "  streamlit run app/streamlit_app.py",
    "Then open the '🛡️ Monitoring & Promotion' tab.",
):
    print(line)


Run the app with the new Monitoring tab:
  streamlit run app/streamlit_app.py
Then open the '🛡️ Monitoring & Promotion' tab.
