In [28]:
# NB12 — Auto-Retrain & Promotion

import json, os, shutil
from pathlib import Path
from datetime import datetime, timezone

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, average_precision_score, brier_score_loss, log_loss
from joblib import dump, load

# Optional XGBoost (baseline-only; promotion still based on LR unless you flip a flag)
try:
    from xgboost import XGBClassifier
    HAS_XGB = True
except Exception:
    HAS_XGB = False

DATA = Path("data"); ART = Path("artifacts"); FIG = Path("reports/figures")
for p in [DATA, ART, FIG]: p.mkdir(parents=True, exist_ok=True)

# Retrain policy
SEED        = 42
FEE_BPS     = 5.0             # used in Sharpe-based threshold tuning
TARGET_COV  = 0.20            # coverage target if you prefer quantile threshold
SPLIT       = (0.60, 0.20, 0.20)  # train/val/test (time-ordered)
PROMOTE_RULE = dict(
    min_auc_gain = 0.00,      # require >= this AUC gain vs prod on TEST
    min_sharpe   = 0.00       # require new Sharpe >= this on TEST (>=0.0 means non-negative)
)

RUN_ID = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H%M%SZ")
print("NB12 config → run_id:", RUN_ID, "| fee_bps:", FEE_BPS, "| target_cov:", TARGET_COV)


NB12 config → run_id: 2025-10-23T004239Z | fee_bps: 5.0 | target_cov: 0.2


In [29]:
# Load df (CSV/Parquet) and features from ART
df_path = DATA/"df_nb02.parquet" if (DATA/"df_nb02.parquet").exists() else DATA/"df_nb02.csv"
assert df_path.exists(), "df_nb02 not found — run earlier notebooks"
df = pd.read_parquet(df_path) if df_path.suffix==".parquet" else pd.read_csv(df_path, parse_dates=["date"])
df["date"] = pd.to_datetime(df["date"], errors="coerce")

feat_cols = json.loads((ART/"feature_list.json").read_text(encoding="utf-8"))
feat_cols = [c for c in feat_cols if c in df.columns]  # intersect
assert feat_cols, "feature_list.json produced empty feature set against df"

# Label: use existing y or derive next-day up
if "y" not in df.columns:
    if "ret1" in df.columns:
        df["y"] = (df["ret1"].shift(-1) > 0).astype(int)
    elif "close" in df.columns:
        df["y"] = (df["close"].pct_change().shift(-1) > 0).astype(int)
    else:
        raise RuntimeError("No label present and cannot derive from prices.")

df = df.sort_values("date").dropna(subset=["date"]).reset_index(drop=True)

n = len(df); n_tr = int(SPLIT[0]*n); n_va = int(SPLIT[1]*n)
df_tr = df.iloc[:n_tr].copy()
df_va = df.iloc[n_tr:n_tr+n_va].copy()
df_te = df.iloc[n_tr+n_va:].copy()

X_tr, y_tr = df_tr[feat_cols].to_numpy(), df_tr["y"].astype(int).to_numpy()
X_va, y_va = df_va[feat_cols].to_numpy(), df_va["y"].astype(int).to_numpy()
X_te, y_te = df_te[feat_cols].to_numpy(), df_te["y"].astype(int).to_numpy()

scaler = StandardScaler().fit(X_tr)
Xs_tr = scaler.transform(X_tr)
Xs_va = scaler.transform(X_va)
Xs_te = scaler.transform(X_te)

print("Rows/Features:", n, "/", len(feat_cols))
print("Splits:", df_tr.shape, df_va.shape, df_te.shape)
print("Pos rate (train/val/test):", y_tr.mean().round(3), y_va.mean().round(3), y_te.mean().round(3))
print("Span:", df["date"].min().date(), "→", df["date"].max().date())


Rows/Features: 2686 / 16
Splits: (1611, 22) (537, 22) (538, 22)
Pos rate (train/val/test): 0.529 0.514 0.548
Span: 2015-02-06 → 2025-10-10


In [30]:
def metric_block(y, p):
    p = np.clip(np.asarray(p), 1e-15, 1-1e-15)
    return dict(
        AUC=float(roc_auc_score(y, p)) if len(np.unique(y))>1 else np.nan,
        PR_AUC=float(average_precision_score(y, p)),
        Brier=float(brier_score_loss(y, p)),
        LogLoss=float(log_loss(y, p)),
        PosRate=float(np.mean(y)),
    )

candidates = {}

# Logistic Regression (standardized)
lr = LogisticRegression(max_iter=2000, class_weight="balanced", random_state=SEED)
lr.fit(Xs_tr, y_tr)
p_va_lr = lr.predict_proba(Xs_va)[:,1]
p_te_lr = lr.predict_proba(Xs_te)[:,1]
candidates["LR"] = dict(model=lr, scaler=scaler, p_va=p_va_lr, p_te=p_te_lr, std=True)

# Optional XGB (often without standardization)
if HAS_XGB:
    xgb = XGBClassifier(
        n_estimators=400, max_depth=4, learning_rate=0.05,
        subsample=0.8, colsample_bytree=0.8, reg_lambda=1.0,
        random_state=SEED, n_jobs=0, objective="binary:logistic", eval_metric="logloss"
    )
    xgb.fit(X_tr, y_tr)
    p_va_xgb = xgb.predict_proba(X_va)[:,1]
    p_te_xgb = xgb.predict_proba(X_te)[:,1]
    candidates["XGB"] = dict(model=xgb, scaler=None, p_va=p_va_xgb, p_te=p_te_xgb, std=False)

rows = []
for name, d in candidates.items():
    rows += [
        dict(model=name, split="val", **metric_block(y_va, d["p_va"])),
        dict(model=name, split="test", **metric_block(y_te, d["p_te"])),
    ]
valtest = pd.DataFrame(rows)
display(valtest)


Unnamed: 0,model,split,AUC,PR_AUC,Brier,LogLoss,PosRate
0,LR,val,0.458104,0.487851,0.262018,0.718572,0.513966
1,LR,test,0.47918,0.520515,0.25771,0.708665,0.548327
2,XGB,val,0.532317,0.554066,0.282554,0.78234,0.513966
3,XGB,test,0.461924,0.526903,0.300178,0.810264,0.548327


In [31]:
FEE = FEE_BPS/10000.0

def sharpe_from(p, ret_next, thr):
    pos = (p >= thr).astype(int)
    flips = np.abs(np.diff(np.r_[0, pos]))>0
    r = pos*ret_next - flips*FEE
    r = pd.Series(r).fillna(0.0)
    if r.std()==0: return 0.0
    return (r.mean()/r.std()) * np.sqrt(252)

def tune_thr_on_val(p_val, ret_next_val, grid=None, target_cov=None):
    if target_cov is not None:
        q = 1.0 - float(target_cov)
        return float(np.quantile(p_val, q))
    if grid is None:
        grid = np.linspace(0.05, 0.95, 181)
    scores = [(t, sharpe_from(p_val, ret_next_val, t)) for t in grid]
    return max(scores, key=lambda x: x[1])[0]

# Build ret_next on val/test from close if missing
def next_returns(df_):
    if "ret_next" in df_.columns:
        return df_["ret_next"].to_numpy()
    if "close" in df_.columns:
        return pd.Series(df_["close"]).pct_change().shift(-1).to_numpy()
    raise KeyError("Need ret_next or close to compute returns.")

ret_va = next_returns(df_va)
ret_te = next_returns(df_te)

bt_rows = []
for name, d in candidates.items():
    thr = tune_thr_on_val(d["p_va"], ret_va, target_cov=None if TARGET_COV is None else TARGET_COV)
    pos_te = (d["p_te"] >= thr).astype(int)
    flips  = (np.abs(np.diff(np.r_[0, pos_te]))>0)
    r_te   = pos_te*ret_te - flips*FEE
    r_te   = pd.Series(r_te).fillna(0.0)
    eq     = (1 + r_te).cumprod()
    cagr   = (1 + r_te).prod() ** (252/max(len(r_te),1)) - 1
    vol    = r_te.std() * np.sqrt(252)
    sharpe = (cagr/vol) if vol>0 else np.nan
    mdd    = (eq/eq.cummax() - 1).min()
    bt_rows.append(dict(
        model=name, thr=float(thr),
        Sharpe=float(sharpe), CAGR=float(cagr), total_return=float(eq.iloc[-1]-1),
        max_drawdown=float(mdd), vol_annual=float(vol),
        AUC_test=float(roc_auc_score(y_te, d["p_te"])) if len(np.unique(y_te))>1 else np.nan
    ))

bt = pd.DataFrame(bt_rows)
display(bt.sort_values("Sharpe", ascending=False))


Unnamed: 0,model,thr,Sharpe,CAGR,total_return,max_drawdown,vol_annual,AUC_test
0,LR,0.559691,-0.835022,-0.074592,-0.152531,-0.187996,0.08933,0.47918
1,XGB,0.561802,-0.906763,-0.119593,-0.23809,-0.315987,0.13189,0.461924


In [32]:
# Load current prod model/scaler & compute TEST AUC + backtest
prod_lr = load(ART/"lr.joblib")
prod_scaler = load(ART/"scaler.joblib") if (ART/"scaler.joblib").exists() else None
prod_thr = None
thr_json_p = ART/"threshold.json"
if thr_json_p.exists():
    tj = json.loads(thr_json_p.read_text(encoding="utf-8"))
    if "threshold" in tj: prod_thr = float(tj["threshold"])
    elif "LR" in tj and isinstance(tj["LR"], dict) and "tau" in tj["LR"]: prod_thr = float(tj["LR"]["tau"])
    elif "tau" in tj: prod_thr = float(tj["tau"])

X_te_prod = prod_scaler.transform(X_te) if prod_scaler is not None else X_te
p_te_prod = prod_lr.predict_proba(X_te_prod)[:,1]
auc_prod  = float(roc_auc_score(y_te, p_te_prod)) if len(np.unique(y_te))>1 else np.nan

# Backtest with prod threshold (fallback: quantile @ 1-TARGET_COV)
thr_prod = prod_thr if prod_thr is not None else float(np.quantile(p_te_prod, 1.0 - TARGET_COV))
pos_te   = (p_te_prod >= thr_prod).astype(int)
flips    = (np.abs(np.diff(np.r_[0, pos_te]))>0)
r_te     = pos_te*ret_te - flips*(FEE_BPS/10000.0)
r_te     = pd.Series(r_te).fillna(0.0)
eq       = (1+r_te).cumprod()
cagr_p   = (1+r_te).prod() ** (252/max(len(r_te),1)) - 1
vol_p    = r_te.std()*np.sqrt(252)
shp_p    = (cagr_p/vol_p) if vol_p>0 else np.nan
mdd_p    = (eq/eq.cummax() - 1).min()

prod_metrics = dict(model="PROD", thr=float(thr_prod), AUC_test=auc_prod,
                    Sharpe=float(shp_p), CAGR=float(cagr_p), total_return=float(eq.iloc[-1]-1),
                    max_drawdown=float(mdd_p), vol_annual=float(vol_p))
print("Production metrics (recomputed on current TEST):")
prod_metrics


Production metrics (recomputed on current TEST):


{'model': 'PROD',
 'thr': 0.59,
 'AUC_test': 0.47899839575922437,
 'Sharpe': -0.8414327300269396,
 'CAGR': -0.06994977516750267,
 'total_return': -0.1434284062079053,
 'max_drawdown': -0.16464420453018258,
 'vol_annual': 0.08313174977786178}

In [33]:
# Pick best candidate by Sharpe (you can choose AUC instead)
new_best = bt.sort_values(["Sharpe","AUC_test"], ascending=[False, False]).head(1).iloc[0].to_dict()

promote = True
# AUC guardrail
if not (np.isnan(new_best["AUC_test"]) or np.isnan(prod_metrics["AUC_test"])):
    if new_best["AUC_test"] < prod_metrics["AUC_test"] + PROMOTE_RULE["min_auc_gain"]:
        promote = False
# Sharpe guardrail
if not np.isnan(new_best["Sharpe"]):
    if new_best["Sharpe"] < PROMOTE_RULE["min_sharpe"]:
        promote = False

decision = dict(promote=promote, reason="meets_guardrails" if promote else "fails_guardrails",
                new=new_best, prod=prod_metrics)
decision


{'promote': False,
 'reason': 'fails_guardrails',
 'new': {'model': 'LR',
  'thr': 0.5596911486813771,
  'Sharpe': -0.8350221076586054,
  'CAGR': -0.0745922147354845,
  'total_return': -0.15253075843443376,
  'max_drawdown': -0.18799590989508974,
  'vol_annual': 0.08932962858269754,
  'AUC_test': 0.47917974471646785},
 'prod': {'model': 'PROD',
  'thr': 0.59,
  'AUC_test': 0.47899839575922437,
  'Sharpe': -0.8414327300269396,
  'CAGR': -0.06994977516750267,
  'total_return': -0.1434284062079053,
  'max_drawdown': -0.16464420453018258,
  'vol_annual': 0.08313174977786178}}

In [34]:
if decision["promote"]:
    name = list(candidates.keys())[0] if decision["new"]["model"]=="LR" else decision["new"]["model"]
    cand = candidates[decision["new"]["model"]]
    model = cand["model"]; sc = cand["scaler"]

    # Versioned filenames
    vtag = RUN_ID
    lr_new = ART/f"lr_{vtag}.joblib"
    sc_new = ART/f"scaler_{vtag}.joblib"
    thr_new = ART/f"threshold_{vtag}.json"
    bs_new  = ART/f"backtest_summary_{vtag}.json"

    dump(model, lr_new)
    if sc is not None:
        dump(sc, sc_new)
    json.dump({"threshold": float(decision["new"]["thr"]), "policy": "SharpeValOptim"},
              open(thr_new,"w",encoding="utf-8"), indent=2)
    json.dump({
        "fee_bps": FEE_BPS, "slippage_bps": 0.0, "allow_short": False,
        "models": {decision["new"]["model"]: {
            "tau": float(decision["new"]["thr"]), "Sharpe": float(decision["new"]["Sharpe"]),
            "CAGR": float(decision["new"]["CAGR"]), "max_drawdown": float(decision["new"]["max_drawdown"]),
            "total_return": float(decision["new"]["total_return"]), "vol_annual": float(decision["new"]["vol_annual"]),
            "AUC_test": float(decision["new"]["AUC_test"])
        }}
    }, open(bs_new,"w",encoding="utf-8"), indent=2)

    # Atomically update "current" pointers
    shutil.copy2(lr_new, ART/"lr.joblib")
    if sc is not None:
        shutil.copy2(sc_new, ART/"scaler.joblib")
    shutil.copy2(thr_new, ART/"threshold.json")
    shutil.copy2(bs_new, ART/"backtest_summary.json")

    # Update registry
    reg_p = ART/"model_registry.json"
    if reg_p.exists():
        reg = json.loads(reg_p.read_text(encoding="utf-8"))
        if not isinstance(reg, list): reg = [reg]
    else:
        reg = []
    reg.append({
        "version": vtag,
        "promoted_utc": RUN_ID,
        "model": decision["new"]["model"],
        "n_features": len(feat_cols),
        "thr": float(decision["new"]["thr"]),
        "metrics_test": {
            "AUC": float(decision["new"]["AUC_test"]),
            "Sharpe": float(decision["new"]["Sharpe"]),
            "CAGR": float(decision["new"]["CAGR"]),
            "total_return": float(decision["new"]["total_return"]),
            "max_drawdown": float(decision["new"]["max_drawdown"]),
            "vol_annual": float(decision["new"]["vol_annual"]),
        }
    })
    reg_p.write_text(json.dumps(reg, indent=2), encoding="utf-8")

    print("PROMOTED ✅  → Updated lr.joblib / scaler.joblib / threshold.json / backtest_summary.json")
else:
    print("NOT PROMOTED ⚠️  → Kept current production artifacts.")


NOT PROMOTED ⚠️  → Kept current production artifacts.


In [35]:
print("=== NB12 Verifier ===")
need = [ART/"lr.joblib", ART/"threshold.json", ART/"backtest_summary.json"]
exists = {p.name: (p.exists() and p.stat().st_size > 0) for p in need}
print("Artifacts exist:", exists)

# Show who is active now
thr = json.loads((ART/"threshold.json").read_text(encoding="utf-8"))
bs  = json.loads((ART/"backtest_summary.json").read_text(encoding="utf-8"))
active_tau = thr.get("threshold") or thr.get("tau") or thr.get("LR",{}).get("tau")

show = dict(
    active_tau=active_tau,
    model_keys=list(bs.get("models", {}).keys()),
    fee_bps=bs.get("fee_bps"),
)
print(show)
print("VERDICT:", "PASS ✅" if all(exists.values()) else "CHECK ⚠️")


=== NB12 Verifier ===
Artifacts exist: {'lr.joblib': True, 'threshold.json': True, 'backtest_summary.json': True}
{'active_tau': 0.59, 'model_keys': ['LR'], 'fee_bps': 5.0}
VERDICT: PASS ✅


In [36]:
# NB12 — archive decision (even when NOT PROMOTED)
import json, pandas as pd
from pathlib import Path
from datetime import datetime, timezone

ART, DATA = Path("artifacts"), Path("data")
ART.mkdir(exist_ok=True, parents=True); DATA.mkdir(exist_ok=True, parents=True)

# Load prior decision object from Cell 6 if still in memory; else read a minimal one
try:
    _decision = decision  # from Cell 6
except NameError:
    _decision = {"promote": False, "reason": "unknown"}

entry = {
    "ts_utc": datetime.now(timezone.utc).isoformat(),
    "promote": bool(_decision.get("promote", False)),
    "reason": _decision.get("reason"),
    "new": _decision.get("new", {}),
    "prod": _decision.get("prod", {}),
}

# Append to a CSV log
log_p = DATA/"promotion_decisions.csv"
pd.DataFrame([{
    "ts_utc": entry["ts_utc"],
    "promote": entry["promote"],
    "reason": entry["reason"],
    "new_auc": entry["new"].get("AUC_test"),
    "new_sharpe": entry["new"].get("Sharpe"),
    "new_thr": entry["new"].get("thr"),
    "prod_auc": entry["prod"].get("AUC_test"),
    "prod_sharpe": entry["prod"].get("Sharpe"),
    "prod_thr": entry["prod"].get("thr"),
}]).to_csv(log_p, mode="a", header=not log_p.exists(), index=False)

# Also write a JSON snapshot
snap_p = ART/f"promotion_decision_{entry['ts_utc'].replace(':','-')}.json"
snap_p.write_text(json.dumps(entry, indent=2), encoding="utf-8")

print("Logged decision →", log_p.as_posix())
print("Snapshot       →", snap_p.as_posix())


Logged decision → data/promotion_decisions.csv
Snapshot       → artifacts/promotion_decision_2025-10-23T00-42-40.234682+00-00.json


In [37]:
# NB12 — verify promotion decision log & latest snapshot
from pathlib import Path
import json, pandas as pd

ART, DATA = Path("artifacts"), Path("data")
log_p = DATA/"promotion_decisions.csv"

print("Log exists:", log_p.exists(), "| size:", log_p.stat().st_size if log_p.exists() else 0)
if log_p.exists():
    log = pd.read_csv(log_p)
    print("Log rows:", len(log))
    display(log.tail(5))
    print("\nPromote counts:", log["promote"].value_counts(dropna=False).to_dict())

# Show latest JSON snapshot
snaps = list(ART.glob("promotion_decision_*.json"))
if snaps:
    latest = max(snaps, key=lambda p: p.stat().st_mtime)
    print("\nLatest snapshot:", latest.name)
    snap = json.loads(latest.read_text(encoding="utf-8"))
    print(snap)
else:
    print("\nNo JSON snapshots found.")


Log exists: True | size: 563
Log rows: 3


Unnamed: 0,ts_utc,promote,reason,new_auc,new_sharpe,new_thr,prod_auc,prod_sharpe,prod_thr
0,2025-10-23T00:39:11.135467+00:00,False,fails_guardrails,0.47918,-0.835022,0.559691,0.478998,-0.841433,0.59
1,2025-10-23T00:41:22.778234+00:00,False,fails_guardrails,0.47918,-0.835022,0.559691,0.478998,-0.841433,0.59
2,2025-10-23T00:42:40.234682+00:00,False,fails_guardrails,0.47918,-0.835022,0.559691,0.478998,-0.841433,0.59



Promote counts: {False: 3}

Latest snapshot: promotion_decision_2025-10-23T00-42-40.234682+00-00.json
{'ts_utc': '2025-10-23T00:42:40.234682+00:00', 'promote': False, 'reason': 'fails_guardrails', 'new': {'model': 'LR', 'thr': 0.5596911486813771, 'Sharpe': -0.8350221076586054, 'CAGR': -0.0745922147354845, 'total_return': -0.15253075843443376, 'max_drawdown': -0.18799590989508974, 'vol_annual': 0.08932962858269754, 'AUC_test': 0.47917974471646785}, 'prod': {'model': 'PROD', 'thr': 0.59, 'AUC_test': 0.47899839575922437, 'Sharpe': -0.8414327300269396, 'CAGR': -0.06994977516750267, 'total_return': -0.1434284062079053, 'max_drawdown': -0.16464420453018258, 'vol_annual': 0.08313174977786178}}
