In [1]:
# NB18 — Repo root autodetect
from pathlib import Path

def find_repo_root(start: Path, must_have=("data", "artifacts"), max_depth: int = 6) -> Path:
    """Locate the repository root by walking upward from ``start``.

    A directory qualifies as the root when every name in ``must_have`` exists
    directly inside it. The nearest qualifying directory wins, so if ``start``
    itself contains the markers it is returned unchanged (resolved).

    Args:
        start: Directory to begin the search from.
        must_have: Names (files or directories) that must all exist in the root.
        max_depth: How many directories to inspect, counting ``start`` itself.

    Returns:
        The resolved path of the first directory that contains all markers.

    Raises:
        FileNotFoundError: If no qualifying directory is found.
    """
    def _has_markers(candidate: Path) -> bool:
        return all((candidate / marker).exists() for marker in must_have)

    cur = start.resolve()
    for _ in range(max_depth):
        if _has_markers(cur):
            return cur
        if cur.parent == cur:
            # Reached the filesystem root; there is nothing further up to inspect.
            break
        cur = cur.parent
    # Fallback on the *unresolved* path: covers a `notebooks` directory reached
    # through a link whose resolved ancestors differ from its lexical parent.
    if start.name.lower() == "notebooks" and _has_markers(start.parent):
        return start.parent.resolve()
    raise FileNotFoundError(f"Could not locate repo root containing {must_have} starting at {start}")

# Resolve the repo root from the kernel's working directory, then report
# whether each of the two marker directories is present beneath it.
CWD = Path.cwd()
ROOT = find_repo_root(CWD)
print("Repo root →", ROOT)
for _label, _subdir in (("Has data?      ", "data"), ("Has artifacts? ", "artifacts")):
    print(_label, (ROOT / _subdir).exists())


Repo root → C:\.projects\stock-direction-ml\stock-direction-ml\notebooks
Has data?       True
Has artifacts?  True


In [2]:
# NB18 — Ensure app/ package exists
from pathlib import Path
# Make sure ROOT/app exists and carries an __init__.py so it is a package.
APP = ROOT.joinpath("app")
APP.mkdir(parents=True, exist_ok=True)
init_path = APP.joinpath("__init__.py")
if init_path.exists():
    pass  # an existing marker file is left untouched
else:
    init_path.write_text("# package marker\n", encoding="utf-8")
print("app/ exists:", APP.exists(), "| __init__.py:", init_path.exists())


app/ exists: True | __init__.py: True


In [3]:
# NB18 — Write app/config.py
from textwrap import dedent
code = dedent("""
# app/config.py
# Shared filesystem locations and defaults for the app modules.
from pathlib import Path

# Repo root: the directory that contains this file's parent folder.
ROOT = Path(__file__).resolve().parents[1]

# Artifact directories, one per asset class.
ART_DIR_EQUITY = ROOT.joinpath("artifacts")
ART_DIR_CRYPTO = ROOT.joinpath("artifacts_crypto")  # will be created in NB20

# Optional per-ticker thresholds (equities); lives next to the equity artifacts.
TAU_MAP_PATH = ART_DIR_EQUITY.joinpath("tau_map.json")

# Decision threshold used when no artifact-specific value is available.
DEFAULT_TAU = 0.59

# Local data files (CSV and Parquet variants of the same dataset name).
_DATA_DIR = ROOT.joinpath("data")
DATA_CSV = _DATA_DIR.joinpath("df_nb02.csv")
DATA_PQ = _DATA_DIR.joinpath("df_nb02.parquet")
""").lstrip()
# Persist the generated source as app/config.py and echo its absolute path.
out = ROOT.joinpath("app", "config.py")
out.write_text(code, encoding="utf-8")
print("Wrote:", out.resolve())


Wrote: C:\.projects\stock-direction-ml\stock-direction-ml\notebooks\app\config.py


In [4]:
# NB18 — Write app/lib_artifacts.py
from textwrap import dedent
code = dedent("""
# app/lib_artifacts.py
import json, joblib
from .config import ART_DIR_EQUITY, ART_DIR_CRYPTO, DEFAULT_TAU, TAU_MAP_PATH

def _safe_tau(v, default):
    # Best-effort float conversion: anything float() rejects yields `default`.
    try:
        return float(v)
    except Exception:
        return default

def load_artifacts(asset_class="equity"):
    # Load the model bundle for one asset class.
    #
    # asset_class: "equity" selects ART_DIR_EQUITY; any other value selects
    #              ART_DIR_CRYPTO.
    # Returns (feature_list, scaler, model, tau_art, tau_map):
    #   feature_list - parsed feature_list.json
    #   scaler/model - objects loaded from scaler.joblib / lr.joblib
    #   tau_art      - threshold from threshold.json, else DEFAULT_TAU
    #   tau_map      - dict from TAU_MAP_PATH (equity only), else {}
    # Missing feature_list/scaler/model files raise; the threshold files are
    # optional and read on a best-effort basis.
    art = ART_DIR_EQUITY if asset_class == "equity" else ART_DIR_CRYPTO
    feature_list = json.loads((art / "feature_list.json").read_text(encoding="utf-8"))
    # NOTE(review): joblib.load unpickles; only point this at trusted artifact files.
    scaler = joblib.load(art / "scaler.joblib")
    model = joblib.load(art / "lr.joblib")

    tau_art = DEFAULT_TAU
    tfile = art / "threshold.json"
    if tfile.exists():
        try:
            stored = json.loads(tfile.read_text(encoding="utf-8"))
        except Exception:
            stored = None  # unreadable or invalid JSON: keep the default
        if isinstance(stored, dict):
            # Take the first key whose value converts to a float. Checking the
            # converted value (not truthiness) keeps a legitimate 0.0 threshold.
            for key in ("tau", "threshold", "value"):
                candidate = _safe_tau(stored.get(key), None)
                if candidate is not None:
                    tau_art = candidate
                    break

    tau_map = {}
    if asset_class == "equity" and TAU_MAP_PATH.exists():
        try:
            loaded = json.loads(TAU_MAP_PATH.read_text(encoding="utf-8"))
        except Exception:
            loaded = None
        # Callers use tau_map.get(...), so anything but a JSON object is ignored.
        if isinstance(loaded, dict):
            tau_map = loaded

    return feature_list, scaler, model, tau_art, tau_map
""").lstrip()
# Persist the generated source as app/lib_artifacts.py and echo its absolute path.
out = ROOT.joinpath("app", "lib_artifacts.py")
out.write_text(code, encoding="utf-8")
print("Wrote:", out.resolve())


Wrote: C:\.projects\stock-direction-ml\stock-direction-ml\notebooks\app\lib_artifacts.py


In [5]:
# NB18 — Write app/lib_features.py
from textwrap import dedent
code = dedent("""
# app/lib_features.py
import math, numpy as np, pandas as pd

def ema(s, span):
    # Exponential moving average of `s` with the given span, using the
    # recursive (adjust=False) form of pandas' exponentially weighted mean.
    weighted = s.ewm(span=span, adjust=False)
    return weighted.mean()

def rsi_wilder(close, length=14):
    # Relative Strength Index computed from exponentially weighted average
    # gains and losses with alpha = 1/length (adjust=False).
    # Returns a Series aligned with `close`; the first value is NaN because
    # the first difference is undefined.
    alpha = 1 / length
    delta = close.diff()
    up_moves = delta.clip(lower=0)
    down_moves = -delta.clip(upper=0)
    avg_gain = up_moves.ewm(alpha=alpha, adjust=False).mean()
    avg_loss = down_moves.ewm(alpha=alpha, adjust=False).mean()
    # The small constant keeps the ratio finite when there were no losses.
    strength = avg_gain / (avg_loss + 1e-12)
    return 100 - (100 / (1 + strength))

def macd_and_signal(close, fast=12, slow=26, sig=9):
    # MACD line = EMA(close, fast) - EMA(close, slow);
    # signal line = EMA of the MACD line over `sig`.
    # Returns the pair (macd, signal).
    fast_ema = ema(close, fast)
    slow_ema = ema(close, slow)
    macd_line = fast_ema - slow_ema
    return macd_line, ema(macd_line, sig)

def add_nb02_features(df):
    # Return a copy of `df` with the price-derived feature columns added:
    # ret1/ret5/ret10 (percent changes of close), vol10 (10-row std of ret1),
    # volz (vol10 standardised over a 100-row window), rsi14, macd, macd_signal.
    # Requires a "close" column; the input frame is not modified.
    out = df.copy()
    close = out["close"]

    ret1 = close.pct_change(1)
    vol10 = ret1.rolling(10).std()
    vol10_window = vol10.rolling(100)
    macd, macd_signal = macd_and_signal(close, 12, 26, 9)

    # All features are computed before any column is written, then assigned
    # in a fixed order so the resulting column layout is stable.
    features = {
        "ret1": ret1,
        "ret5": close.pct_change(5),
        "ret10": close.pct_change(10),
        "vol10": vol10,
        "volz": (vol10 - vol10_window.mean()) / (vol10_window.std() + 1e-12),
        "rsi14": rsi_wilder(close, 14),
        "macd": macd,
        "macd_signal": macd_signal,
    }
    for name, values in features.items():
        out[name] = values
    return out

def add_market_features_equity(df, spy_series, vix_series):
    # Left-join the benchmark frames onto `df` by "date" and derive market
    # features from them. `spy_series` must provide "spy_close" and
    # `vix_series` must provide "vix_close" (both alongside "date").
    merged = df.merge(spy_series, on="date", how="left")
    merged = merged.merge(vix_series, on="date", how="left")
    spy, vix = merged["spy_close"], merged["vix_close"]
    merged["mkt_ret1"] = spy.pct_change(1)
    merged["mkt_ret5"] = spy.pct_change(5)
    merged["vix_chg1"] = vix.pct_change(1)
    return merged

def add_market_features_crypto(df, bench_close):
    # Left-join a benchmark frame ("date", "btc_close") onto `df` and expose it
    # under the equity column names: "btc_close" becomes "spy_close", and
    # "vix_close" is a proxy built from the benchmark's 30-row return
    # volatility, scaled by sqrt(365) and expressed in percent.
    merged = df.merge(bench_close, on="date", how="left")
    merged = merged.rename(columns={"btc_close": "spy_close"})
    bench = merged["spy_close"]
    bench_ret = bench.pct_change(1)
    merged["mkt_ret1"] = bench_ret
    merged["mkt_ret5"] = bench.pct_change(5)
    merged["vix_close"] = bench_ret.rolling(30).std() * math.sqrt(365) * 100.0
    merged["vix_chg1"] = merged["vix_close"].pct_change(1)
    return merged

def infer_target(df):
    # Find or derive a binary up/down target for `df`.
    # Returns (values, source) where `source` names where the target came from,
    # or (None, None) when nothing usable is present. Lookup order:
    #   1. the first existing label column from the list below (cast to int, clipped to 0/1)
    #   2. the sign of an existing "ret_next" column
    #   3. the next-row return derived from "close"
    # NOTE: case 3 writes a "ret_next" column into the caller's frame (the
    # unknown final row is filled with 0.0); make_dataset relies on that column.
    for c in ["y","label","target","y_bin","direction","is_up","class","cls"]:
        if c in df.columns:
            return df[c].astype(int).clip(0, 1).values, c
    if "ret_next" in df.columns:
        y = (df["ret_next"].astype(float) > 0).astype(int).values
        return y, "ret_next>0"
    if "close" in df.columns:
        rn = df["close"].astype(float).pct_change().shift(-1).fillna(0.0)
        df["ret_next"] = rn
        return (rn > 0).astype(int).values, "ret_next_from_close>0"
    return None, None

def make_dataset(df, feature_list):
    # Build model inputs from `df`.
    # Returns (X, y, retn, idx, cols):
    #   X    - feature matrix restricted to the features present in `df`
    #   y    - int labels, or None when no target could be inferred
    #   retn - next-period returns (zeros when "ret_next" is unavailable)
    #   idx  - index LABELS of the rows that survived NA removal
    #   cols - the feature names actually used, in feature_list order
    # Raises ValueError when none of the requested features exist in `df`.
    cols = [c for c in feature_list if c in df.columns]
    if not cols:
        raise ValueError("No overlap between feature_list.json and data columns.")
    tmp = df[cols].replace([np.inf, -np.inf], np.nan)
    y_vals, _ = infer_target(df)
    has_labels = y_vals is not None
    retn = df["ret_next"].astype(float).values if "ret_next" in df.columns else np.zeros(len(df))
    # Only carry a label column when labels exist: an all-NaN placeholder
    # would make dropna() discard every row of an unlabeled dataset.
    if has_labels:
        tmp["__y__"] = y_vals
    tmp["__ret_next__"] = retn
    tmp = tmp.dropna()
    X = tmp[cols].to_numpy()
    y = tmp["__y__"].astype(int).to_numpy() if has_labels else None
    retn = tmp["__ret_next__"].astype(float).to_numpy()
    idx = tmp.index
    return X, y, retn, idx, cols
""").lstrip()
# Persist the generated source as app/lib_features.py and echo its absolute path.
out = ROOT.joinpath("app", "lib_features.py")
out.write_text(code, encoding="utf-8")
print("Wrote:", out.resolve())


Wrote: C:\.projects\stock-direction-ml\stock-direction-ml\notebooks\app\lib_features.py


In [6]:
# NB18 — Write app/lib_fetch.py
from textwrap import dedent
code = dedent("""
# app/lib_fetch.py
import pandas as pd
from .config import DATA_CSV, DATA_PQ
from .lib_features import add_nb02_features

def ensure_date(df):
    # Normalise the date column of `df` in place and return the same frame.
    # The first column found among "date", "Date", "timestamp", "ts" is
    # converted with pd.to_datetime and, if it is not already called "date",
    # copied into a "date" column. Frames with none of these columns are
    # returned unchanged.
    for c in ("date", "Date", "timestamp", "ts"):
        if c not in df.columns:
            continue
        try:
            df[c] = pd.to_datetime(df[c])
        except Exception:
            # Best effort: unparseable values are left as they are. Catching
            # Exception (not a bare except) lets Ctrl-C / SystemExit through.
            pass
        if c != "date":
            df["date"] = df[c]
        return df
    return df

def load_repo_df():
    # Load the repo dataset, preferring the CSV over the Parquet file, and
    # pass it through ensure_date. Raises FileNotFoundError when neither
    # file exists.
    if DATA_CSV.exists():
        return ensure_date(pd.read_csv(DATA_CSV))
    if DATA_PQ.exists():
        return ensure_date(pd.read_parquet(DATA_PQ))
    raise FileNotFoundError("Missing data/df_nb02.csv or .parquet")

def _download_prices(symbol, start, end, **download_kwargs):
    # Download bars for one symbol from Yahoo and return a flat frame with a
    # datetime "date" column. Raises ValueError when nothing comes back.
    import yfinance as yf  # imported lazily so repo-file mode works without it
    px = yf.download(symbol, start=start, end=end, progress=False, **download_kwargs)
    if px is None or px.empty:
        raise ValueError(f"No data for {symbol}")
    if isinstance(px.columns, pd.MultiIndex):
        # Some yfinance versions return (field, ticker) column pairs even for a
        # single symbol; keep only the field level so "Close" etc. are plain names.
        px = px.copy()
        px.columns = px.columns.get_level_values(0)
    out = px.rename_axis("date").reset_index()
    out["date"] = pd.to_datetime(out["date"])
    return out

def _ohlcv_with_features(ticker, start, end):
    # Price history for `ticker` with lower-case OHLCV names, a "ticker"
    # column, and the add_nb02_features columns.
    raw = _download_prices(ticker, start, end, auto_adjust=True)
    raw = raw.rename(columns={"Open":"open","High":"high","Low":"low","Close":"close","Volume":"volume"})
    # .copy() so the column added below writes to an independent frame.
    df = raw[["date","open","high","low","close","volume"]].copy()
    df["ticker"] = ticker
    return add_nb02_features(df)

def _benchmark_close(symbol, start, end, out_col, **download_kwargs):
    # Two-column frame ("date", out_col) holding a benchmark's closing price.
    bench = _download_prices(symbol, start, end, **download_kwargs)
    return bench[["date","Close"]].rename(columns={"Close": out_col})

def _add_next_day_target(df):
    # ret_next = next row's percent change of close; y = 1 where it is positive.
    # The final row has no next value: ret_next is NaN there and y is 0.
    df["ret_next"] = df["close"].pct_change().shift(-1)
    df["y"] = (df["ret_next"] > 0).astype(int)
    return df

def fetch_equity_df(ticker, start, end):
    # Fetch an equity from Yahoo and build the model frame: price features,
    # SPY / ^VIX market features, and the next-day target columns.
    # Raises ValueError when the ticker or a benchmark returns no data.
    df = _ohlcv_with_features(ticker, start, end)
    spy = _benchmark_close("SPY", start, end, "spy_close", auto_adjust=True)
    vix = _benchmark_close("^VIX", start, end, "vix_close")

    df = df.merge(spy, on="date", how="left").merge(vix, on="date", how="left")
    df["mkt_ret1"] = df["spy_close"].pct_change(1)
    df["mkt_ret5"] = df["spy_close"].pct_change(5)
    df["vix_chg1"] = df["vix_close"].pct_change(1)
    return _add_next_day_target(df)

def fetch_crypto_df(ticker, start, end):
    # Fetch a crypto pair from Yahoo and build the model frame. BTC-USD stands
    # in for the market benchmark (stored as "spy_close") and its 30-row return
    # volatility, scaled by sqrt(365) and expressed in percent, stands in for
    # "vix_close". Raises ValueError when the ticker or BTC-USD returns no data.
    df = _ohlcv_with_features(ticker, start, end)
    btc = _benchmark_close("BTC-USD", start, end, "spy_close", auto_adjust=True)

    df = df.merge(btc, on="date", how="left")
    df["mkt_ret1"] = df["spy_close"].pct_change(1)
    df["mkt_ret5"] = df["spy_close"].pct_change(5)

    bench_ret = df["spy_close"].pct_change()
    df["vix_close"] = bench_ret.rolling(30).std() * (365 ** 0.5) * 100.0
    df["vix_chg1"] = df["vix_close"].pct_change(1)
    return _add_next_day_target(df)
""").lstrip()
# Persist the generated source as app/lib_fetch.py and echo its absolute path.
out = ROOT.joinpath("app", "lib_fetch.py")
out.write_text(code, encoding="utf-8")
print("Wrote:", out.resolve())


Wrote: C:\.projects\stock-direction-ml\stock-direction-ml\notebooks\app\lib_fetch.py


In [7]:
# NB18 — Write app/lib_eval.py
from textwrap import dedent
code = dedent("""
# app/lib_eval.py
import numpy as np
from sklearn.metrics import (
    roc_auc_score, average_precision_score, brier_score_loss, log_loss, f1_score
)

def predict_proba(model, X):
    # Score `X` with whichever interface `model` offers, in this order:
    #   predict_proba     -> second column of a 2-D result, else the result as is
    #   decision_function -> logistic transform of the scores
    #   predict           -> hard predictions cast to float and clipped to [0, 1]
    if hasattr(model, "predict_proba"):
        proba = model.predict_proba(X)
        if proba.ndim == 2:
            return proba[:, 1]
        return proba
    if hasattr(model, "decision_function"):
        scores = model.decision_function(X)
        return 1 / (1 + np.exp(-scores))
    hard = model.predict(X).astype(float)
    return np.clip(hard, 0, 1)

def metrics_all(y, p):
    # Compute the four probability metrics for labels `y` and scores `p`.
    # Returns a dict with keys auc, ap, brier, logloss (in that order); a
    # metric that cannot be computed is reported as NaN instead of raising.
    def safe(fn, *a):
        try:
            return float(fn(*a))
        except Exception:
            # Exception rather than a bare except, so Ctrl-C and SystemExit
            # still propagate.
            return float("nan")

    scorers = (
        ("auc", roc_auc_score),
        ("ap", average_precision_score),
        ("brier", brier_score_loss),
        ("logloss", log_loss),
    )
    return {name: safe(fn, y, p) for name, fn in scorers}

def tau_sweep(y, p, retn, fee_bps=5, grid=None):
    # Evaluate every threshold in `grid` (default: 91 points from 0.05 to 0.95).
    # For each threshold t the signal is (p >= t); the sweep records its F1
    # against `y` and the final equity of trading `retn` with that signal.
    # Returns (grid, f1s, finals) with f1s/finals as NumPy arrays.
    if grid is None:
        grid = np.linspace(0.05, 0.95, 91)
    f1s, finals = [], []
    for t in grid:
        sig = (p >= t).astype(int)
        f1s.append(_safe_f1(y, sig))
        finals.append(_final_equity(retn, sig, fee_bps))
    return grid, np.array(f1s), np.array(finals)

def _safe_f1(y, sig):
    # F1 of `sig` against `y`, or NaN when it cannot be computed.
    try:
        return f1_score(y, sig)
    except Exception:
        return float("nan")

def _final_equity(retn, sig, fee_bps):
    # Final value of an equity curve that earns retn[i] while sig[i] == 1 and
    # pays fee_bps basis points on every change of signal after the first row.
    # Returns NaN for empty input.
    flips = np.zeros_like(sig)
    if len(flips) > 1:
        flips[1:] = (sig[1:] != sig[:-1]).astype(int)
    fee = flips * (fee_bps / 10000.0)
    eq = np.cumprod(1 + (retn * sig - fee))
    return float(eq[-1]) if len(eq) else float("nan")
""").lstrip()
# Persist the generated source as app/lib_eval.py and echo its absolute path.
out = ROOT.joinpath("app", "lib_eval.py")
out.write_text(code, encoding="utf-8")
print("Wrote:", out.resolve())


Wrote: C:\.projects\stock-direction-ml\stock-direction-ml\notebooks\app\lib_eval.py


In [8]:
# NB18 — Write modular Streamlit app (Repo/Fetch, Equity/Crypto, τ-map)
from textwrap import dedent
code = dedent("""
# app/streamlit_app.py — modular, fetch-enabled
import sys
from pathlib import Path

# The imports below use the `app.` package prefix, which only resolves when the
# repo root (the parent of this file's folder) is importable. Add it explicitly
# so the app starts regardless of how or from where Streamlit is launched.
_REPO_ROOT = str(Path(__file__).resolve().parents[1])
if _REPO_ROOT not in sys.path:
    sys.path.insert(0, _REPO_ROOT)

import numpy as np, pandas as pd, matplotlib.pyplot as plt, streamlit as st
from app.config import ROOT, DEFAULT_TAU
from app.lib_artifacts import load_artifacts
from app.lib_fetch import load_repo_df, fetch_equity_df, fetch_crypto_df
from app.lib_features import make_dataset
from app.lib_eval import predict_proba, metrics_all, tau_sweep

st.set_page_config(page_title="Direction Classifier", layout="wide")
st.title("📈 Direction Classifier — Any Ticker (Equities & Crypto)")

def _as_range(picked):
    # st.date_input yields a single-element result while the user has picked
    # only the first day of a range; report that as None so the run can pause
    # instead of failing on tuple unpacking.
    if isinstance(picked, (tuple, list)) and len(picked) == 2:
        return picked[0], picked[1]
    return None

# ---- Sidebar: data source ----
with st.sidebar:
    st.header("Data source")
    src = st.radio("Choose", ["Repo file","Fetch (Yahoo)"], index=0)
    asset_class = st.selectbox("Asset class", ["equity","crypto"], index=0)

    if src == "Repo file":
        df = load_repo_df()
        if "ticker" in df.columns:
            ticks = sorted(df["ticker"].dropna().unique().tolist())
            default = df["ticker"].value_counts().idxmax()
            ticker = st.selectbox("Ticker", ticks, index=max(0, ticks.index(default)))
            df = df.loc[df["ticker"]==ticker].copy()
            st.caption(f"Ticker: **{ticker}**  •  Rows: {len(df)}")
        else:
            ticker = None
            st.caption("No 'ticker' column; using all rows.")
        if "date" in df.columns:
            dmin, dmax = df["date"].min(), df["date"].max()
            picked = _as_range(st.date_input("Date range", value=(dmin.date(), dmax.date()),
                                             min_value=dmin.date(), max_value=dmax.date()))
            if picked is None:
                st.info("Select both a start and an end date."); st.stop()
            start, end = picked
            df = df.loc[df["date"].dt.date.between(start, end)].copy()
    else:
        ticker = st.text_input("Ticker", value=("AAPL" if asset_class=="equity" else "BTC-USD"))
        dates = _as_range(st.date_input("Fetch range (UTC)", value=(pd.to_datetime("2023-01-01").date(), pd.Timestamp.today().date())))
        if dates is None:
            st.info("Select both a start and an end date."); st.stop()
        # Every widget interaction reruns this script and the button is True
        # only on the run where it was clicked, so the fetched frame is kept in
        # session state (keyed by the request) to survive later reruns such as
        # moving the threshold slider.
        fetch_key = (asset_class, ticker, dates[0], dates[1])
        if st.button("Fetch data"):
            try:
                fetcher = fetch_equity_df if asset_class=="equity" else fetch_crypto_df
                fetched = fetcher(ticker, dates[0], dates[1])
            except Exception as e:
                st.error(f"Fetch failed: {e}"); st.stop()
            st.session_state["fetched"] = (fetch_key, fetched)
            st.success(f"Fetched {len(fetched)} rows for {ticker}")
        cached = st.session_state.get("fetched")
        if cached is None or cached[0] != fetch_key:
            st.stop()
        df = cached[1].copy()

# ---- Artifacts ----
feature_list, scaler, model, tau_art, tau_map = load_artifacts(asset_class=("equity" if asset_class=="equity" else "crypto"))
default_tau = float(tau_map.get(ticker, tau_art if tau_art is not None else DEFAULT_TAU)) if ticker else (tau_art or DEFAULT_TAU)
# The slider below only accepts values inside its [0, 1] range.
default_tau = float(min(max(default_tau, 0.0), 1.0))

# ---- Dataset ----
X, y, retn, idx, used_cols = make_dataset(df, feature_list)
if len(X)==0: st.error("No usable rows after feature alignment/NA drop."); st.stop()
missing_features = [c for c in feature_list if c not in used_cols]
if missing_features:
    st.warning(f"Data is missing model features: {missing_features}")
Xs = scaler.transform(X)
p  = np.clip(predict_proba(model, Xs), 1e-6, 1-1e-6)

with st.sidebar:
    tau     = st.slider("Decision threshold (τ)", 0.00, 1.00, value=float(round(default_tau,2)), step=0.01)
    fee_bps = st.number_input("Fee (bps) per position flip", value=5, min_value=0, max_value=100, step=1)

# ---- Metrics ----
c1,c2,c3,c4 = st.columns(4)
if y is not None and len(y)==len(p):
    m = metrics_all(y, p)
    c1.metric("ROC AUC", f"{m['auc']:.3f}" if np.isfinite(m['auc']) else "n/a")
    c2.metric("PR AUC",  f"{m['ap']:.3f}" if np.isfinite(m['ap']) else "n/a")
    c3.metric("Brier",    f"{m['brier']:.4f}" if np.isfinite(m['brier']) else "n/a")
    c4.metric("Log Loss", f"{m['logloss']:.4f}" if np.isfinite(m['logloss']) else "n/a")
else:
    for c in (c1,c2,c3,c4): c.metric("—","—")
    st.info("Labels not available for this selection; showing predictions/equity only.")

# ---- Equity vs B&H ----
sig = (p >= tau).astype(int)
flips = np.zeros_like(sig)
if len(flips)>1: flips[1:] = (sig[1:] != sig[:-1]).astype(int)
fee = flips * (fee_bps/10000.0)
eq  = np.cumprod(1 + (retn*sig - fee))
bh  = np.cumprod(1 + retn)

# `idx` holds index LABELS of the surviving rows. After the ticker/date filters
# the frame's index is no longer 0..n-1, so the lookup must be label-based.
dates_axis = (df.loc[idx, "date"].values if "date" in df.columns else idx.values)
st.subheader("Equity Curve vs. Buy & Hold")
fig, ax = plt.subplots()
ax.plot(dates_axis, bh,  label="Buy & Hold")
ax.plot(dates_axis, eq,  label=f"Strategy (τ={tau:.2f}, fee={fee_bps}bps)")
ax.set_xlabel("Date" if "date" in df.columns else "Index"); ax.set_ylabel("Equity (×)")
ax.legend(); st.pyplot(fig)
plt.close(fig)  # the script reruns on every interaction; release the figure

# ---- τ-sweep ----
with st.expander("τ-sweep (F1 & Final Equity)"):
    if y is not None and len(y)==len(p):
        grid, f1s, finals = tau_sweep(y, p, retn, fee_bps=fee_bps)
        # nanargmax raises on an all-NaN array, so report None in that case.
        best_f1_tau = float(grid[int(np.nanargmax(f1s))]) if np.isfinite(f1s).any() else None
        best_eq_tau = float(grid[int(np.nanargmax(finals))]) if np.isfinite(finals).any() else None
        st.write({"best_f1_tau":best_f1_tau, "best_final_equity_tau":best_eq_tau})
        f, axf = plt.subplots(); axf.plot(grid, f1s, label="F1 vs τ"); axf.set_xlabel("τ"); axf.set_ylabel("F1"); axf.legend(); st.pyplot(f)
        plt.close(f)
    else:
        st.info("Labels not available; τ-sweep (F1) disabled.")

# ---- Tail & CSV ----
pred_df = pd.DataFrame({"date": dates_axis, "proba": p, "signal": sig})
if "close" in df.columns: pred_df["close"] = df.loc[idx, "close"].values
st.subheader("Latest predictions (tail)")
st.dataframe(pred_df.tail(min(12, len(pred_df))))
st.download_button("Download predictions CSV",
    data=pred_df.to_csv(index=False).encode("utf-8"),
    file_name="predictions.csv", mime="text/csv")

st.caption("This UI runs your trained LR on repo data or live Yahoo fetch. Crypto uses BTC-based proxies. Not financial advice.")
""").lstrip()
# Persist the generated source as app/streamlit_app.py and echo its absolute path.
out = ROOT.joinpath("app", "streamlit_app.py")
out.write_text(code, encoding="utf-8")
print("Wrote:", out.resolve())


Wrote: C:\.projects\stock-direction-ml\stock-direction-ml\notebooks\app\streamlit_app.py


In [9]:
# NB18 — Verify files
# Files this notebook is expected to have produced, relative to ROOT.
targets = [
    "app/__init__.py",
    "app/config.py",
    "app/lib_artifacts.py",
    "app/lib_features.py",
    "app/lib_fetch.py",
    "app/lib_eval.py",
    "app/streamlit_app.py",
]
# One line per file: left-aligned path padded to 28 characters, then True/False.
for rel_path in targets:
    present = (ROOT / rel_path).exists()
    print(f"{rel_path:28s}:", present)


app/__init__.py             : True
app/config.py               : True
app/lib_artifacts.py        : True
app/lib_features.py         : True
app/lib_fetch.py            : True
app/lib_eval.py             : True
app/streamlit_app.py        : True


In [10]:
# NB18 — Syntax check modules & app
# Compile each generated file in memory. A SyntaxError raised by compile()
# aborts the cell before the final "Syntax OK" line is printed.
for rel in (
    "app/config.py",
    "app/lib_artifacts.py",
    "app/lib_features.py",
    "app/lib_fetch.py",
    "app/lib_eval.py",
    "app/streamlit_app.py",
):
    path = ROOT / rel
    src = path.read_text(encoding="utf-8")
    compile(src, str(path), "exec")
print("Syntax OK")


Syntax OK


In [11]:
# NB18 — Ensure yfinance in requirements.txt
import re

req = ROOT / "requirements.txt"
lines = []
if req.exists():
    lines = [ln.strip() for ln in req.read_text(encoding="utf-8").splitlines() if ln.strip()]

def _requirement_name(line):
    """Return the lower-cased project name at the start of a requirement line.

    The name ends at the first version operator, extras bracket, environment
    marker, URL separator, or whitespace, so e.g. ``yfinance>=0.2`` yields
    ``yfinance`` while a different project whose name merely starts with
    ``yfinance`` is not mistaken for it.
    """
    return re.split(r"[\s<>=!~;\[@]", line, maxsplit=1)[0].lower()

if any(_requirement_name(ln) == "yfinance" for ln in lines):
    # Nothing was written, so do not claim an update.
    print("Already present:", req.resolve())
else:
    lines.append("yfinance")
    # The file is rewritten from the stripped, non-blank lines plus the new entry.
    req.write_text("\n".join(lines) + "\n", encoding="utf-8")
    print("Updated:", req.resolve())
print(req.read_text(encoding="utf-8"))


Updated: C:\.projects\stock-direction-ml\stock-direction-ml\notebooks\requirements.txt
streamlit
scikit-learn==1.7.2
pandas==2.3.2
numpy==2.3.3
joblib==1.5.2
matplotlib==3.10.6
pyarrow==21.0.0
yfinance



In [12]:
# NB18 — How to run
# Print the launch command for a local run and the entry-point path to
# configure for a hosted deployment.
for _line in (
    "Local:",
    "  streamlit run app/streamlit_app.py",
    "\nCloud:",
    "  Main file path → app/streamlit_app.py",
):
    print(_line)


Local:
  streamlit run app/streamlit_app.py

Cloud:
  Main file path → app/streamlit_app.py
