In [38]:
# NB24 — repo root
from pathlib import Path

def find_repo_root(start: Path, must_have=("app","notebooks"), max_depth: int = 6) -> Path:
    """Walk upward from ``start`` until a directory containing every ``must_have`` entry is found.

    Args:
        start: Directory to begin the search from; it is resolved before walking.
        must_have: Names that must all exist directly inside the repo root.
        max_depth: Number of directories to inspect (``start`` itself plus its
            ancestors). Defaults to 6, the limit that used to be hard-coded.

    Returns:
        The resolved path of the first directory that contains all ``must_have`` entries.

    Raises:
        FileNotFoundError: If no such directory is found within ``max_depth`` levels.
    """
    def _is_root(candidate: Path) -> bool:
        return all((candidate / name).exists() for name in must_have)

    cur = start.resolve()
    for _ in range(max_depth):
        if _is_root(cur):
            return cur
        if cur.parent == cur:
            # Filesystem root reached: climbing further would re-check the same directory.
            break
        cur = cur.parent

    # Kept from the original: check the *unresolved* parent of a "notebooks" directory,
    # which can differ from the resolved walk above when ``start`` is a symlink.
    if start.name.lower() == "notebooks" and _is_root(start.parent):
        return start.parent.resolve()
    raise FileNotFoundError("Repo root not found")

# Resolve the repository root once, starting from the kernel's working directory.
ROOT = find_repo_root(start=Path.cwd())
print("Repo root →", ROOT)


Repo root → C:\.projects\stock-direction-ml\stock-direction-ml


In [39]:
# NB24 — write app/api.py (FastAPI + predict_core)
from textwrap import dedent
from pathlib import Path

code = dedent("""
# app/api.py — Direction Classifier FastAPI
from __future__ import annotations
import numpy as np
import pandas as pd
from datetime import date
from typing import Optional

from fastapi import FastAPI
from pydantic import BaseModel

from app.config import ROOT, DEFAULT_TAU
from app.lib_artifacts import load_artifacts
from app.lib_fetch import load_repo_df, fetch_equity_df, fetch_crypto_df
from app.lib_features import make_dataset
from app.lib_eval import predict_proba, metrics_all

__all__ = ["app", "predict_core"]

app = FastAPI(title="Direction Classifier API", version="1.0.0")

class PredictRequest(BaseModel):
    # Request body for POST /predict; the predict handler forwards every field to predict_core.
    asset_class: str = "equity"   # "equity" or "crypto"; not validated here, any value other than "equity" is treated as crypto by _load_artifacts_safe and _load_df
    source: str = "repo"          # "repo" or "fetch"; any value other than "repo" takes the fetch path in _load_df
    ticker: Optional[str] = None  # required for the fetch path; optional row filter for the repo dataset
    start: Optional[date] = None  # forwarded to the fetch helpers; not used when source == "repo"
    end: Optional[date] = None    # forwarded to the fetch helpers; not used when source == "repo"
    fee_bps: int = 5              # fee in basis points charged per signal flip in predict_core
    tau: Optional[float] = None   # explicit threshold; None lets _choose_tau fall back to the per-ticker / artifact / default value

def _load_artifacts_safe(asset_class: str):
    ac = "equity" if asset_class == "equity" else "crypto"
    try:
        feature_list, scaler, model, tau_art, tau_map = load_artifacts(ac)  # new signature
    except TypeError:
        feature_list, scaler, model, tau_art = load_artifacts(ac)  # type: ignore
        tau_map = {}
    return feature_list, scaler, model, tau_art, (tau_map or {})

def _choose_tau(tau_arg, tau_art, tau_map, ticker):
    if tau_arg is not None:
        try: return float(tau_arg)
        except Exception: pass
    if ticker and isinstance(tau_map, dict) and ticker in tau_map:
        try: return float(tau_map[ticker])
        except Exception: pass
    return float(tau_art if tau_art is not None else DEFAULT_TAU)

def _load_df(source: str, asset_class: str, ticker, start, end) -> pd.DataFrame:
    if source == "repo":
        df = load_repo_df()
        if ticker and "ticker" in df.columns:
            df = df.loc[df["ticker"] == ticker].copy()
        return df
    if not ticker:
        raise ValueError("ticker is required when source='fetch'")
    if asset_class == "equity":
        return fetch_equity_df(ticker, start, end)
    else:
        return fetch_crypto_df(ticker, start, end)

def predict_core(asset_class: str, source: str, ticker: Optional[str], start: Optional[date], end: Optional[date], fee_bps: int = 5, tau: Optional[float] = None):
    '''Score a price frame with the saved classifier and summarise a thresholded strategy.

    Args:
        asset_class: Forwarded to _load_df and _load_artifacts_safe.
        source: Forwarded to _load_df ("repo" reads the repo dataset, other values fetch).
        ticker: Optional symbol. When None and the loaded frame has a "ticker"
            column, the ticker of the first row is used for the tau lookup and labels.
        start, end: Forwarded to _load_df (only used on the fetch path).
        fee_bps: Fee in basis points charged on every row where the signal changes.
        tau: Explicit threshold; None defers to _choose_tau.

    Returns:
        (pred_df, meta). pred_df has the columns date, ticker, close, proba,
        signal and tau_used. meta is a dict with asset_class, ticker, n,
        used_features, tau_used, fee_bps, equity_final, buyhold_final and metrics.

    Raises:
        ValueError: when make_dataset yields no rows (and whatever _load_df raises).
    '''
    # Data
    df = _load_df(source, asset_class, ticker, start, end)
    if "date" in df.columns:
        # Best effort: if parsing fails the column is left as loaded.
        try: df["date"] = pd.to_datetime(df["date"])
        except Exception: pass
    if ticker is None and "ticker" in df.columns and not df.empty:
        # No ticker requested: adopt the first row's ticker for tau lookup and labelling.
        ticker = str(df["ticker"].iloc[0])

    # Artifacts & dataset
    feature_list, scaler, model, tau_art, tau_map = _load_artifacts_safe(asset_class)
    tau_used = _choose_tau(tau, tau_art, tau_map, ticker)

    # NOTE(review): make_dataset lives in app.lib_features; idx is used below with
    # df.iloc, i.e. as positional row indices — confirm it is positional, not label-based.
    X, y, retn, idx, used_cols = make_dataset(df, feature_list)
    if len(X) == 0:
        raise ValueError("No usable rows after feature alignment / NA drop.")
    Xs = scaler.transform(X)
    # Keep probabilities strictly inside (0, 1).
    p  = np.clip(predict_proba(model, Xs), 1e-6, 1-1e-6)
    # Signal is 1 where the probability reaches the threshold, else 0.
    sig = (p >= tau_used).astype(int)

    # Equity summary
    # flips marks rows where the signal differs from the previous row; the first
    # row stays 0, so the initial position is never charged a fee.
    flips = np.zeros_like(sig)
    if len(flips) > 1:
        flips[1:] = (sig[1:] != sig[:-1]).astype(int)
    fee = flips * (fee_bps/10000.0)
    # Final cumulative products: strategy (retn counted only where sig == 1, minus fees)
    # versus holding every row. Presumably retn is a per-row simple return — TODO confirm.
    eq  = float(np.cumprod(1 + (retn*sig - fee))[-1])
    bh  = float(np.cumprod(1 + retn)[-1])

    # Predictions frame
    if "date" in df.columns:
        # Unparseable dates are coerced to NaT before formatting as YYYY-MM-DD.
        dates = pd.to_datetime(df.iloc[idx]["date"], errors="coerce").astype("datetime64[ns]").dt.strftime("%Y-%m-%d").to_numpy()
    else:
        dates = np.asarray(idx).astype(str)
    if "close" in df.columns:
        close = pd.to_numeric(df.iloc[idx]["close"], errors="coerce").to_numpy()
    else:
        # No close column: fill with NaN so the output schema stays the same.
        close = np.full(len(idx), np.nan, dtype=float)

    pred_df = pd.DataFrame({
        "date": dates,
        "ticker": ticker if ticker else (df["ticker"].iloc[0] if "ticker" in df.columns else None),
        "close": close,
        "proba": p,
        "signal": sig.astype(int),
        "tau_used": np.full(len(sig), float(tau_used)),
    })

    # Metrics are optional: computed only when labels line up with the predictions,
    # and any failure inside metrics_all yields an empty dict.
    metrics = {}
    if y is not None and len(y) == len(p):
        try: metrics = metrics_all(y, p)
        except Exception: metrics = {}

    meta = {
        "asset_class": asset_class,
        "ticker": ticker,
        "n": int(len(pred_df)),
        "used_features": list(used_cols) if isinstance(used_cols, (list, tuple)) else used_cols,
        "tau_used": float(tau_used),
        "fee_bps": int(fee_bps),
        "equity_final": eq,
        "buyhold_final": bh,
        "metrics": metrics,
    }
    return pred_df, meta

@app.get("/health")
def health():
    # Liveness probe: a fixed ok flag plus the version configured on the FastAPI app.
    status = {"ok": True}
    status["version"] = app.version
    return status

def _json_safe(value):
    '''Recursively make a payload JSON-compliant.

    numpy arrays and scalars become builtin lists and scalars, and non-finite
    floats (NaN, inf) become None, because the JSON response renderer rejects them.
    '''
    if isinstance(value, dict):
        return {key: _json_safe(item) for key, item in value.items()}
    if isinstance(value, (list, tuple)):
        return [_json_safe(item) for item in value]
    if isinstance(value, np.ndarray):
        return _json_safe(value.tolist())
    if isinstance(value, np.generic):
        value = value.item()
    if isinstance(value, float) and not np.isfinite(value):
        return None
    return value

@app.post("/predict")
def predict(req: PredictRequest):
    # Comments are used instead of a docstring so the generated OpenAPI description is unchanged.
    # Input problems reported by predict_core as ValueError (no ticker for a fetch, no usable
    # rows) are returned as HTTP 400 with the message instead of an opaque 500.
    from fastapi import HTTPException  # local import: only this handler needs it

    try:
        df, meta = predict_core(
            asset_class=req.asset_class,
            source=req.source,
            ticker=req.ticker,
            start=req.start,
            end=req.end,
            fee_bps=req.fee_bps,
            tau=req.tau,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    # close / equity values can be NaN; sanitise so the response can be serialised.
    return _json_safe({"meta": meta, "predictions": df.to_dict(orient="records")})
""").lstrip()

# Write the generated module into the app package and report the absolute path.
out = ROOT.joinpath("app", "api.py")
out.write_text(code, encoding="utf-8")
print("Wrote:", out.resolve())


Wrote: C:\.projects\stock-direction-ml\stock-direction-ml\app\api.py


In [40]:
# NB24 — write scripts/predict_cli.py (CLI)
from textwrap import dedent
from pathlib import Path

code = dedent("""
# scripts/predict_cli.py — programmatic predictions via CLI
import argparse
import sys
from pathlib import Path

import pandas as pd

# Running this file as "python scripts/predict_cli.py" puts scripts/ (not the repo root)
# first on sys.path, so the "app" package is only importable once the root is added.
REPO_ROOT = Path(__file__).resolve().parents[1]
if str(REPO_ROOT) not in sys.path:
    sys.path.insert(0, str(REPO_ROOT))

from app.api import predict_core  # reuse API core

def _parse_day(text):
    # Optional YYYY-MM-DD string -> datetime.date (None / empty stays None).
    return pd.to_datetime(text).date() if text else None

def main():
    '''Parse the CLI arguments, run predict_core and write the predictions to CSV.'''
    ap = argparse.ArgumentParser(description="Direction Classifier CLI")
    ap.add_argument("--asset", choices=["equity","crypto"], default="equity")
    ap.add_argument("--source", choices=["repo","fetch"], default="repo")
    ap.add_argument("--ticker", type=str, help="Ticker (required for fetch; optional for repo)")
    ap.add_argument("--start", type=str, default=None, help="YYYY-MM-DD (fetch only)")
    ap.add_argument("--end", type=str, default=None, help="YYYY-MM-DD (fetch only)")
    ap.add_argument("--fee-bps", type=int, default=5)
    ap.add_argument("--tau", type=float, default=None)
    ap.add_argument("--out", type=str, default=None, help="Output CSV path (default under reports/demo)")
    args = ap.parse_args()

    # Fail with a usage message instead of a traceback from predict_core.
    if args.source == "fetch" and not args.ticker:
        ap.error("--ticker is required when --source fetch")

    df, meta = predict_core(
        asset_class=args.asset,
        source=args.source,
        ticker=args.ticker,
        start=_parse_day(args.start),
        end=_parse_day(args.end),
        fee_bps=args.fee_bps,
        tau=args.tau,
    )

    # Default file name: preds_<asset>_<ticker without dashes>.csv under reports/demo (relative to the CWD).
    out = Path(args.out) if args.out else (Path("reports")/"demo"/f"preds_{args.asset}_{(args.ticker or 'UNKNOWN').replace('-','')}.csv")
    out.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(out, index=False)
    print("Wrote:", out.resolve())
    print("Meta:", meta)

if __name__ == "__main__":
    main()
""").lstrip()

# Make sure scripts/ exists, then drop the generated CLI into it.
scripts_dir = ROOT.joinpath("scripts")
scripts_dir.mkdir(exist_ok=True)
out = scripts_dir.joinpath("predict_cli.py")
out.write_text(code, encoding="utf-8")
print("Wrote:", out.resolve())


Wrote: C:\.projects\stock-direction-ml\stock-direction-ml\scripts\predict_cli.py


In [41]:
# NB24 — ensure 'app' is a package and on sys.path
import sys

# Mark app/ as a package and put the repo root at the front of sys.path (once),
# so "import app.…" resolves inside this kernel.
pkg_root = ROOT
app_pkg = pkg_root.joinpath("app")
app_pkg.joinpath("__init__.py").touch(exist_ok=True)
if sys.path.count(str(pkg_root)) == 0:
    sys.path.insert(0, str(pkg_root))
print("sys.path OK & app/__init__.py present")


sys.path OK & app/__init__.py present


In [42]:
# NB24 — update notebooks/requirements.txt to include fastapi & uvicorn
# Current requirement lines; an absent file is treated as an empty list.
req = ROOT.joinpath("notebooks", "requirements.txt")
if req.exists():
    lines = req.read_text(encoding="utf-8").splitlines()
else:
    lines = []

def ensure(pkg: str, target=None):
    """Append ``pkg`` to the requirement lines unless that distribution is already listed.

    Entries are compared by distribution name only: extras, version specifiers,
    markers and trailing comments are ignored, and names are compared
    case-insensitively with runs of ``-``, ``_`` and ``.`` treated as equal.
    The earlier prefix test treated e.g. ``fastapi-utils`` as if it were
    ``fastapi`` and so skipped the real package.

    Args:
        pkg: Requirement to add, e.g. ``"uvicorn[standard]"``.
        target: List of requirement lines to update in place. Defaults to the
            notebook-level ``lines`` list.
    """
    import re  # local import: the notebook has no shared import cell to extend

    entries = lines if target is None else target

    def _dist_name(spec: str) -> str:
        # Everything before the first extras/specifier/marker/comment character is the name.
        head = re.split(r"[\s\[<>=!~;#@]", spec.strip(), maxsplit=1)[0]
        return re.sub(r"[-_.]+", "-", head).lower()

    wanted = _dist_name(pkg)
    if not any(_dist_name(entry) == wanted for entry in entries):
        entries.append(pkg)

# Register the API dependencies, then persist the list with a trailing newline.
ensure("fastapi")
ensure("uvicorn[standard]")
req.write_text(
    "\n".join(lines) + "\n",
    encoding="utf-8",
)
print("Updated:", req.resolve())
# Echo the file back as a quick visual check of the final content.
print(req.read_text(encoding="utf-8"))


Updated: C:\.projects\stock-direction-ml\stock-direction-ml\notebooks\requirements.txt
streamlit
scikit-learn==1.7.2
pandas==2.3.2
numpy==2.3.3
joblib==1.5.2
matplotlib==3.10.6
pyarrow==21.0.0
yfinance
fastapi
uvicorn[standard]



In [43]:
# NB24 — install API deps into THIS kernel
import subprocess
import sys

# Invoke pip through the kernel's own interpreter so the packages land in this environment.
subprocess.check_call(
    [
        sys.executable,
        "-m",
        "pip",
        "install",
        "fastapi",
        "uvicorn[standard]",
    ]
)
print("Installed: fastapi, uvicorn[standard]")


Installed: fastapi, uvicorn[standard]


In [44]:
# NB24 — Cell 7a: force-rewrite app/api.py and import it directly from path
import sys, types, importlib.util, importlib
from pathlib import Path
from textwrap import dedent

# Locations of the app package and the module to regenerate; keep the package marker present.
app_dir = ROOT.joinpath("app")
api_path = app_dir.joinpath("api.py")
app_dir.joinpath("__init__.py").touch(exist_ok=True)

# Canonical API code (includes predict_core)
api_code = dedent("""
from __future__ import annotations
import numpy as np
import pandas as pd
from datetime import date
from typing import Optional

from fastapi import FastAPI
from pydantic import BaseModel

from app.config import ROOT, DEFAULT_TAU
from app.lib_artifacts import load_artifacts
from app.lib_fetch import load_repo_df, fetch_equity_df, fetch_crypto_df
from app.lib_features import make_dataset
from app.lib_eval import predict_proba, metrics_all

__all__ = ["app", "predict_core"]

app = FastAPI(title="Direction Classifier API", version="1.0.0")

class PredictRequest(BaseModel):
    # Request body for POST /predict; the predict handler forwards every field to predict_core.
    asset_class: str = "equity"   # "equity" or "crypto"; not validated here, any value other than "equity" is treated as crypto by _load_artifacts_safe and _load_df
    source: str = "repo"          # "repo" or "fetch"; any value other than "repo" takes the fetch path in _load_df
    ticker: Optional[str] = None  # required for the fetch path; optional row filter for the repo dataset
    start: Optional[date] = None  # forwarded to the fetch helpers; not used when source == "repo"
    end: Optional[date] = None    # forwarded to the fetch helpers; not used when source == "repo"
    fee_bps: int = 5              # fee in basis points charged per signal flip in predict_core
    tau: Optional[float] = None   # explicit threshold; None lets _choose_tau fall back to the per-ticker / artifact / default value

def _load_artifacts_safe(asset_class: str):
    ac = "equity" if asset_class == "equity" else "crypto"
    try:
        feature_list, scaler, model, tau_art, tau_map = load_artifacts(ac)
    except TypeError:
        feature_list, scaler, model, tau_art = load_artifacts(ac)  # type: ignore
        tau_map = {}
    return feature_list, scaler, model, tau_art, (tau_map or {})

def _choose_tau(tau_arg, tau_art, tau_map, ticker):
    if tau_arg is not None:
        try: return float(tau_arg)
        except Exception: pass
    if ticker and isinstance(tau_map, dict) and ticker in tau_map:
        try: return float(tau_map[ticker])
        except Exception: pass
    return float(tau_art if tau_art is not None else DEFAULT_TAU)

def _load_df(source: str, asset_class: str, ticker, start, end) -> pd.DataFrame:
    if source == "repo":
        df = load_repo_df()
        if ticker and "ticker" in df.columns:
            df = df.loc[df["ticker"] == ticker].copy()
        return df
    if not ticker:
        raise ValueError("ticker is required when source='fetch'")
    if asset_class == "equity":
        return fetch_equity_df(ticker, start, end)
    else:
        return fetch_crypto_df(ticker, start, end)

def predict_core(asset_class: str, source: str, ticker: Optional[str], start: Optional[date], end: Optional[date], fee_bps: int = 5, tau: Optional[float] = None):
    '''Score a price frame with the saved classifier and summarise a thresholded strategy.

    Args:
        asset_class: Forwarded to _load_df and _load_artifacts_safe.
        source: Forwarded to _load_df ("repo" reads the repo dataset, other values fetch).
        ticker: Optional symbol. When None and the loaded frame has a "ticker"
            column, the ticker of the first row is used for the tau lookup and labels.
        start, end: Forwarded to _load_df (only used on the fetch path).
        fee_bps: Fee in basis points charged on every row where the signal changes.
        tau: Explicit threshold; None defers to _choose_tau.

    Returns:
        (pred_df, meta). pred_df has the columns date, ticker, close, proba,
        signal and tau_used. meta is a dict with asset_class, ticker, n,
        used_features, tau_used, fee_bps, equity_final, buyhold_final and metrics.

    Raises:
        ValueError: when make_dataset yields no rows (and whatever _load_df raises).
    '''
    # Data
    df = _load_df(source, asset_class, ticker, start, end)
    if "date" in df.columns:
        # Best effort: if parsing fails the column is left as loaded.
        try: df["date"] = pd.to_datetime(df["date"])
        except Exception: pass
    if ticker is None and "ticker" in df.columns and not df.empty:
        # No ticker requested: adopt the first row's ticker for tau lookup and labelling.
        ticker = str(df["ticker"].iloc[0])

    # Artifacts & dataset
    feature_list, scaler, model, tau_art, tau_map = _load_artifacts_safe(asset_class)
    tau_used = _choose_tau(tau, tau_art, tau_map, ticker)

    # NOTE(review): make_dataset lives in app.lib_features; idx is used below with
    # df.iloc, i.e. as positional row indices — confirm it is positional, not label-based.
    X, y, retn, idx, used_cols = make_dataset(df, feature_list)
    if len(X) == 0:
        raise ValueError("No usable rows after feature alignment / NA drop.")
    Xs = scaler.transform(X)
    # Keep probabilities strictly inside (0, 1).
    p  = np.clip(predict_proba(model, Xs), 1e-6, 1-1e-6)
    # Signal is 1 where the probability reaches the threshold, else 0.
    sig = (p >= tau_used).astype(int)

    # Equity summary
    # flips marks rows where the signal differs from the previous row; the first
    # row stays 0, so the initial position is never charged a fee.
    flips = np.zeros_like(sig)
    if len(flips) > 1:
        flips[1:] = (sig[1:] != sig[:-1]).astype(int)
    fee = flips * (fee_bps/10000.0)
    # Final cumulative products: strategy (retn counted only where sig == 1, minus fees)
    # versus holding every row. Presumably retn is a per-row simple return — TODO confirm.
    eq  = float(np.cumprod(1 + (retn*sig - fee))[-1])
    bh  = float(np.cumprod(1 + retn)[-1])

    # Predictions frame
    if "date" in df.columns:
        # Unparseable dates are coerced to NaT before formatting as YYYY-MM-DD.
        dates = pd.to_datetime(df.iloc[idx]["date"], errors="coerce").astype("datetime64[ns]").dt.strftime("%Y-%m-%d").to_numpy()
    else:
        dates = np.asarray(idx).astype(str)
    if "close" in df.columns:
        close = pd.to_numeric(df.iloc[idx]["close"], errors="coerce").to_numpy()
    else:
        # No close column: fill with NaN so the output schema stays the same.
        close = np.full(len(idx), np.nan, dtype=float)

    pred_df = pd.DataFrame({
        "date": dates,
        "ticker": ticker if ticker else (df["ticker"].iloc[0] if "ticker" in df.columns else None),
        "close": close,
        "proba": p,
        "signal": sig.astype(int),
        "tau_used": np.full(len(sig), float(tau_used)),
    })

    # Metrics are optional: computed only when labels line up with the predictions,
    # and any failure inside metrics_all yields an empty dict.
    metrics = {}
    if y is not None and len(y) == len(p):
        try: metrics = metrics_all(y, p)
        except Exception: metrics = {}

    meta = {
        "asset_class": asset_class,
        "ticker": ticker,
        "n": int(len(pred_df)),
        "used_features": list(used_cols) if isinstance(used_cols, (list, tuple)) else used_cols,
        "tau_used": float(tau_used),
        "fee_bps": int(fee_bps),
        "equity_final": eq,
        "buyhold_final": bh,
        "metrics": metrics,
    }
    return pred_df, meta

# Routes are registered on the single FastAPI instance created near the top of this module.
# (A second, identical "app" and a repeated PredictRequest model used to be declared here;
# they only shadowed the earlier definitions.)
@app.get("/health")
def health():
    # Liveness probe: a fixed ok flag plus the version configured on the FastAPI app.
    return {"ok": True, "version": app.version}

def _json_safe(value):
    '''Recursively make a payload JSON-compliant.

    numpy arrays and scalars become builtin lists and scalars, and non-finite
    floats (NaN, inf) become None, because the JSON response renderer rejects them.
    '''
    if isinstance(value, dict):
        return {key: _json_safe(item) for key, item in value.items()}
    if isinstance(value, (list, tuple)):
        return [_json_safe(item) for item in value]
    if isinstance(value, np.ndarray):
        return _json_safe(value.tolist())
    if isinstance(value, np.generic):
        value = value.item()
    if isinstance(value, float) and not np.isfinite(value):
        return None
    return value

@app.post("/predict")
def predict(req: PredictRequest):
    # Comments are used instead of a docstring so the generated OpenAPI description is unchanged.
    # Input problems reported by predict_core as ValueError (no ticker for a fetch, no usable
    # rows) are returned as HTTP 400 with the message instead of an opaque 500.
    from fastapi import HTTPException  # local import: only this handler needs it

    try:
        df, meta = predict_core(
            asset_class=req.asset_class,
            source=req.source,
            ticker=req.ticker,
            start=req.start,
            end=req.end,
            fee_bps=req.fee_bps,
            tau=req.tau,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    # close / equity values can be NaN; sanitise so the response can be serialised.
    return _json_safe({"meta": meta, "predictions": df.to_dict(orient="records")})
""").lstrip()

# Persist the canonical source and confirm the core entry point is part of the text.
api_path.write_text(api_code, encoding="utf-8")
print("Rewrote:", api_path.resolve())
print("predict_core present?", api_code.find("def predict_core(") != -1)

# Drop any stale copy so the file written above is what gets executed.
if "app.api" in sys.modules:
    del sys.modules["app.api"]

# Register a minimal 'app' package when none is loaded (or the loaded one has no
# usable __path__), so the "from app.… import" lines inside api.py can resolve.
if not getattr(sys.modules.get("app"), "__path__", None):
    pkg = types.ModuleType("app")
    pkg.__path__ = [str(app_dir)]
    sys.modules["app"] = pkg

# Execute api.py straight from its path; the module is registered before execution.
spec = importlib.util.spec_from_file_location("app.api", str(api_path))
api = importlib.util.module_from_spec(spec)
sys.modules["app.api"] = api
assert spec.loader is not None
spec.loader.exec_module(api)
print("Loaded:", getattr(api, "__file__", "<no-file>"))
print("Has predict_core?", hasattr(api, "predict_core"))


Rewrote: C:\.projects\stock-direction-ml\stock-direction-ml\app\api.py
predict_core present? True
Loaded: C:\.projects\stock-direction-ml\stock-direction-ml\app\api.py
Has predict_core? True


In [45]:
# NB24 — Cell 7b: smoke test (equity/repo) using the fresh api module
import importlib, importlib.util, sys
from pathlib import Path

# Load lib_fetch reliably too
fetch_path = ROOT.joinpath("app", "lib_fetch.py")
try:
    fetch = importlib.import_module("app.lib_fetch")
except ModuleNotFoundError:
    # Regular import failed: execute the module directly from its file instead.
    spec = importlib.util.spec_from_file_location("app.lib_fetch", str(fetch_path))
    fetch = importlib.util.module_from_spec(spec)
    sys.modules["app.lib_fetch"] = fetch
    assert spec.loader is not None
    spec.loader.exec_module(fetch)

# Choose the ticker to score: AAPL when available, otherwise the most frequent
# ticker, or None when the repo frame has no ticker column at all.
df_repo = fetch.load_repo_df()
if "ticker" not in df_repo.columns:
    tick = None
elif "AAPL" in set(df_repo["ticker"]):
    tick = "AAPL"
else:
    tick = str(df_repo["ticker"].value_counts().idxmax())

# Run the core prediction path against the repo dataset with default fee and threshold.
pred_df, meta = api.predict_core(
    asset_class="equity",
    source="repo",
    ticker=tick,
    start=None,
    end=None,
    fee_bps=5,
    tau=None,
)
print("Rows:", len(pred_df), "| ticker:", tick, "| tau_used:", meta.get("tau_used"))
print(pred_df.tail(3))


Rows: 2686 | ticker: AAPL | tau_used: 0.4499999999999999
            date ticker       close     proba  signal  tau_used
2683  2025-10-08   AAPL  258.059998  0.528984       1      0.45
2684  2025-10-09   AAPL  254.039993  0.561558       1      0.45
2685  2025-10-10   AAPL  245.270004  0.565084       1      0.45


In [46]:
# NB24 — run instructions
# Emit the run instructions as one block; empty entries produce the blank separator lines.
print("\n".join((
    "Run API (from REPO ROOT):",
    "  python -m pip install -r notebooks/requirements.txt",
    "  uvicorn app.api:app --reload --port=8000",
    "",
    "Test with curl (POST /predict):",
    '  curl -X POST "http://127.0.0.1:8000/predict" \\',
    '       -H "Content-Type: application/json" \\',
    '       -d "{\\"asset_class\\":\\"equity\\",\\"source\\":\\"repo\\",\\"ticker\\":\\"AAPL\\",\\"fee_bps\\":5}"',
    "",
    "CLI examples (from REPO ROOT):",
    "  python scripts/predict_cli.py --asset equity --source repo --ticker AAPL --out reports/demo/preds_equity_AAPL_cli.csv",
    "  python scripts/predict_cli.py --asset crypto --source fetch --ticker BTC-USD --start 2025-01-01 --end 2025-10-22 --out reports/demo/preds_crypto_BTCUSD_cli.csv",
)))


Run API (from REPO ROOT):
  python -m pip install -r notebooks/requirements.txt
  uvicorn app.api:app --reload --port=8000

Test with curl (POST /predict):
  curl -X POST "http://127.0.0.1:8000/predict" \
       -H "Content-Type: application/json" \
       -d "{\"asset_class\":\"equity\",\"source\":\"repo\",\"ticker\":\"AAPL\",\"fee_bps\":5}"

CLI examples (from REPO ROOT):
  python scripts/predict_cli.py --asset equity --source repo --ticker AAPL --out reports/demo/preds_equity_AAPL_cli.csv
  python scripts/predict_cli.py --asset crypto --source fetch --ticker BTC-USD --start 2025-01-01 --end 2025-10-22 --out reports/demo/preds_crypto_BTCUSD_cli.csv
