<a href="https://colab.research.google.com/github/viki-m13/SPX-Iron-Condor-Edge-vs-Baseline-Model-band-1-/blob/main/SPX_Iron_Condor_(Max_Edge).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#0.40%

In [None]:
#!/usr/bin/env python3
# SPX Iron Condor — Optimize filters for MAX EDGE VS BASELINE with band ≤ 1.0%
# - Objective: maximize (hit_rate - baseline_hit_rate)
# - Searches over band_pct grid (upper bound 1%)
# - Keeps only parameter sets with at least Config.min_signals signals (instances)
# - Data: yfinance (^GSPC, ^VIX), daily
# - Prints rich diagnostics similar to your previous version
# pip install yfinance pandas numpy

import warnings, builtins, json, itertools
from dataclasses import dataclass
from typing import Tuple, Dict, List

import numpy as np
import pandas as pd
import yfinance as yf

warnings.filterwarnings("ignore")

# ---------------- Config ----------------
@dataclass
class Config:
    """Run settings: data source, report size, signal threshold and search grids.

    The ``grid_*`` fields default to ``None``; grid_search() iterates over them
    directly, so they must be supplied as lists (see the module-level ``CFG``).
    """
    ticker: str = "^GSPC"         # SPX proxy
    vix_ticker: str = "^VIX"      # volatility index symbol, loaded alongside `ticker`
    start: str = "2010-01-01"     # first date requested from yfinance
    end: str = None               # None means "latest available"
    interval: str = "1d"          # bar interval passed to yfinance

    # band_pct is searched via grid_band_pct; this default is not read by the
    # functions visible in this section of the file.
    band_pct: float = 0.01

    last_n: int = 10              # how many past trades to print in the report
    min_signals: int = 1          # reject param sets with fewer signals than this

    # Grids to search (you can widen/narrow)
    grid_band_pct: List[float] = None       # half-width of the predicted band, as a fraction of close
    grid_avgabs_window: List[int] = None    # rolling window lengths for the mean absolute return
    grid_avgabs_max: List[float] = None     # upper thresholds for that rolling mean
    grid_vix_max: List[float] = None        # upper thresholds for the VIX close
    grid_sma_n: List[int] = None            # simple-moving-average lengths for the trend filter
    grid_use_trend: List[bool] = None  # whether to require close > SMA(n)

# Default configuration used by main(). Only the search grids are overridden;
# every other field keeps its Config default. The full Cartesian product of the
# grids below is 8 * 6 * 9 * 7 * 6 * 2 = 36,288 parameter sets.
CFG = Config(
    # Band grid (≤ 1%); feel free to adjust granularity
    grid_band_pct=[0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009, 0.01],

    # Rolling window lengths (in bars) for the mean absolute return
    grid_avgabs_window=[3,4,5,6,7,10],
    # A signal requires the rolling mean absolute return to be below this value
    grid_avgabs_max=[0.0035,0.004,0.0045,0.005,0.0055,0.006,0.0065,0.007,0.0075],
    # A signal requires the VIX close to be below this value
    grid_vix_max=[12,13,14,15,16,17,18],
    # SMA lengths used by the optional trend filter
    grid_sma_n=[5,8,10,12,15,20],
    # Evaluate each combination with and without the trend filter
    grid_use_trend=[True, False],
)

# --------------- Data -------------------
def _flatten_cols(df: pd.DataFrame) -> pd.DataFrame:
    """Lower-case the column labels of ``df`` in place and return the same frame.

    When the columns are a MultiIndex only the first level of each label is
    kept; otherwise the whole label is stringified.
    """
    multi_level = isinstance(df.columns, pd.MultiIndex)
    renamed = []
    for label in df.columns:
        top = label[0] if multi_level else label
        renamed.append(builtins.str(top).lower())
    df.columns = renamed
    return df

def load_yf_ohlc(ticker: str, start: str, end: str=None, interval: str="1d") -> pd.DataFrame:
    """Download bars for ``ticker`` via yfinance and return its OHLC columns.

    Column labels are normalised with _flatten_cols(); rows containing any
    missing value among open/high/low/close are dropped.

    Raises:
        SystemExit: if the download is empty or a required column is missing.
    """
    raw = yf.download(
        ticker,
        start=start,
        end=end,
        interval=interval,
        auto_adjust=True,
        progress=False,
        group_by="column",
    )
    if raw is None or raw.empty:
        raise SystemExit(f"No data for {ticker}.")

    flat = _flatten_cols(raw)
    required = ["open", "high", "low", "close"]
    # Report the first required column that is absent, in the order listed above.
    absent = next((col for col in required if col not in flat.columns), None)
    if absent is not None:
        raise SystemExit(f"Missing {absent} in {ticker}.")
    return flat[required].dropna().copy()

def load_market(cfg: Config) -> Tuple[pd.DataFrame, pd.Series]:
    """Load the price frame and a VIX close series aligned to the price index.

    Returns:
        ``(px, vix)`` where ``vix`` is named "vix", reindexed onto ``px.index``
        and forward-filled over dates that have no VIX bar.
    """
    window = (cfg.start, cfg.end, cfg.interval)
    prices = load_yf_ohlc(cfg.ticker, *window)
    vix_close = load_yf_ohlc(cfg.vix_ticker, *window)["close"].rename("vix")
    aligned_vix = vix_close.reindex(prices.index).ffill()
    return prices, aligned_vix

# ----------- Core builder -----------
def build_frame(px: pd.DataFrame, vix: pd.Series, band_pct: float,
                avgabs_window: int, avgabs_max: float,
                vix_max: float, sma_n: int, use_trend: bool) -> pd.DataFrame:
    """Build the per-day signal/verification frame for one parameter set.

    Columns suffixed ``_t`` describe the day the row belongs to (when the
    signal and band are formed); columns suffixed ``_for_t1`` hold the previous
    row's values, so each row can be checked against its own close.
    Rows containing any missing value are dropped before returning.
    """
    prices = px["close"]
    daily_ret = prices.pct_change()
    moving_avg = prices.rolling(sma_n).mean()

    frame = pd.DataFrame(index=px.index)
    frame["close"] = prices
    frame["ret"] = daily_ret
    frame["avgabs"] = daily_ret.abs().rolling(avgabs_window).mean()
    frame["vix"] = vix
    frame["above_sma"] = prices.gt(moving_avg).astype(int)

    # Individual filters, kept as 0/1 columns so they can be recombined later.
    frame["cond_low_vol_t"] = frame["avgabs"].lt(avgabs_max).astype(int)
    frame["cond_vix_t"] = frame["vix"].lt(vix_max).astype(int)
    if use_trend:
        trend_ok = frame["above_sma"].eq(1)
    else:
        # Trend filter disabled: the self-comparison is true on every row.
        trend_ok = frame["above_sma"].eq(frame["above_sma"])
    frame["cond_trend_t"] = trend_ok.astype(int)

    # All three filters must hold for a signal.
    combined = frame["cond_low_vol_t"] & frame["cond_vix_t"] & frame["cond_trend_t"]
    frame["signal_t"] = combined.astype(int)

    # Symmetric band around the close of the signal day.
    frame["band_low_t"] = frame["close"] * (1.0 - band_pct)
    frame["band_high_t"] = frame["close"] * (1.0 + band_pct)

    # Carry signal, band and base close forward by one row.
    frame["pred_for_t1"] = frame["signal_t"].shift(1).fillna(0).astype(int)
    frame["band_low_for_t1"] = frame["band_low_t"].shift(1)
    frame["band_high_for_t1"] = frame["band_high_t"].shift(1)
    frame["base_close_t"] = frame["close"].shift(1)
    frame["next_close_t1"] = frame["close"]

    # 1 when this row's close lies within the band carried from the prior row.
    within = frame["next_close_t1"].between(frame["band_low_for_t1"], frame["band_high_for_t1"])
    frame["inside_today"] = within.astype(int)

    # Index label of the previous row, used when printing trades.
    frame["pred_date"] = frame.index.to_series().shift(1)

    return frame.dropna()

def trades_table(df: pd.DataFrame) -> pd.DataFrame:
    """Return one row per signalled trade with hit/miss diagnostics.

    Args:
        df: frame produced by build_frame(); rows with ``pred_for_t1 == 1``
            are treated as trades.

    Returns:
        A frame with columns ``pred_date, verify_date, base_close_t, band_low,
        band_high, next_close_t1, inside, abs_move_pct, edge_gap_pct, vix,
        avgabs, miss_type, overshoot_pct``. When there are no trades the frame
        is empty but still carries these columns, so callers see one schema.
    """
    cols = ["pred_date","verify_date","base_close_t","band_low_for_t1","band_high_for_t1",
            "next_close_t1","inside_today","abs_move_pct","edge_gap_pct","vix","avgabs","miss_type","overshoot_pct"]
    renames = {
        "band_low_for_t1":"band_low",
        "band_high_for_t1":"band_high",
        "inside_today":"inside"
    }

    t = df[df["pred_for_t1"] == 1].copy()
    if t.empty:
        # Keep the output schema identical to the non-empty case.
        return t.reindex(columns=cols).rename(columns=renames)

    t["verify_date"] = t.index
    t["abs_move_pct"]= (t["next_close_t1"]/t["base_close_t"] - 1.0).abs()

    # Distance to nearest edge (positive on hits, negative on misses)
    gap_low  = (t["next_close_t1"] - t["band_low_for_t1"])  / t["base_close_t"]
    gap_high = (t["band_high_for_t1"] - t["next_close_t1"]) / t["base_close_t"]
    t["edge_gap_pct"] = np.minimum(gap_low, gap_high)

    # Miss type & overshoot as % of base, derived from band edges (no cfg needed).
    # Assigning through .loc creates each column on first use; rows that are
    # hits keep a missing value in both columns.
    miss = t["inside_today"] == 0
    t.loc[miss & (t["next_close_t1"] > t["band_high_for_t1"]), "miss_type"] = "broke_high"
    t.loc[miss & (t["next_close_t1"] < t["band_low_for_t1"]),  "miss_type"] = "broke_low"
    t.loc[miss & (t["miss_type"]=="broke_high"), "overshoot_pct"] = (t["next_close_t1"] - t["band_high_for_t1"]) / t["base_close_t"]
    t.loc[miss & (t["miss_type"]=="broke_low"),  "overshoot_pct"] = (t["band_low_for_t1"] - t["next_close_t1"]) / t["base_close_t"]

    return t[cols].rename(columns=renames)

def accuracy(trades: pd.DataFrame, df_all: pd.DataFrame, band_pct: float) -> Dict:
    """Summarise trade hit rate against the unconditional baseline.

    The baseline is the share of rows in ``df_all`` whose absolute return is
    at most ``band_pct``. With no trades, hit rate and edge are NaN.
    """
    within_band = df_all["ret"].abs().le(band_pct)
    base_rate = float(within_band.mean())

    if trades.empty:
        count = 0
        hit_rate = float("nan")
        edge = float("nan")
    else:
        count = int(len(trades))
        hit_rate = float(trades["inside"].mean())
        edge = hit_rate - base_rate

    summary = {}
    summary["signals"] = count
    summary["hit_rate"] = hit_rate
    summary["baseline_hit_rate"] = base_rate
    summary["edge_vs_baseline"] = edge
    return summary

# ----------- Helper: Evaluate custom mask at t (verify at t+1) -----------
def eval_mask(df: pd.DataFrame, mask_t: pd.Series) -> Dict:
    """Score an arbitrary day-t mask: shift it one row and measure band hits.

    Returns the number of selected rows and the share whose close lies within
    the band carried from the prior row (NaN hit rate when nothing is selected).
    """
    active = mask_t.shift(1).fillna(0).astype(int) == 1
    picked = df[active].copy()
    if picked.empty:
        return {"signals": 0, "hit_rate": float("nan")}
    within = picked["next_close_t1"].between(picked["band_low_for_t1"], picked["band_high_for_t1"])
    return {"signals": int(len(picked)), "hit_rate": float(within.mean())}

# ----------- Grid Search (maximize EDGE vs BASELINE) -----------
def grid_search(px: pd.DataFrame, vix: pd.Series, cfg: Config) -> Dict:
    """Evaluate every grid combination and rank by edge over the baseline.

    Ranking (descending): edge_vs_baseline, then hit_rate, then signals, then
    smaller band_pct, then smaller vix_max. Ties keep grid iteration order.

    Args:
        px: price frame with a "close" column.
        vix: VIX series aligned to ``px``.
        cfg: supplies the ``grid_*`` lists and ``min_signals``.

    Returns:
        ``{"best": <top record>, "all": <all accepted records, ranked>}``; each
        record holds ``params`` plus the fields returned by accuracy().

    Raises:
        SystemExit: if no parameter set produced enough signals.
    """
    def rank_key(rec: Dict):
        prm = rec["params"]
        return (rec["edge_vs_baseline"], rec["hit_rate"], rec["signals"],
                -prm["band_pct"], -prm["vix_max"])

    # A set with zero signals has a NaN hit rate and edge, which cannot be
    # ranked (NaN breaks tuple comparison and sorting). Always require at
    # least one signal, even if cfg.min_signals is 0 or negative.
    required_signals = max(1, cfg.min_signals)

    # enforce ≤ 1.0% once, instead of skipping inside the product loop
    bands = [b for b in cfg.grid_band_pct if not (b > 0.01)]

    results = []
    for band_pct, avgabs_window, avgabs_max, vix_max, sma_n, use_trend in itertools.product(
        bands, cfg.grid_avgabs_window, cfg.grid_avgabs_max,
        cfg.grid_vix_max, cfg.grid_sma_n, cfg.grid_use_trend
    ):
        df = build_frame(px, vix, band_pct, avgabs_window, avgabs_max, vix_max, sma_n, use_trend)
        trades = trades_table(df)
        summ = accuracy(trades, df, band_pct)

        if summ["signals"] < required_signals:
            continue

        results.append(dict(
            params=dict(
                band_pct=band_pct,
                avgabs_window=avgabs_window, avgabs_max=avgabs_max,
                vix_max=vix_max, sma_n=sma_n, use_trend=use_trend
            ),
            **summ
        ))

    if not results:
        raise SystemExit(
            "No parameter set met min_signals. Loosen grids (vix_max/avgabs_max) or reduce CFG.min_signals."
        )

    # sorted() is stable even with reverse=True, so the first element is the
    # earliest-evaluated record among those sharing the top key.
    results_sorted = sorted(results, key=rank_key, reverse=True)
    return {"best": results_sorted[0], "all": results_sorted}

# ----------- Diagnostics -----------
def _bucketize_vix(s: pd.Series) -> pd.Series:
    """Assign each VIX value to a fixed, right-closed bucket (0 is included)."""
    buckets = [
        (12, "<=12"),
        (14, "12-14"),
        (16, "14-16"),
        (18, "16-18"),
        (22, "18-22"),
        (28, "22-28"),
        (1000, ">28"),
    ]
    edges = [0] + [upper for upper, _ in buckets]
    names = [name for _, name in buckets]
    return pd.cut(s, bins=edges, labels=names, right=True, include_lowest=True)

def _bucketize_avgabs(s: pd.Series) -> pd.Series:
    """Bucket values into quartile bands computed from ``s`` itself.

    Bin edges are ``0, q25, q50, q75, 10.0`` (right-closed, lowest included).
    If quantiles coincide (e.g. a single value or many identical values), the
    zero-width bins are dropped so ``pd.cut`` does not raise on duplicate
    edges; the result still carries all four categories. If ``s`` has no
    non-missing value, every entry of the result is missing.
    """
    labels = ["Q1-low","Q2-mid","Q3-high","Q4-very-high"]

    valid = s.dropna()
    if valid.empty:
        # No quantiles can be computed; return an all-missing categorical.
        blank = pd.Categorical([None] * len(s), categories=labels, ordered=True)
        return pd.Series(blank, index=s.index, name=s.name)

    qs = valid.quantile([0.25,0.5,0.75]).values.tolist()

    # Keep only edges that strictly increase, together with their labels.
    edges = [0]
    kept = []
    for upper, label in zip(qs + [10.0], labels):
        if upper > edges[-1]:
            edges.append(upper)
            kept.append(label)

    out = pd.cut(s, bins=edges, labels=kept, include_lowest=True, right=True)
    if len(kept) < len(labels):
        # Restore the full category list so downstream grouping sees one schema.
        out = out.cat.set_categories(labels, ordered=True)
    return out

def rolling_hit_rates(trades: pd.DataFrame, k: int = 100) -> Dict:
    """Summarise hit rates over every run of ``k`` consecutive trades.

    Args:
        trades: frame with a 0/1 ``inside`` column, in trade order.
        k: window length in trades; must be at least 1.

    Returns:
        ``{}`` when ``trades`` is empty. Otherwise a dict with ``window`` and
        the last/min/max/p05/p95 window hit rates. When fewer than ``k``
        trades exist, the overall hit rate stands in as the only window, so
        both cases return the same keys.

    Raises:
        ValueError: if ``k`` is smaller than 1.
    """
    if trades.empty:
        return {}
    if k < 1:
        raise ValueError("k must be a positive integer")

    x = trades["inside"].astype(int).to_numpy()
    if len(x) < k:
        # Not enough trades for a full window: treat the whole history as one.
        roll = np.array([float(np.mean(x))])
    else:
        # Prefix sums give every k-length window total in one vector operation.
        csum = np.cumsum(np.insert(x,0,0))
        roll = (csum[k:] - csum[:-k]) / k

    return {
        "window": k,
        "last_window_hit_rate": float(roll[-1]),
        "min_window_hit_rate": float(np.min(roll)),
        "max_window_hit_rate": float(np.max(roll)),
        "p05_window_hit_rate": float(np.quantile(roll, 0.05)),
        "p95_window_hit_rate": float(np.quantile(roll, 0.95)),
    }

def streaks(trades: pd.DataFrame) -> Dict:
    """Return the longest consecutive runs of hits (1) and misses (0).

    An empty ``trades`` frame yields an empty dict.
    """
    if trades.empty:
        return {}

    outcomes = trades["inside"].astype(int).tolist()
    # Collapse the sequence into (value, run length) pairs.
    runs = [(value, sum(1 for _ in group)) for value, group in itertools.groupby(outcomes)]

    def longest(target: int) -> int:
        return max((length for value, length in runs if value == target), default=0)

    return {"longest_hit_streak": longest(1), "longest_miss_streak": longest(0)}

def ablations(df: pd.DataFrame) -> Dict:
    """Score each filter alone and in every combination via eval_mask().

    Keys, in order: only_vix, only_avgabs, only_trend, vix+avgabs, vix+trend,
    avgabs+trend, all_three.
    """
    flags = {
        "vix": df["cond_vix_t"] == 1,
        "avgabs": df["cond_low_vol_t"] == 1,
        "trend": df["cond_trend_t"] == 1,
    }
    recipes = [
        ("only_vix", ("vix",)),
        ("only_avgabs", ("avgabs",)),
        ("only_trend", ("trend",)),
        ("vix+avgabs", ("vix", "avgabs")),
        ("vix+trend", ("vix", "trend")),
        ("avgabs+trend", ("avgabs", "trend")),
        ("all_three", ("vix", "avgabs", "trend")),
    ]

    report = {}
    for label, parts in recipes:
        combined = flags[parts[0]]
        for part in parts[1:]:
            combined = combined & flags[part]
        report[label] = eval_mask(df, combined)
    return report

def regime_breakdowns(trades: pd.DataFrame) -> Dict:
    """Break trade hit rates down by calendar, VIX and volatility regime.

    Args:
        trades: output of trades_table().

    Returns:
        ``{}`` when ``trades`` is empty. Otherwise a dict with per-year,
        per-month, per-weekday, per-VIX-bucket and per-avgabs-bucket
        ``{"signals", "hit_rate"}`` summaries, a miss breakdown, quantiles of
        the hit margin, rolling 100-trade hit rates and streak lengths.
        Calendar fields are taken from ``pred_date``.
    """
    if trades.empty:
        return {}
    td = trades.copy()
    td["pred_date"] = pd.to_datetime(td["pred_date"])
    td["year"] = td["pred_date"].dt.year
    td["month"] = td["pred_date"].dt.month
    td["dow"] = td["pred_date"].dt.dayofweek  # 0=Mon

    def summarize(group: pd.DataFrame) -> Dict:
        return {"signals": int(len(group)), "hit_rate": float(group["inside"].mean())}

    by_year  = {int(y): summarize(g) for y, g in td.groupby("year")}
    by_month = {int(m): summarize(g) for m, g in td.groupby("month")}
    by_dow   = {int(d): summarize(g) for d, g in td.groupby("dow")}

    td["vix_bucket"] = _bucketize_vix(td["vix"])
    td["avgabs_bucket"] = _bucketize_avgabs(td["avgabs"])
    # The bucket columns are categorical. `observed` is passed explicitly so
    # the report keeps listing every bucket (empty ones with signals=0 and a
    # NaN hit rate) regardless of the pandas version's default.
    by_vix  = {str(b): summarize(g) for b, g in td.groupby("vix_bucket", observed=False)}
    by_avga = {str(b): summarize(g) for b, g in td.groupby("avgabs_bucket", observed=False)}

    misses = td[td["inside"] == 0]
    miss_info = {"count": int(len(misses))}
    if not misses.empty:
        bh = int((misses["miss_type"]=="broke_high").sum())
        bl = int((misses["miss_type"]=="broke_low").sum())
        miss_info.update({
            "broke_high": bh,
            "broke_low": bl,
            "overshoot_pct_mean": float(misses["overshoot_pct"].dropna().mean()) if "overshoot_pct" in misses else float("nan"),
            "overshoot_pct_p95": float(misses["overshoot_pct"].dropna().quantile(0.95)) if "overshoot_pct" in misses else float("nan"),
        })

    hits = td[td["inside"] == 1]
    margins = {}
    if not hits.empty:
        q = hits["edge_gap_pct"].quantile([0.05,0.25,0.5,0.75,0.95])
        # round() rather than int(): k*100 is a float product and truncation
        # could mislabel a percentile if it lands just below the integer.
        margins = {f"p{round(k*100)}": float(v) for k,v in q.items()}

    roll100 = rolling_hit_rates(td, k=100)
    streak  = streaks(td)

    return {
        "by_year": by_year,
        "by_month": by_month,
        "by_dow": by_dow,
        "by_vix_bucket": by_vix,
        "by_avgabs_bucket": by_avga,
        "miss_breakdown": miss_info,
        "hit_margin_edge_gap_pct": margins,
        "rolling_window_100": roll100,
        "streaks": streak
    }

# ----------- Current trade -----------
def current_trade(df: pd.DataFrame, cfg_band: float) -> Dict:
    """Describe the most recent row of ``df`` as the setup for the next session.

    Reports the row's date, whether the signal is on, its close, the band
    formed from that close, the filter inputs, and the band width used.
    """
    latest = df.iloc[-1]
    as_of = df.index[-1].date()

    snapshot = {"today": builtins.str(as_of)}
    snapshot["signal_today"] = bool(latest["signal_t"] == 1)
    # Plain floats so the result serialises cleanly.
    numeric_fields = (
        ("close_today", "close"),
        ("predicted_low_next", "band_low_t"),
        ("predicted_high_next", "band_high_t"),
        ("vix_today", "vix"),
        ("avgabs_today", "avgabs"),
    )
    for out_key, column in numeric_fields:
        snapshot[out_key] = float(latest[column])
    snapshot["above_sma_today"] = bool(latest["above_sma"] == 1)
    snapshot["band_pct"] = cfg_band
    return snapshot

# ---------------- Main -------------------
def main(cfg: Config = CFG) -> None:
    """Run the full pipeline and print the report to stdout.

    Steps: load market data, grid-search the parameter sets, rebuild the frame
    for the winning set, compute diagnostics, then print a JSON summary, the
    last ``cfg.last_n`` trades, and the setup for the next trading day.
    """
    px, vix = load_market(cfg)

    # Search best params (maximize EDGE VS BASELINE among sets with >= min_signals)
    gs = grid_search(px, vix, cfg)
    best = gs["best"]
    p = best["params"]

    # Rebuild with best params for reporting
    df_best = build_frame(px, vix, p["band_pct"], p["avgabs_window"], p["avgabs_max"],
                          p["vix_max"], p["sma_n"], p["use_trend"])
    trades = trades_table(df_best)
    summ = accuracy(trades, df_best, p["band_pct"])

    # Diagnostics
    diag_regimes = regime_breakdowns(trades)
    diag_abl = ablations(df_best)

    # Local sensitivity (neighbors in grid, including band_pct)
    def neighbor_tests() -> List[Dict]:
        """Re-evaluate parameter sets one grid step away from the best set.

        Each numeric parameter is moved to its adjacent grid values (one at a
        time), and the trend flag is flipped; the top 10 results are returned.
        """
        tests = []
        def try_params(q):
            # Evaluate one candidate; keep it only if it has enough signals.
            dfq = build_frame(px, vix, q["band_pct"], q["avgabs_window"], q["avgabs_max"], q["vix_max"], q["sma_n"], q["use_trend"])
            trq = trades_table(dfq)
            summq = accuracy(trq, dfq, q["band_pct"])
            if summq["signals"] >= cfg.min_signals:
                tests.append({"params": q.copy(), **summq})

        grids = {
            "band_pct": cfg.grid_band_pct,
            "avgabs_window": cfg.grid_avgabs_window,
            "avgabs_max": cfg.grid_avgabs_max,
            "vix_max": cfg.grid_vix_max,
            "sma_n": cfg.grid_sma_n
        }
        for key, grid in grids.items():
            # Sort and de-duplicate so "adjacent" means the next smaller/larger value.
            vals = sorted(set(grid))
            if p[key] in vals:
                i = vals.index(p[key])
                for j in [i-1, i+1]:
                    if 0 <= j < len(vals):
                        q = p.copy(); q[key] = vals[j]; try_params(q)
        # The boolean trend flag has only one neighbor: its opposite.
        q = p.copy(); q["use_trend"] = not p["use_trend"]; try_params(q)
        # Same ordering as the grid search ranking.
        tests.sort(key=lambda r: (r["edge_vs_baseline"], r["hit_rate"], r["signals"], -r["params"].get("band_pct", 0), -r["params"].get("vix_max", 0)), reverse=True)
        return tests[:10]

    diag_local = {"neighbor_tests": neighbor_tests()}

    # Coverage/health checks
    total_days = int(len(df_best))
    signal_rate = float(summ["signals"] / total_days) if total_days else float("nan")
    first_sig = builtins.str(pd.to_datetime(trades["pred_date"].iloc[0]).date()) if not trades.empty else None
    last_sig  = builtins.str(pd.to_datetime(trades["pred_date"].iloc[-1]).date()) if not trades.empty else None

    print("=== SPX Iron Condor — Optimized for Max EDGE vs Baseline (band ≤ 1.0%) ===")
    # default=float converts values json cannot serialise natively (e.g. numpy scalars).
    print(json.dumps(dict(
        best_params=p,
        summary=summ,
        coverage=dict(
            total_days=total_days,
            signals=summ["signals"],
            signal_rate=signal_rate,
            first_signal_date=first_sig,
            last_signal_date=last_sig
        ),
        diagnostics=dict(
            regimes=diag_regimes,
            ablations=diag_abl,
            local_sensitivity=diag_local
        )
    ), indent=2, default=float))

    # Last N trades
    if trades.empty:
        print("\nNo historical signals under best params (unexpected given min_signals).")
    else:
        lastN = trades.tail(cfg.last_n)
        print(f"\n--- Last {len(lastN)} trades ---")
        for _, r in lastN.iterrows():
            # Only misses carry a miss_type; hits leave the suffix empty.
            miss_str = ""
            if "miss_type" in r and not pd.isna(r["miss_type"]):
                miss_str = f"| miss={builtins.str(r['miss_type'])}"
            print(
                f"{pd.to_datetime(r['verify_date']).date()} | pred_on={pd.to_datetime(r['pred_date']).date()} "
                f"| base={r['base_close_t']:.2f} band=[{r['band_low']:.2f},{r['band_high']:.2f}] "
                f"| close={r['next_close_t1']:.2f} | inside={bool(r['inside'])} "
                f"| |move|={100*r['abs_move_pct']:.2f}% | edge_gap={100*(r['edge_gap_pct'] if not pd.isna(r['edge_gap_pct']) else 0):.2f}% "
                f"| VIX_pred={r['vix']:.2f} | avg|r|_w={100*r['avgabs']:.2f}% {miss_str}"
            )

    # Current trade (today -> next session) using best band
    cur = current_trade(df_best, p["band_pct"])
    print("\n--- Current (for next trading day) ---")
    print(json.dumps(cur, indent=2, default=float))

# Run the report with the default CFG when executed as a script.
if __name__ == "__main__":
    main()

=== SPX Iron Condor — Optimized for Max EDGE vs Baseline (band ≤ 1.0%) ===
{
  "best_params": {
    "band_pct": 0.004,
    "avgabs_window": 10,
    "avgabs_max": 0.004,
    "vix_max": 12,
    "sma_n": 12,
    "use_trend": true
  },
  "summary": {
    "signals": 229,
    "hit_rate": 0.7685589519650655,
    "baseline_hit_rate": 0.4363545066532764,
    "edge_vs_baseline": 0.3322044453117891
  },
  "coverage": {
    "total_days": 3983,
    "signals": 229,
    "signal_rate": 0.057494350991714786,
    "first_signal_date": "2013-03-13",
    "last_signal_date": "2024-06-13"
  },
  "diagnostics": {
    "regimes": {
      "by_year": {
        "2013": {
          "signals": 5,
          "hit_rate": 0.4
        },
        "2014": {
          "signals": 26,
          "hit_rate": 0.6153846153846154
        },
        "2016": {
          "signals": 16,
          "hit_rate": 0.8125
        },
        "2017": {
          "signals": 162,
          "hit_rate": 0.8271604938271605
        },
        "2018"

#0.60%

In [None]:
#!/usr/bin/env python3
# SPX Iron Condor — Optimize filters for MAX EDGE VS BASELINE with band ≤ 1.0%
# - Objective: maximize (hit_rate - baseline_hit_rate)
# - Searches over band_pct grid (upper bound 1%)
# - Keeps only parameter sets with at least Config.min_signals signals (instances)
# - Data: yfinance (^GSPC, ^VIX), daily
# - Prints rich diagnostics similar to your previous version
# pip install yfinance pandas numpy

import warnings, builtins, json, itertools
from dataclasses import dataclass
from typing import Tuple, Dict, List

import numpy as np
import pandas as pd
import yfinance as yf

warnings.filterwarnings("ignore")

# ---------------- Config ----------------
@dataclass
class Config:
    """Run settings: data source, report size, signal threshold and search grids.

    The ``grid_*`` fields default to ``None``; grid_search() iterates over them
    directly, so they must be supplied as lists (see the module-level ``CFG``).
    """
    ticker: str = "^GSPC"         # SPX proxy
    vix_ticker: str = "^VIX"      # volatility index symbol, loaded alongside `ticker`
    start: str = "2010-01-01"     # first date requested from yfinance
    end: str = None               # None means "latest available"
    interval: str = "1d"          # bar interval passed to yfinance

    # band_pct is searched via grid_band_pct; this default is not read by the
    # functions visible in this section of the file.
    band_pct: float = 0.01

    last_n: int = 10              # how many past trades to print in the report
    min_signals: int = 500        # reject param sets with fewer signals than this

    # Grids to search (you can widen/narrow)
    grid_band_pct: List[float] = None       # half-width of the predicted band, as a fraction of close
    grid_avgabs_window: List[int] = None    # rolling window lengths for the mean absolute return
    grid_avgabs_max: List[float] = None     # upper thresholds for that rolling mean
    grid_vix_max: List[float] = None        # upper thresholds for the VIX close
    grid_sma_n: List[int] = None            # simple-moving-average lengths for the trend filter
    grid_use_trend: List[bool] = None  # whether to require close > SMA(n)

# Default configuration for this cell. Only the search grids are overridden;
# every other field keeps its Config default. The full Cartesian product of the
# grids below is 8 * 6 * 9 * 7 * 6 * 2 = 36,288 parameter sets.
CFG = Config(
    # Band grid (≤ 1%); feel free to adjust granularity
    grid_band_pct=[0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009, 0.01],

    # Rolling window lengths (in bars) for the mean absolute return
    grid_avgabs_window=[3,4,5,6,7,10],
    # A signal requires the rolling mean absolute return to be below this value
    grid_avgabs_max=[0.0035,0.004,0.0045,0.005,0.0055,0.006,0.0065,0.007,0.0075],
    # A signal requires the VIX close to be below this value
    grid_vix_max=[12,13,14,15,16,17,18],
    # SMA lengths used by the optional trend filter
    grid_sma_n=[5,8,10,12,15,20],
    # Evaluate each combination with and without the trend filter
    grid_use_trend=[True, False],
)

# --------------- Data -------------------
def _flatten_cols(df: pd.DataFrame) -> pd.DataFrame:
    """Lower-case the column labels of ``df`` in place and return the same frame.

    When the columns are a MultiIndex only the first level of each label is
    kept; otherwise the whole label is stringified.
    """
    multi_level = isinstance(df.columns, pd.MultiIndex)
    renamed = []
    for label in df.columns:
        top = label[0] if multi_level else label
        renamed.append(builtins.str(top).lower())
    df.columns = renamed
    return df

def load_yf_ohlc(ticker: str, start: str, end: str=None, interval: str="1d") -> pd.DataFrame:
    """Download bars for ``ticker`` via yfinance and return its OHLC columns.

    Column labels are normalised with _flatten_cols(); rows containing any
    missing value among open/high/low/close are dropped.

    Raises:
        SystemExit: if the download is empty or a required column is missing.
    """
    raw = yf.download(
        ticker,
        start=start,
        end=end,
        interval=interval,
        auto_adjust=True,
        progress=False,
        group_by="column",
    )
    if raw is None or raw.empty:
        raise SystemExit(f"No data for {ticker}.")

    flat = _flatten_cols(raw)
    required = ["open", "high", "low", "close"]
    # Report the first required column that is absent, in the order listed above.
    absent = next((col for col in required if col not in flat.columns), None)
    if absent is not None:
        raise SystemExit(f"Missing {absent} in {ticker}.")
    return flat[required].dropna().copy()

def load_market(cfg: Config) -> Tuple[pd.DataFrame, pd.Series]:
    """Load the price frame and a VIX close series aligned to the price index.

    Returns:
        ``(px, vix)`` where ``vix`` is named "vix", reindexed onto ``px.index``
        and forward-filled over dates that have no VIX bar.
    """
    window = (cfg.start, cfg.end, cfg.interval)
    prices = load_yf_ohlc(cfg.ticker, *window)
    vix_close = load_yf_ohlc(cfg.vix_ticker, *window)["close"].rename("vix")
    aligned_vix = vix_close.reindex(prices.index).ffill()
    return prices, aligned_vix

# ----------- Core builder -----------
def build_frame(px: pd.DataFrame, vix: pd.Series, band_pct: float,
                avgabs_window: int, avgabs_max: float,
                vix_max: float, sma_n: int, use_trend: bool) -> pd.DataFrame:
    """Build the per-day signal/verification frame for one parameter set.

    Columns suffixed ``_t`` describe the day the row belongs to (when the
    signal and band are formed); columns suffixed ``_for_t1`` hold the previous
    row's values, so each row can be checked against its own close.
    Rows containing any missing value are dropped before returning.
    """
    prices = px["close"]
    daily_ret = prices.pct_change()
    moving_avg = prices.rolling(sma_n).mean()

    frame = pd.DataFrame(index=px.index)
    frame["close"] = prices
    frame["ret"] = daily_ret
    frame["avgabs"] = daily_ret.abs().rolling(avgabs_window).mean()
    frame["vix"] = vix
    frame["above_sma"] = prices.gt(moving_avg).astype(int)

    # Individual filters, kept as 0/1 columns so they can be recombined later.
    frame["cond_low_vol_t"] = frame["avgabs"].lt(avgabs_max).astype(int)
    frame["cond_vix_t"] = frame["vix"].lt(vix_max).astype(int)
    if use_trend:
        trend_ok = frame["above_sma"].eq(1)
    else:
        # Trend filter disabled: the self-comparison is true on every row.
        trend_ok = frame["above_sma"].eq(frame["above_sma"])
    frame["cond_trend_t"] = trend_ok.astype(int)

    # All three filters must hold for a signal.
    combined = frame["cond_low_vol_t"] & frame["cond_vix_t"] & frame["cond_trend_t"]
    frame["signal_t"] = combined.astype(int)

    # Symmetric band around the close of the signal day.
    frame["band_low_t"] = frame["close"] * (1.0 - band_pct)
    frame["band_high_t"] = frame["close"] * (1.0 + band_pct)

    # Carry signal, band and base close forward by one row.
    frame["pred_for_t1"] = frame["signal_t"].shift(1).fillna(0).astype(int)
    frame["band_low_for_t1"] = frame["band_low_t"].shift(1)
    frame["band_high_for_t1"] = frame["band_high_t"].shift(1)
    frame["base_close_t"] = frame["close"].shift(1)
    frame["next_close_t1"] = frame["close"]

    # 1 when this row's close lies within the band carried from the prior row.
    within = frame["next_close_t1"].between(frame["band_low_for_t1"], frame["band_high_for_t1"])
    frame["inside_today"] = within.astype(int)

    # Index label of the previous row, used when printing trades.
    frame["pred_date"] = frame.index.to_series().shift(1)

    return frame.dropna()

def trades_table(df: pd.DataFrame) -> pd.DataFrame:
    """Return one row per signalled trade with hit/miss diagnostics.

    Args:
        df: frame produced by build_frame(); rows with ``pred_for_t1 == 1``
            are treated as trades.

    Returns:
        A frame with columns ``pred_date, verify_date, base_close_t, band_low,
        band_high, next_close_t1, inside, abs_move_pct, edge_gap_pct, vix,
        avgabs, miss_type, overshoot_pct``. When there are no trades the frame
        is empty but still carries these columns, so callers see one schema.
    """
    cols = ["pred_date","verify_date","base_close_t","band_low_for_t1","band_high_for_t1",
            "next_close_t1","inside_today","abs_move_pct","edge_gap_pct","vix","avgabs","miss_type","overshoot_pct"]
    renames = {
        "band_low_for_t1":"band_low",
        "band_high_for_t1":"band_high",
        "inside_today":"inside"
    }

    t = df[df["pred_for_t1"] == 1].copy()
    if t.empty:
        # Keep the output schema identical to the non-empty case.
        return t.reindex(columns=cols).rename(columns=renames)

    t["verify_date"] = t.index
    t["abs_move_pct"]= (t["next_close_t1"]/t["base_close_t"] - 1.0).abs()

    # Distance to nearest edge (positive on hits, negative on misses)
    gap_low  = (t["next_close_t1"] - t["band_low_for_t1"])  / t["base_close_t"]
    gap_high = (t["band_high_for_t1"] - t["next_close_t1"]) / t["base_close_t"]
    t["edge_gap_pct"] = np.minimum(gap_low, gap_high)

    # Miss type & overshoot as % of base, derived from band edges (no cfg needed).
    # Assigning through .loc creates each column on first use; rows that are
    # hits keep a missing value in both columns.
    miss = t["inside_today"] == 0
    t.loc[miss & (t["next_close_t1"] > t["band_high_for_t1"]), "miss_type"] = "broke_high"
    t.loc[miss & (t["next_close_t1"] < t["band_low_for_t1"]),  "miss_type"] = "broke_low"
    t.loc[miss & (t["miss_type"]=="broke_high"), "overshoot_pct"] = (t["next_close_t1"] - t["band_high_for_t1"]) / t["base_close_t"]
    t.loc[miss & (t["miss_type"]=="broke_low"),  "overshoot_pct"] = (t["band_low_for_t1"] - t["next_close_t1"]) / t["base_close_t"]

    return t[cols].rename(columns=renames)

def accuracy(trades: pd.DataFrame, df_all: pd.DataFrame, band_pct: float) -> Dict:
    """Summarise trade hit rate against the unconditional baseline.

    The baseline is the share of rows in ``df_all`` whose absolute return is
    at most ``band_pct``. With no trades, hit rate and edge are NaN.
    """
    within_band = df_all["ret"].abs().le(band_pct)
    base_rate = float(within_band.mean())

    if trades.empty:
        count = 0
        hit_rate = float("nan")
        edge = float("nan")
    else:
        count = int(len(trades))
        hit_rate = float(trades["inside"].mean())
        edge = hit_rate - base_rate

    summary = {}
    summary["signals"] = count
    summary["hit_rate"] = hit_rate
    summary["baseline_hit_rate"] = base_rate
    summary["edge_vs_baseline"] = edge
    return summary

# ----------- Helper: Evaluate custom mask at t (verify at t+1) -----------
def eval_mask(df: pd.DataFrame, mask_t: pd.Series) -> Dict:
    """Score an arbitrary day-t mask: shift it one row and measure band hits.

    Returns the number of selected rows and the share whose close lies within
    the band carried from the prior row (NaN hit rate when nothing is selected).
    """
    active = mask_t.shift(1).fillna(0).astype(int) == 1
    picked = df[active].copy()
    if picked.empty:
        return {"signals": 0, "hit_rate": float("nan")}
    within = picked["next_close_t1"].between(picked["band_low_for_t1"], picked["band_high_for_t1"])
    return {"signals": int(len(picked)), "hit_rate": float(within.mean())}

# ----------- Grid Search (maximize EDGE vs BASELINE) -----------
def grid_search(px: pd.DataFrame, vix: pd.Series, cfg: Config) -> Dict:
    """Evaluate every grid combination and rank by edge over the baseline.

    Ranking (descending): edge_vs_baseline, then hit_rate, then signals, then
    smaller band_pct, then smaller vix_max. Ties keep grid iteration order.

    Args:
        px: price frame with a "close" column.
        vix: VIX series aligned to ``px``.
        cfg: supplies the ``grid_*`` lists and ``min_signals``.

    Returns:
        ``{"best": <top record>, "all": <all accepted records, ranked>}``; each
        record holds ``params`` plus the fields returned by accuracy().

    Raises:
        SystemExit: if no parameter set produced enough signals.
    """
    def rank_key(rec: Dict):
        prm = rec["params"]
        return (rec["edge_vs_baseline"], rec["hit_rate"], rec["signals"],
                -prm["band_pct"], -prm["vix_max"])

    # A set with zero signals has a NaN hit rate and edge, which cannot be
    # ranked (NaN breaks tuple comparison and sorting). Always require at
    # least one signal, even if cfg.min_signals is 0 or negative.
    required_signals = max(1, cfg.min_signals)

    # enforce ≤ 1.0% once, instead of skipping inside the product loop
    bands = [b for b in cfg.grid_band_pct if not (b > 0.01)]

    results = []
    for band_pct, avgabs_window, avgabs_max, vix_max, sma_n, use_trend in itertools.product(
        bands, cfg.grid_avgabs_window, cfg.grid_avgabs_max,
        cfg.grid_vix_max, cfg.grid_sma_n, cfg.grid_use_trend
    ):
        df = build_frame(px, vix, band_pct, avgabs_window, avgabs_max, vix_max, sma_n, use_trend)
        trades = trades_table(df)
        summ = accuracy(trades, df, band_pct)

        if summ["signals"] < required_signals:
            continue

        results.append(dict(
            params=dict(
                band_pct=band_pct,
                avgabs_window=avgabs_window, avgabs_max=avgabs_max,
                vix_max=vix_max, sma_n=sma_n, use_trend=use_trend
            ),
            **summ
        ))

    if not results:
        raise SystemExit(
            "No parameter set met min_signals. Loosen grids (vix_max/avgabs_max) or reduce CFG.min_signals."
        )

    # sorted() is stable even with reverse=True, so the first element is the
    # earliest-evaluated record among those sharing the top key.
    results_sorted = sorted(results, key=rank_key, reverse=True)
    return {"best": results_sorted[0], "all": results_sorted}

# ----------- Diagnostics -----------
def _bucketize_vix(s: pd.Series) -> pd.Series:
    """Assign each VIX value to a fixed, right-closed bucket (0 is included)."""
    buckets = [
        (12, "<=12"),
        (14, "12-14"),
        (16, "14-16"),
        (18, "16-18"),
        (22, "18-22"),
        (28, "22-28"),
        (1000, ">28"),
    ]
    edges = [0] + [upper for upper, _ in buckets]
    names = [name for _, name in buckets]
    return pd.cut(s, bins=edges, labels=names, right=True, include_lowest=True)

def _bucketize_avgabs(s: pd.Series) -> pd.Series:
    """Bucket values into quartile bands computed from ``s`` itself.

    Bin edges are ``0, q25, q50, q75, 10.0`` (right-closed, lowest included).
    If quantiles coincide (e.g. a single value or many identical values), the
    zero-width bins are dropped so ``pd.cut`` does not raise on duplicate
    edges; the result still carries all four categories. If ``s`` has no
    non-missing value, every entry of the result is missing.
    """
    labels = ["Q1-low","Q2-mid","Q3-high","Q4-very-high"]

    valid = s.dropna()
    if valid.empty:
        # No quantiles can be computed; return an all-missing categorical.
        blank = pd.Categorical([None] * len(s), categories=labels, ordered=True)
        return pd.Series(blank, index=s.index, name=s.name)

    qs = valid.quantile([0.25,0.5,0.75]).values.tolist()

    # Keep only edges that strictly increase, together with their labels.
    edges = [0]
    kept = []
    for upper, label in zip(qs + [10.0], labels):
        if upper > edges[-1]:
            edges.append(upper)
            kept.append(label)

    out = pd.cut(s, bins=edges, labels=kept, include_lowest=True, right=True)
    if len(kept) < len(labels):
        # Restore the full category list so downstream grouping sees one schema.
        out = out.cat.set_categories(labels, ordered=True)
    return out

def rolling_hit_rates(trades: pd.DataFrame, k: int = 100) -> Dict:
    """Summarise hit rates over every run of ``k`` consecutive trades.

    Args:
        trades: frame with a 0/1 ``inside`` column, in trade order.
        k: window length in trades; must be at least 1.

    Returns:
        ``{}`` when ``trades`` is empty. Otherwise a dict with ``window`` and
        the last/min/max/p05/p95 window hit rates. When fewer than ``k``
        trades exist, the overall hit rate stands in as the only window, so
        both cases return the same keys.

    Raises:
        ValueError: if ``k`` is smaller than 1.
    """
    if trades.empty:
        return {}
    if k < 1:
        raise ValueError("k must be a positive integer")

    x = trades["inside"].astype(int).to_numpy()
    if len(x) < k:
        # Not enough trades for a full window: treat the whole history as one.
        roll = np.array([float(np.mean(x))])
    else:
        # Prefix sums give every k-length window total in one vector operation.
        csum = np.cumsum(np.insert(x,0,0))
        roll = (csum[k:] - csum[:-k]) / k

    return {
        "window": k,
        "last_window_hit_rate": float(roll[-1]),
        "min_window_hit_rate": float(np.min(roll)),
        "max_window_hit_rate": float(np.max(roll)),
        "p05_window_hit_rate": float(np.quantile(roll, 0.05)),
        "p95_window_hit_rate": float(np.quantile(roll, 0.95)),
    }

def streaks(trades: pd.DataFrame) -> Dict:
    """Return the longest consecutive runs of hits (1) and misses (0).

    An empty ``trades`` frame yields an empty dict.
    """
    if trades.empty:
        return {}

    outcomes = trades["inside"].astype(int).tolist()
    # Collapse the sequence into (value, run length) pairs.
    runs = [(value, sum(1 for _ in group)) for value, group in itertools.groupby(outcomes)]

    def longest(target: int) -> int:
        return max((length for value, length in runs if value == target), default=0)

    return {"longest_hit_streak": longest(1), "longest_miss_streak": longest(0)}

def ablations(df: pd.DataFrame) -> Dict:
    """Score each filter alone and in every combination via eval_mask().

    Keys, in order: only_vix, only_avgabs, only_trend, vix+avgabs, vix+trend,
    avgabs+trend, all_three.
    """
    flags = {
        "vix": df["cond_vix_t"] == 1,
        "avgabs": df["cond_low_vol_t"] == 1,
        "trend": df["cond_trend_t"] == 1,
    }
    recipes = [
        ("only_vix", ("vix",)),
        ("only_avgabs", ("avgabs",)),
        ("only_trend", ("trend",)),
        ("vix+avgabs", ("vix", "avgabs")),
        ("vix+trend", ("vix", "trend")),
        ("avgabs+trend", ("avgabs", "trend")),
        ("all_three", ("vix", "avgabs", "trend")),
    ]

    report = {}
    for label, parts in recipes:
        combined = flags[parts[0]]
        for part in parts[1:]:
            combined = combined & flags[part]
        report[label] = eval_mask(df, combined)
    return report

def regime_breakdowns(trades: pd.DataFrame) -> Dict:
    """Break the trade history down by calendar, regime bucket and outcome.

    Works on a copy of ``trades`` so the caller's frame is not modified.
    Returns ``{}`` for an empty table; otherwise a dict with:

    - ``by_year`` / ``by_month`` / ``by_dow``: signal count and hit rate per
      calendar group of ``pred_date`` (day of week uses 0 = Monday).
    - ``by_vix_bucket`` / ``by_avgabs_bucket``: the same summary per bucket
      produced by ``_bucketize_vix`` and ``_bucketize_avgabs``.
    - ``miss_breakdown``: miss count and, when there are misses, the
      high/low break split plus overshoot mean and 95th percentile (NaN if
      the ``overshoot_pct`` column is absent).
    - ``hit_margin_edge_gap_pct``: quantiles of ``edge_gap_pct`` over hits.
    - ``rolling_window_100`` and ``streaks``: results of the
      ``rolling_hit_rates`` (k=100) and ``streaks`` helpers.
    """
    if trades.empty:
        return {}

    td = trades.copy()
    td["pred_date"] = pd.to_datetime(td["pred_date"])
    when = td["pred_date"].dt
    td["year"] = when.year
    td["month"] = when.month
    td["dow"] = when.dayofweek  # 0=Mon

    def _stats(rows: pd.DataFrame) -> Dict:
        # Signal count and share of trades that finished inside the band.
        signals = int(len(rows))
        hit_rate = float(rows["inside"].mean())
        return {"signals": signals, "hit_rate": hit_rate}

    def _split(column: str, as_key) -> Dict:
        # One summary per group of `column`, keyed by the converted label.
        return {as_key(label): _stats(rows) for label, rows in td.groupby(column)}

    year_split = _split("year", int)
    month_split = _split("month", int)
    dow_split = _split("dow", int)

    td["vix_bucket"] = _bucketize_vix(td["vix"])
    td["avgabs_bucket"] = _bucketize_avgabs(td["avgabs"])
    vix_split = _split("vix_bucket", str)
    avgabs_split = _split("avgabs_bucket", str)

    # Misses: how many, in which direction, and by how much.
    missed = td[td["inside"] == 0]
    miss_info = {"count": int(len(missed))}
    if not missed.empty:
        miss_info["broke_high"] = int((missed["miss_type"] == "broke_high").sum())
        miss_info["broke_low"] = int((missed["miss_type"] == "broke_low").sum())
        if "overshoot_pct" in missed:
            overshoot = missed["overshoot_pct"].dropna()
            miss_info["overshoot_pct_mean"] = float(overshoot.mean())
            miss_info["overshoot_pct_p95"] = float(overshoot.quantile(0.95))
        else:
            miss_info["overshoot_pct_mean"] = float("nan")
            miss_info["overshoot_pct_p95"] = float("nan")

    # Hits: distribution of the remaining gap to the band edge.
    landed = td[td["inside"] == 1]
    margins = {}
    if not landed.empty:
        q = landed["edge_gap_pct"].quantile([0.05, 0.25, 0.5, 0.75, 0.95])
        margins = {f"p{int(k*100)}": float(v) for k, v in q.items()}

    window_stats = rolling_hit_rates(td, k=100)
    run_stats = streaks(td)

    return {
        "by_year": year_split,
        "by_month": month_split,
        "by_dow": dow_split,
        "by_vix_bucket": vix_split,
        "by_avgabs_bucket": avgabs_split,
        "miss_breakdown": miss_info,
        "hit_margin_edge_gap_pct": margins,
        "rolling_window_100": window_stats,
        "streaks": run_stats,
    }

# ----------- Current trade -----------
def current_trade(df: pd.DataFrame, cfg_band: float) -> Dict:
    """Describe the most recent row of ``df`` as the setup for the next session.

    Args:
        df: Frame indexed by timestamps, containing the ``signal_t``,
            ``close``, ``band_low_t``, ``band_high_t``, ``vix``, ``avgabs``
            and ``above_sma`` columns.
        cfg_band: Band width to echo back under ``band_pct``.

    Returns:
        A dict with the last index date as an ISO string, the signal and
        trend flags as bools, and the price/band/VIX/avgabs values as floats.
    """
    latest = df.iloc[-1]

    snapshot = {
        "today": builtins.str(df.index[-1].date()),
        "signal_today": bool(latest["signal_t"] == 1),
    }

    # Plain numeric fields: output key -> source column, in report order.
    numeric_fields = (
        ("close_today", "close"),
        ("predicted_low_next", "band_low_t"),
        ("predicted_high_next", "band_high_t"),
        ("vix_today", "vix"),
        ("avgabs_today", "avgabs"),
    )
    for out_key, column in numeric_fields:
        snapshot[out_key] = float(latest[column])

    snapshot["above_sma_today"] = bool(latest["above_sma"] == 1)
    snapshot["band_pct"] = cfg_band
    return snapshot

# ---------------- Main -------------------
def main(cfg: Config = CFG):
    """Run the parameter search and print the full report for the winner.

    Steps, in order: load market data, run ``grid_search``, rebuild the frame
    and trade table for the best parameter set, compute diagnostics (regime
    breakdowns, ablations, one-step grid neighbors), then print a JSON
    report, the last ``cfg.last_n`` trades, and the current setup for the
    next trading day.

    Args:
        cfg: Search and reporting configuration; defaults to module ``CFG``.
    """
    px, vix = load_market(cfg)

    # Winning parameter set from the search.
    search = grid_search(px, vix, cfg)
    p = search["best"]["params"]

    def frame_for(params: Dict) -> pd.DataFrame:
        # Single place that maps a parameter dict onto build_frame's arguments.
        return build_frame(
            px, vix,
            params["band_pct"], params["avgabs_window"], params["avgabs_max"],
            params["vix_max"], params["sma_n"], params["use_trend"],
        )

    # Rebuild with the best parameters for reporting.
    df_best = frame_for(p)
    trades = trades_table(df_best)
    summ = accuracy(trades, df_best, p["band_pct"])

    # Diagnostics
    diag_regimes = regime_breakdowns(trades)
    diag_abl = ablations(df_best)

    def neighbor_tests() -> List[Dict]:
        """Score parameter sets one grid step away from the best, top 10."""
        kept = []

        def consider(candidate: Dict) -> None:
            frame = frame_for(candidate)
            table = trades_table(frame)
            stats = accuracy(table, frame, candidate["band_pct"])
            # Same minimum-signal filter the search applies.
            if stats["signals"] >= cfg.min_signals:
                kept.append({"params": candidate.copy(), **stats})

        searchable = (
            ("band_pct", cfg.grid_band_pct),
            ("avgabs_window", cfg.grid_avgabs_window),
            ("avgabs_max", cfg.grid_avgabs_max),
            ("vix_max", cfg.grid_vix_max),
            ("sma_n", cfg.grid_sma_n),
        )
        for field, grid in searchable:
            ordered = sorted(set(grid))
            if p[field] not in ordered:
                continue
            pos = ordered.index(p[field])
            # Step to the grid value just below and just above the best one.
            for j in (pos - 1, pos + 1):
                if 0 <= j < len(ordered):
                    candidate = p.copy()
                    candidate[field] = ordered[j]
                    consider(candidate)

        # The trend filter is boolean, so its only neighbor is the flipped value.
        flipped = p.copy()
        flipped["use_trend"] = not p["use_trend"]
        consider(flipped)

        def rank(row: Dict):
            # Higher edge, hit rate and signal count first; on ties prefer a
            # smaller band_pct and a smaller vix_max (negated under reverse).
            return (
                row["edge_vs_baseline"],
                row["hit_rate"],
                row["signals"],
                -row["params"].get("band_pct", 0),
                -row["params"].get("vix_max", 0),
            )

        return sorted(kept, key=rank, reverse=True)[:10]

    diag_local = {"neighbor_tests": neighbor_tests()}

    # Coverage/health checks
    total_days = int(len(df_best))
    if total_days:
        signal_rate = float(summ["signals"] / total_days)
    else:
        signal_rate = float("nan")

    first_sig = None
    last_sig = None
    if not trades.empty:
        first_sig = builtins.str(pd.to_datetime(trades["pred_date"].iloc[0]).date())
        last_sig = builtins.str(pd.to_datetime(trades["pred_date"].iloc[-1]).date())

    print("=== SPX Iron Condor — Optimized for Max EDGE vs Baseline (band ≤ 1.0%) ===")
    report = {
        "best_params": p,
        "summary": summ,
        "coverage": {
            "total_days": total_days,
            "signals": summ["signals"],
            "signal_rate": signal_rate,
            "first_signal_date": first_sig,
            "last_signal_date": last_sig,
        },
        "diagnostics": {
            "regimes": diag_regimes,
            "ablations": diag_abl,
            "local_sensitivity": diag_local,
        },
    }
    print(json.dumps(report, indent=2, default=float))

    # Last N trades
    if not trades.empty:
        lastN = trades.tail(cfg.last_n)
        print(f"\n--- Last {len(lastN)} trades ---")
        for _, r in lastN.iterrows():
            has_miss = "miss_type" in r and not pd.isna(r["miss_type"])
            miss_str = f"| miss={builtins.str(r['miss_type'])}" if has_miss else ""
            print(
                f"{pd.to_datetime(r['verify_date']).date()} | pred_on={pd.to_datetime(r['pred_date']).date()} "
                f"| base={r['base_close_t']:.2f} band=[{r['band_low']:.2f},{r['band_high']:.2f}] "
                f"| close={r['next_close_t1']:.2f} | inside={bool(r['inside'])} "
                f"| |move|={100*r['abs_move_pct']:.2f}% | edge_gap={100*(r['edge_gap_pct'] if not pd.isna(r['edge_gap_pct']) else 0):.2f}% "
                f"| VIX_pred={r['vix']:.2f} | avg|r|_w={100*r['avgabs']:.2f}% {miss_str}"
            )
    else:
        print("\nNo historical signals under best params (unexpected given min_signals).")

    # Current trade (today -> next session) using best band
    cur = current_trade(df_best, p["band_pct"])
    print("\n--- Current (for next trading day) ---")
    print(json.dumps(cur, indent=2, default=float))

# Script entry point: run the search-and-report pipeline with the default
# configuration only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()

=== SPX Iron Condor — Optimized for Max EDGE vs Baseline (band ≤ 1.0%) ===
{
  "best_params": {
    "band_pct": 0.006,
    "avgabs_window": 3,
    "avgabs_max": 0.005,
    "vix_max": 13,
    "sma_n": 12,
    "use_trend": true
  },
  "summary": {
    "signals": 513,
    "hit_rate": 0.8440545808966862,
    "baseline_hit_rate": 0.5726817042606517,
    "edge_vs_baseline": 0.2713728766360345
  },
  "coverage": {
    "total_days": 3990,
    "signals": 513,
    "signal_rate": 0.12857142857142856,
    "first_signal_date": "2013-01-18",
    "last_signal_date": "2024-12-06"
  },
  "diagnostics": {
    "regimes": {
      "by_year": {
        "2013": {
          "signals": 44,
          "hit_rate": 0.8409090909090909
        },
        "2014": {
          "signals": 76,
          "hit_rate": 0.8157894736842105
        },
        "2015": {
          "signals": 22,
          "hit_rate": 0.7272727272727273
        },
        "2016": {
          "signals": 46,
          "hit_rate": 0.8478260869565217
