In [23]:
import sys, os, json, warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
from matplotlib.colors import LinearSegmentedColormap
from scipy import stats

warnings.filterwarnings("ignore", category=FutureWarning)

# -- project path ----------------------------------------------------------
# Both roots are derived from the kernel's current working directory, so this
# cell has to run while the CWD is still the notebook's own folder (Cell 3
# changes directory temporarily). SAC_ROOT is the parent of the CWD and
# PROJ_ROOT sits two levels above SAC_ROOT.
SAC_ROOT = os.path.abspath(os.path.join(os.getcwd(), ".."))
PROJ_ROOT = os.path.abspath(os.path.join(SAC_ROOT, "..", ".."))
# Prepend SAC_ROOT to sys.path, presumably so the bare `config`, `data_utils`,
# `environment`, ... imports that follow resolve from there. The membership
# test keeps re-runs of this cell from adding duplicate entries.
if SAC_ROOT not in sys.path:
    sys.path.insert(0, SAC_ROOT)

from config import SACConfig
from data_utils import load_and_prepare_data
from environment import Env
from agent import Agent
from evaluate import run_backtest, equity_curve, max_drawdown, sharpe, ann_vol, cagr

# -- load theme ------------------------------------------------------------
THEME_PATH = os.path.join(PROJ_ROOT, "utils", "theme.json")
# JSON is UTF-8 by specification. Pass the encoding explicitly so the theme
# loads the same way on platforms whose default text encoding is not UTF-8
# (e.g. cp1252 on Windows) instead of depending on the locale.
with open(THEME_PATH, encoding="utf-8") as f:
    THEME = json.load(f)

# Each palette section carries a free-text "description" entry next to the
# colour values; drop it so the dicts contain colours only.
ASSET_COLORS   = {k: v for k, v in THEME["portfolio_assets"].items() if k != "description"}
REGIME_COLORS  = {k: v for k, v in THEME["regime_colors"].items()    if k != "description"}
PERF_COLORS    = {k: v for k, v in THEME["performance"].items()      if k != "description"}
# Cluster keys ("C1"/"C3") are the short labels used for colours and names;
# CLUSTER_LABELS (Cell 4) maps the model folder names onto them.
CLUSTER_COLORS  = {"C1": "#009BDE", "C3": "#FF1F5B"}
CLUSTER_DISPLAY = {"C1": "SAC-Dir-HMM", "C3": "SAC-Dir-Base"}   # legends, titles, tables
CLUSTER_SHORT   = {"C1": "SD-HMM", "C3": "SD-Base"}             # compact (heatmap rows)

# register sequential teal colormap
TEAL_CMAP = LinearSegmentedColormap.from_list(
    "teal_seq", THEME["sequential"]["teal"]["colors"]
)
# force=True lets the cell be re-run without failing on the existing name
# (matplotlib emits a warning when it overwrites the registered entry).
plt.colormaps.register(TEAL_CMAP, name="teal_seq", force=True)

# -- rcParams (thesis quality) ---------------------------------------------
mpl_def = THEME["matplotlib_defaults"]

# Colours taken from the theme: every theme entry feeds one or more rcParams.
_theme_rc = {
    rc_key: mpl_def[theme_key]
    for theme_key, rc_keys in (
        ("background", ("figure.facecolor", "axes.facecolor")),
        ("text",       ("text.color", "axes.labelcolor", "xtick.color", "ytick.color")),
        ("grid",       ("axes.edgecolor", "grid.color")),
        ("grid_alpha", ("grid.alpha",)),
    )
    for rc_key in rc_keys
}

# Fixed thesis conventions that do not depend on the theme file.
_thesis_rc = {
    "axes.spines.top":    False,
    "axes.spines.right":  False,
    "axes.axisbelow":     True,
    "axes.grid":          True,
    "font.size":          11,
    "axes.titlesize":     14,
    "axes.titleweight":   "bold",
    "legend.frameon":     False,
    "savefig.dpi":        300,
    "savefig.bbox":       "tight",
    "figure.figsize":     [10, 5],
}

# Apply in the same order as before: theme colours first, then conventions.
plt.rcParams.update(_theme_rc)
plt.rcParams.update(_thesis_rc)

# -- constants -------------------------------------------------------------
SEEDS        = [42, 123, 456, 789, 1024]
TICKERS      = ["VNQ", "SPY", "TLT", "GLD", "BTC-USD"]
ASSET_LABELS = TICKERS + ["Cash"]
TC_RATE      = 0.0001        # 1 bps
LAG          = 5             # weekly rebalancing
# The annualisation step passed to sharpe/cagr/ann_vol must equal the
# rebalancing lag; derive it from LAG so the two values cannot drift apart.
STEP_SIZE    = LAG           # for annualization
BM_EW        = [0.2, 0.2, 0.2, 0.2, 0.2, 0.0]
BM_FW        = [0.10, 0.50, 0.30, 0.05, 0.05, 0.0]

# Benchmark weight vectors are ordered like ASSET_LABELS (risky assets, then
# cash). Fail early on a malformed edit rather than mid-backtest.
for _bm_name, _bm_weights in (("BM_EW", BM_EW), ("BM_FW", BM_FW)):
    if len(_bm_weights) != len(ASSET_LABELS):
        raise ValueError(
            f"{_bm_name} has {len(_bm_weights)} weights, expected {len(ASSET_LABELS)}"
        )
    if abs(sum(_bm_weights) - 1.0) > 1e-9:
        raise ValueError(f"{_bm_name} weights sum to {sum(_bm_weights)}, expected 1.0")

# Output folders are relative to the current working directory.
OUT_FIG = "analysis_outputs/figures"
OUT_TBL = "analysis_outputs/tables"
os.makedirs(OUT_FIG, exist_ok=True)
os.makedirs(OUT_TBL, exist_ok=True)

# -- sanity check ----------------------------------------------------------
# Echo the resolved paths and theme contents so a wrong working directory or a
# missing theme section is visible immediately.
# NOTE(review): the two root paths are printed as absolute local paths and end
# up in the saved notebook output — consider clearing before sharing.
print(f"Project root : {PROJ_ROOT}")
print(f"SAC root     : {SAC_ROOT}")
print(f"Theme keys   : {list(THEME.keys())}")
print(f"Asset colors : {len(ASSET_COLORS)} entries")
print(f"Regime colors: {list(REGIME_COLORS.keys())}")
print(f"Cluster clrs : {CLUSTER_COLORS}")
print(f"Display names: {CLUSTER_DISPLAY}")
# Look the colormap up by name to confirm the registration above took effect.
cmap_check = plt.colormaps["teal_seq"]
print(f"Teal cmap    : {cmap_check.name}")
print("\nCell 1 complete - setup ready")

Project root : c:\Users\Admin\Documents\Codes\drl_quant_trading
SAC root     : c:\Users\Admin\Documents\Codes\drl_quant_trading\algorithms\SAC
Theme keys   : ['_meta', 'qualitative', 'red_amber_green', 'sequential', 'diverging', 'matplotlib_defaults', 'portfolio_assets', 'regime_colors', 'ablation_configs', 'performance']
Asset colors : 6 entries
Regime colors: ['stable', 'transition', 'crisis']
Cluster clrs : {'C1': '#009BDE', 'C3': '#FF1F5B'}
Display names: {'C1': 'SAC-Dir-HMM', 'C3': 'SAC-Dir-Base'}
Teal cmap    : teal_seq

Cell 1 complete - setup ready


  plt.colormaps.register(TEAL_CMAP, name="teal_seq", force=True)


In [24]:
# ══════════════════════════════════════════════════════════════════════
# Cell 2 — Model Discovery
# ══════════════════════════════════════════════════════════════════════
from config import Config

MODELS_ROOT = os.path.join(SAC_ROOT, "models")
CLUSTERS = ["cluster_1", "cluster_3"]

# -- scan for seed folders with model weights + config ------------------
# Expected layout: models/<cluster>/seed_<seed>/{sac_best.pth, config.json}
model_inventory = {}  # {cluster: {seed: {"weights": path, "config": path}}}

for cluster in CLUSTERS:
    cluster_dir = os.path.join(MODELS_ROOT, cluster)
    model_inventory[cluster] = {}
    for seed in SEEDS:
        seed_dir = os.path.join(cluster_dir, f"seed_{seed}")
        weights = os.path.join(seed_dir, "sac_best.pth")
        cfg_path = os.path.join(seed_dir, "config.json")
        found_w = os.path.exists(weights)
        found_c = os.path.exists(cfg_path)
        # A seed is only registered when BOTH artefacts exist; otherwise it is
        # reported and left out of the inventory.
        # NOTE(review): later cells index model_inventory[cluster][seed] for
        # every seed in SEEDS, so a skipped seed surfaces there as a KeyError.
        if found_w and found_c:
            model_inventory[cluster][seed] = {
                "weights": weights,
                "config": cfg_path,
            }
        else:
            print(f"  WARNING: {cluster}/seed_{seed} missing — weights={found_w}, config={found_c}")

print("=== Model Inventory ===")
for cluster, seeds_dict in model_inventory.items():
    print(f"  {cluster}: {len(seeds_dict)} seeds — {sorted(seeds_dict.keys())}")

# -- load one representative config per cluster for comparison ----------
# The lowest-numbered seed that was found on disk stands in for its cluster.
rep_cfgs = {}
for cluster in CLUSTERS:
    available_seeds = sorted(model_inventory[cluster].keys())
    if not available_seeds:
        # Without this guard an empty inventory fails with a bare
        # "IndexError: list index out of range" that hides the real cause.
        raise FileNotFoundError(
            f"No complete seed folder (sac_best.pth + config.json) found for "
            f"{cluster} under {MODELS_ROOT}"
        )
    first_seed = available_seeds[0]
    rep_cfgs[cluster] = Config.load_json(model_inventory[cluster][first_seed]["config"])

c1, c3 = rep_cfgs["cluster_1"], rep_cfgs["cluster_3"]

# -- print key config differences --------------------------------------
print("\n=== Key Config Differences ===")
print(f"{'Parameter':<30} {'Cluster 1':<25} {'Cluster 3':<25}")
print("-" * 80)

# One (row label, extractor) pair per table row; each extractor is applied to
# both representative configs so the two columns are always computed alike.
_config_probes = [
    ("Credit spread source",
     lambda cfg: os.path.basename(cfg.data.credit_spread_path)),
    ("HMM enabled",
     lambda cfg: str(cfg.features.use_regime_hmm)),
    ("HMM n_states",
     lambda cfg: str(cfg.features.hmm_n_states)),
    ("Macro feature count",
     lambda cfg: str(len(cfg.features.macro_feature_columns))),
    ("Macro features",
     lambda cfg: ", ".join(cfg.features.macro_feature_columns)),
    ("Yield curve features",
     lambda cfg: "Yes" if any("Yield" in f for f in cfg.features.macro_feature_columns) else "No"),
    ("Gamma",
     lambda cfg: str(cfg.sac.gamma)),
    ("Training timesteps",
     lambda cfg: f"{cfg.training.total_timesteps:,}"),
]

diffs = [(row_label, probe(c1), probe(c3)) for row_label, probe in _config_probes]


def _clip(text, width=25):
    """Shorten ``text`` to ``width`` characters, ending in '...' when cut."""
    if len(text) > width:
        return text[:width - 3] + "..."
    return text


for label, v1, v3 in diffs:
    # The marker compares the full values, not the clipped display strings.
    marker = "" if v1 == v3 else " *"
    v1_disp = _clip(v1)
    v3_disp = _clip(v3)
    print(f"  {label:<28} {v1_disp:<25} {v3_disp:<25}{marker}")

print("\n(* = differs between clusters)")
print("\nCell 2 complete — model discovery done")

=== Model Inventory ===
  cluster_1: 5 seeds — [42, 123, 456, 789, 1024]
  cluster_3: 5 seeds — [42, 123, 456, 789, 1024]

=== Key Config Differences ===
Parameter                      Cluster 1                 Cluster 3                
--------------------------------------------------------------------------------
  Credit spread source         CREDIT_SPREAD_2010_202... CREDIT_SPREAD_2010_202...
  HMM enabled                  True                      False                     *
  HMM n_states                 3                         3                        
  Macro feature count          11                        9                         *
  Macro features               VIX_normalized, VIX_re... VIX_normalized, VIX_re... *
  Yield curve features         Yes                       No                        *
  Gamma                        0.995                     0.99                      *
  Training timesteps           900,000                   690,000                   *

(* = 

In [25]:
# ══════════════════════════════════════════════════════════════════════
# Cell 3 — Data Loading
# ══════════════════════════════════════════════════════════════════════

# Config paths are relative to algorithms/SAC/, so chdir there for data loading
ANALYSIS_DIR = os.getcwd()
os.chdir(SAC_ROOT)

# -- load data per cluster using representative configs -----------------
cluster_data = {}  # {cluster: {"cfg", "df_train", "df_test", "feature_cols"}}

try:
    for cluster in CLUSTERS:
        cfg = rep_cfgs[cluster]
        print(f"Loading data for {cluster} (HMM={cfg.features.use_regime_hmm}) ...")
        df_train, df_test, feature_cols = load_and_prepare_data(cfg)
        cluster_data[cluster] = {
            "cfg": cfg,
            "df_train": df_train,
            "df_test": df_test,
            "feature_cols": feature_cols,
        }
        print(f"  train: {df_train.shape}, test: {df_test.shape}")
        print(f"  test range: {df_test.index[0].date()} → {df_test.index[-1].date()}")
        print(f"  features: {len(feature_cols)}")
finally:
    # restore CWD so output paths (analysis_outputs/) still work.
    # Done in `finally` so a failed load does not leave the kernel inside
    # SAC_ROOT, where a re-run would record the wrong ANALYSIS_DIR and every
    # CWD-relative path in the notebook would silently shift.
    os.chdir(ANALYSIS_DIR)

# -- compute canonical (shared) test date range ------------------------
# Only dates present in BOTH clusters' test sets are kept, so every strategy
# is evaluated on exactly the same calendar.
idx_c1 = cluster_data["cluster_1"]["df_test"].index
idx_c3 = cluster_data["cluster_3"]["df_test"].index
canonical_dates = idx_c1.intersection(idx_c3).sort_values()

print(f"\n=== Canonical Test Period ===")
print(f"  C1 test dates : {len(idx_c1)}")
print(f"  C3 test dates : {len(idx_c3)}")
print(f"  Intersection  : {len(canonical_dates)}")
# NOTE(review): an empty intersection would raise IndexError on the next line.
print(f"  Range         : {canonical_dates[0].date()} → {canonical_dates[-1].date()}")

# -- trim test DataFrames to canonical dates ----------------------------
# Overwrites cluster_data[...]["df_test"] in place with the trimmed copy.
# Re-running is harmless because the already-trimmed frames intersect to the
# same date set.
for cluster in CLUSTERS:
    df_test = cluster_data[cluster]["df_test"]
    cluster_data[cluster]["df_test"] = df_test.loc[df_test.index.isin(canonical_dates)].copy()

# verify alignment
for cluster in CLUSTERS:
    n = len(cluster_data[cluster]["df_test"])
    print(f"  {cluster} trimmed test: {n} rows")

# -- extract price arrays on canonical dates ----------------------------
# cluster_1's trimmed test frame is used as the price source; after trimming
# both clusters share the same dates.
df_test_canonical = cluster_data["cluster_1"]["df_test"]
prices_test = df_test_canonical[TICKERS].copy()

print(f"\n=== Price Array ===")
print(f"  Shape: {prices_test.shape}")
print(f"  Columns: {list(prices_test.columns)}")

# -- extract regime probabilities (from HMM-enabled cluster = cluster_1) --
# `c1` is the representative cluster_1 config loaded in Cell 2; its feature
# config names the regime-probability columns to pull from the test frame.
regime_prob_cols = c1.features.regime_prob_columns
regime_probs = cluster_data["cluster_1"]["df_test"][regime_prob_cols].copy()
print(f"\n=== Regime Probabilities (Cluster 1 — HMM ON) ===")
print(f"  Columns: {list(regime_probs.columns)}")
print(f"  Shape: {regime_probs.shape}")
print(f"  Sample (first 3 rows):")
print(regime_probs.head(3).to_string())

print("\nCell 3 complete — data loaded")

Loading data for cluster_1 (HMM=True) ...
Downloading data for ['VNQ', 'SPY', 'TLT', 'GLD', 'BTC-USD']...
  ✓ Downloaded 3758 rows
  ✓ Date range: 2014-09-17 to 2024-12-30

Fitting HMM on 2,805 training samples with 6 features...

HMM MODEL SUMMARY

Observation Features (6):
  [0] SPY_logret
  [1] VIX
  [2] Credit_Spread
  [3] VIX_term_struct
  [4] YC_change_5d
  [5] DXY_logret

State Means (scaled space):
----------------------------------------------------------------------
Feature                   Stable  Transition      Crisis
----------------------------------------------------------------------
SPY_logret                0.0504      0.0221     -0.1189
VIX                      -0.7964      0.0677      1.2307
Credit_Spread            -0.1494     -0.4673      0.9759
VIX_term_struct           0.7157     -0.1122     -1.0262
YC_change_5d              0.0947      0.0585     -0.2496
DXY_logret               -0.0117     -0.0151      0.0430

Transition Matrix (row = from, col = to):
------

In [26]:
# ══════════════════════════════════════════════════════════════════════
# Cell 4 — Agent Backtests
# ══════════════════════════════════════════════════════════════════════
import torch

device = torch.device("cpu")  # Dirichlet grad on CPU for stability

# -- helper: patch seed config with fields dropped by Config.load_json --
def patch_seed_config(seed_cfg, config_path):
    """Re-apply regime feature flags that ``Config.load_json`` does not keep.

    Some models were trained with ``use_credit_regime`` / ``use_vix_regime``
    switched on, which added an extra column to the macro features. Those
    flags are not FeatureConfig dataclass fields, so ``load_json`` silently
    drops them. This helper re-reads the raw JSON and appends the matching
    column name when its flag is truthy and the column is not already listed.

    Args:
        seed_cfg: loaded config object; ``seed_cfg.features.macro_feature_columns``
            is mutated in place.
        config_path: path of the JSON file ``seed_cfg`` was loaded from.

    Returns:
        None.
    """
    with open(config_path) as fh:
        saved_features = json.load(fh).get("features", {})

    # (flag stored in the raw JSON, feature column it implies), in append order
    flag_to_column = (
        ("use_credit_regime", "Credit_Spread_regime"),
        ("use_vix_regime", "VIX_regime"),
    )
    for flag, column in flag_to_column:
        if not saved_features.get(flag, False):
            continue
        macro_cols = seed_cfg.features.macro_feature_columns
        if column not in macro_cols:
            macro_cols.append(column)

# -- helper: load data for a specific seed config (with caching) -------
_data_cache = {}  # keyed by (cluster, sorted feature-column tuple) to avoid redundant loads

def load_seed_data(seed_cfg, cluster):
    """Load and prepare data for a seed config, caching by feature signature.

    Args:
        seed_cfg: config object for one seed; ``seed_cfg.env.build_feature_columns``
            supplies the feature list used as the cache signature.
        cluster: cluster key (e.g. "cluster_1"); part of the cache key.

    Returns:
        Tuple ``(df_train, df_test, feature_cols)`` where ``df_test`` has been
        restricted to ``canonical_dates``. Repeated calls with the same
        cluster and feature set return the cached tuple.

    Raises:
        Whatever ``load_and_prepare_data`` raises; the working directory is
        restored before the exception propagates.
    """
    feat_key = tuple(sorted(seed_cfg.env.build_feature_columns(TICKERS, seed_cfg.features)))
    cache_key = (cluster, feat_key)
    if cache_key in _data_cache:
        return _data_cache[cache_key]

    # Config paths are relative to SAC_ROOT, so load from there. The chdir
    # is undone in `finally` so a failed load cannot strand the kernel in
    # SAC_ROOT and break every CWD-relative path used afterwards.
    prev_dir = os.getcwd()
    os.chdir(SAC_ROOT)
    try:
        df_train, df_test, feature_cols = load_and_prepare_data(seed_cfg)
    finally:
        os.chdir(prev_dir)

    # trim to canonical dates
    df_test = df_test.loc[df_test.index.isin(canonical_dates)].copy()
    loaded = (df_train, df_test, feature_cols)
    _data_cache[cache_key] = loaded
    return loaded

# -- cluster label mapping (model folder name → short cluster key) ---------
# The short keys ("C1"/"C3") index CLUSTER_DISPLAY / CLUSTER_COLORS / CLUSTER_SHORT.
CLUSTER_LABELS = {"cluster_1": "C1", "cluster_3": "C3"}

# -- run backtests for all 10 seeds ------------------------------------
agent_results = {}  # {cluster: {seed: {backtest_dict + metrics}}}

for cluster in CLUSTERS:
    agent_results[cluster] = {}

    for seed in SEEDS:
        # NOTE(review): raises KeyError if Cell 2 reported this seed as missing.
        inv = model_inventory[cluster][seed]

        # load this seed's own config
        seed_cfg = Config.load_json(inv["config"])
        patch_seed_config(seed_cfg, inv["config"])

        # load data matching this seed's features (cached if identical to rep)
        _, df_test_seed, _ = load_seed_data(seed_cfg, cluster)

        # create env on this seed's test data
        env = Env(df_test_seed, TICKERS, seed_cfg)

        # create agent and load weights
        state_dim = env.get_state_dim()
        action_dim = env.get_action_dim()
        agent = Agent(state_dim, action_dim, seed_cfg, device)
        agent.load_model(inv["weights"], map_location="cpu")

        # run deterministic backtest
        bt = run_backtest(env, agent, deterministic=True)

        # compute metrics (step_size=5 for weekly returns)
        net = bt["net_returns"]
        eq = bt["equity"]
        metrics = {
            "sharpe":     sharpe(net, step_size=STEP_SIZE),
            "cagr":       cagr(eq, step_size=STEP_SIZE),
            "max_dd":     max_drawdown(eq),
            "ann_vol":    ann_vol(net, step_size=STEP_SIZE),
            "final_eq":   eq[-1],
            "avg_turn":   bt["turnover_oneway"].mean(),
        }
        # Calmar = CAGR / |max drawdown|; undefined (NaN) when there was no drawdown.
        metrics["calmar"] = metrics["cagr"] / abs(metrics["max_dd"]) if metrics["max_dd"] != 0 else np.nan

        # Keep the raw backtest series and the scalar metrics in one flat dict.
        agent_results[cluster][seed] = {**bt, **metrics}
        label = CLUSTER_DISPLAY[CLUSTER_LABELS[cluster]]
        print(f"  {label} seed={seed:>4d}  Sharpe={metrics['sharpe']:.3f}  "
              f"CAGR={metrics['cagr']:.2%}  MaxDD={metrics['max_dd']:.2%}  "
              f"FinalEq={metrics['final_eq']:.3f}")

    print()

# -- summary table ------------------------------------------------------
# One row per (cluster, seed), followed by the per-cluster mean and std rows.
print("=== Agent Backtest Summary ===")
header = f"{'Cluster':<14} {'Seed':>5}  {'Sharpe':>7}  {'CAGR':>8}  {'MaxDD':>8}  {'AnnVol':>8}  {'Calmar':>7}  {'FinalEq':>8}  {'AvgTurn':>8}"
print(header)
print("-" * len(header))

for cluster in CLUSTERS:
    label = CLUSTER_DISPLAY[CLUSTER_LABELS[cluster]]
    for seed in SEEDS:
        r = agent_results[cluster][seed]
        print(f"  {label:<12} {seed:>5d}  {r['sharpe']:>7.3f}  {r['cagr']:>7.2%}  "
              f"{r['max_dd']:>7.2%}  {r['ann_vol']:>7.2%}  {r['calmar']:>7.2f}  "
              f"{r['final_eq']:>8.3f}  {r['avg_turn']:>7.4f}")
    # cluster mean +/- std
    # NOTE(review): ndarray.std() defaults to ddof=0 (population std); with
    # only len(SEEDS) samples the sample std (ddof=1) would be larger —
    # confirm which one the thesis intends to report.
    vals = {m: np.array([agent_results[cluster][s][m] for s in SEEDS])
            for m in ["sharpe", "cagr", "max_dd", "ann_vol", "calmar", "final_eq", "avg_turn"]}
    print(f"  {label+' mean':<12} {'':>5}  {vals['sharpe'].mean():>7.3f}  {vals['cagr'].mean():>7.2%}  "
          f"{vals['max_dd'].mean():>7.2%}  {vals['ann_vol'].mean():>7.2%}  {vals['calmar'].mean():>7.2f}  "
          f"{vals['final_eq'].mean():>8.3f}  {vals['avg_turn'].mean():>7.4f}")
    print(f"  {label+' std':<12} {'':>5}  {vals['sharpe'].std():>7.3f}  {vals['cagr'].std():>7.2%}  "
          f"{vals['max_dd'].std():>7.2%}  {vals['ann_vol'].std():>7.2%}  {vals['calmar'].std():>7.2f}  "
          f"{vals['final_eq'].std():>8.3f}  {vals['avg_turn'].std():>7.4f}")
    print()

print("Cell 4 complete — agent backtests done")

Downloading data for ['VNQ', 'SPY', 'TLT', 'GLD', 'BTC-USD']...
  ✓ Downloaded 3758 rows
  ✓ Date range: 2014-09-17 to 2024-12-30

Fitting HMM on 2,805 training samples with 6 features...

HMM MODEL SUMMARY

Observation Features (6):
  [0] SPY_logret
  [1] VIX
  [2] Credit_Spread
  [3] VIX_term_struct
  [4] YC_change_5d
  [5] DXY_logret

State Means (scaled space):
----------------------------------------------------------------------
Feature                   Stable  Transition      Crisis
----------------------------------------------------------------------
SPY_logret                0.0504      0.0221     -0.1189
VIX                      -0.7964      0.0677      1.2307
Credit_Spread            -0.1494     -0.4673      0.9759
VIX_term_struct           0.7157     -0.1122     -1.0262
YC_change_5d              0.0947      0.0585     -0.2496
DXY_logret               -0.0117     -0.0151      0.0430

Transition Matrix (row = from, col = to):
------------------------------------------------

In [13]:
# ══════════════════════════════════════════════════════════════════════
# Cell 5 — Benchmark Strategies
# ══════════════════════════════════════════════════════════════════════
from scipy.optimize import minimize

# -- Wrapper agents for Env-based benchmarks --------------------------------
class FixedAgent:
    """Constant-allocation policy: every query yields the same target weights.

    Exposes the ``select_action(obs, evaluate)`` call signature so it can be
    handed to ``run_backtest`` in place of a trained agent.
    """

    def __init__(self, weights):
        # Copy into an independent float32 array so later changes to the
        # caller's sequence cannot alter the policy.
        self.weights = np.array(weights, dtype=np.float32)

    def select_action(self, obs, evaluate=True):
        """Return a fresh copy of the fixed weights; ``obs``/``evaluate`` are ignored."""
        action = self.weights.copy()
        return action

class IndexedAgent:
    """Agent that returns pre-computed weights by step index.

    The n-th call to ``select_action`` returns the n-th vector of the
    sequence; once the sequence is exhausted the last vector is repeated.
    ``idx`` is never reset, so use a new instance for every backtest.
    """

    def __init__(self, weight_sequence):
        """Store the weight vectors.

        Raises:
            ValueError: if ``weight_sequence`` is empty (there would be
                nothing to return on the first step).
        """
        self.weights = list(weight_sequence)
        if not self.weights:
            raise ValueError("IndexedAgent requires at least one weight vector")
        self.idx = 0

    def select_action(self, obs, evaluate=True):
        """Return a copy of the weights for the current step and advance.

        ``obs`` and ``evaluate`` are accepted for interface compatibility and
        ignored.
        """
        w = self.weights[min(self.idx, len(self.weights) - 1)]
        self.idx += 1
        # Hand out a copy (as FixedAgent does): a consumer that adjusts the
        # action in place must not corrupt the stored vector, which matters
        # most for the last entry because it is reused on every clamped step.
        return np.array(w)

# -- Benchmark Env uses cluster_1 config/data (prices & TC identical) -------
bm_cfg = rep_cfgs["cluster_1"]
bm_df  = cluster_data["cluster_1"]["df_test"]

# ── 1. Buy-and-Hold SPY (from raw prices, NO Env) ─────────────────────────
spy_prices = prices_test["SPY"].values
spy_eq_daily = spy_prices / spy_prices[0]  # daily equity for smooth plotting

# Derive 5-day returns matching Env rebalance cadence (pos: 4→9→14→…)
# Each return is the simple price change over one LAG-day step; the 1e-12 in
# the denominator only guards against division by a zero price.
# NOTE(review): the recorded output shows 139 stepped returns here versus 140
# for the Env-based benchmarks — confirm the Env's first/last step convention
# so the SPY curve covers the same period.
spy_rets = []
pos = LAG - 1
while pos + LAG < len(spy_prices):
    r = (spy_prices[pos + LAG] - spy_prices[pos]) / (spy_prices[pos] + 1e-12)
    spy_rets.append(r)
    pos += LAG
spy_net = np.array(spy_rets, dtype=np.float64)
spy_eq  = equity_curve(spy_net)

print(f"SPY B&H: {len(spy_net)} stepped returns, final equity = {spy_eq[-1]:.3f}")

# ── 2. Equal-Weight Rebalanced (through Env) ──────────────────────────────
# A fresh Env per benchmark so no state carries over between runs.
env_ew = Env(bm_df, TICKERS, bm_cfg)
bm_ew_bt = run_backtest(env_ew, FixedAgent(BM_EW), deterministic=True)
print(f"EW:  {len(bm_ew_bt['net_returns'])} returns, final equity = {bm_ew_bt['equity'][-1]:.3f}")

# ── 3. Fixed-Weight Rebalanced (through Env) ──────────────────────────────
env_fw = Env(bm_df, TICKERS, bm_cfg)
bm_fw_bt = run_backtest(env_fw, FixedAgent(BM_FW), deterministic=True)
print(f"FW:  {len(bm_fw_bt['net_returns'])} returns, final equity = {bm_fw_bt['equity'][-1]:.3f}")

# ── 4. Mean-Variance Max Sharpe (pre-compute + through Env) ───────────────
MV_LOOKBACK = 252  # rows of daily returns in each estimation window

# Full price history for rolling lookback (train + test)
# Train rows are needed so the first test-period rebalances already have a
# full MV_LOOKBACK window behind them.
prices_full = pd.concat([
    cluster_data["cluster_1"]["df_train"][TICKERS],
    cluster_data["cluster_1"]["df_test"][TICKERS],
]).sort_index()
# Drop any date that appears in both frames, keeping the first occurrence.
prices_full = prices_full[~prices_full.index.duplicated(keep="first")]
daily_rets_full = prices_full.pct_change().dropna()

def mv_max_sharpe(rets_window):
    """Long-only max-Sharpe weights for the assets in ``rets_window``.

    Args:
        rets_window: DataFrame of periodic returns, one column per asset.

    Returns:
        1-D array of weights (bounded to [0, 1], constrained to sum to 1) as
        found by SLSQP, or equal weights when the optimiser reports failure.
    """
    mean_vec = rets_window.mean().values
    cov_mat = rets_window.cov().values
    n_assets = len(mean_vec)

    def objective(weights):
        # Negative mean/volatility ratio (no risk-free rate); the small
        # constants keep the square root and the division well-defined.
        expected = weights @ mean_vec
        volatility = np.sqrt(weights @ cov_mat @ weights + 1e-12)
        return -expected / (volatility + 1e-8)

    fully_invested = {"type": "eq", "fun": lambda weights: weights.sum() - 1.0}
    long_only = [(0.0, 1.0)] * n_assets

    solution = minimize(
        objective,
        x0=np.ones(n_assets) / n_assets,
        method="SLSQP",
        bounds=long_only,
        constraints=[fully_invested],
        options={"maxiter": 1000, "ftol": 1e-10},
    )
    if solution.success:
        return solution.x
    return np.ones(n_assets) / n_assets

# Pre-compute MV weights at each rebalance date
test_dates  = prices_test.index
n_env_steps = (len(test_dates) - LAG) // LAG + 2  # +2 safety margin
ew_fallback = np.array(BM_EW, dtype=np.float32)

mv_weights = []
for i in range(n_env_steps):
    # Row of the i-th rebalance in the test frame (same 4, 9, 14, ... cadence
    # as the SPY benchmark above).
    pos_idx = LAG - 1 + i * LAG
    if pos_idx >= len(test_dates):
        # Padding entries beyond the data (from the safety margin).
        mv_weights.append(ew_fallback.copy())
        continue

    date   = test_dates[pos_idx]
    # Trailing window of daily returns up to and including the rebalance date.
    window = daily_rets_full.loc[daily_rets_full.index <= date].tail(MV_LOOKBACK)

    if len(window) < MV_LOOKBACK:
        # Not enough history yet: hold the risky assets equally.
        w_risky = np.ones(len(TICKERS)) / len(TICKERS)
    else:
        try:
            w_risky = mv_max_sharpe(window)
            if not np.all(np.isfinite(w_risky)):
                raise ValueError("optimizer returned non-finite weights")
        except Exception as exc:
            # Keep the best-effort fallback, but report it: a silently
            # substituted equal-weight step would otherwise be
            # indistinguishable from a genuine optimiser result.
            warnings.warn(
                f"MV optimisation failed on {date}: {exc!r}; "
                f"falling back to equal risky weights",
                RuntimeWarning,
            )
            w_risky = np.ones(len(TICKERS)) / len(TICKERS)

    # Append a zero cash weight so the vector lines up with ASSET_LABELS.
    w_full = np.concatenate([w_risky, [0.0]]).astype(np.float32)
    mv_weights.append(w_full)

print(f"MV:  pre-computed {len(mv_weights)} weight vectors")

env_mv = Env(bm_df, TICKERS, bm_cfg)
bm_mv_bt = run_backtest(env_mv, IndexedAgent(mv_weights), deterministic=True)
print(f"MV:  {len(bm_mv_bt['net_returns'])} returns, final equity = {bm_mv_bt['equity'][-1]:.3f}")

# ── Collect benchmark results ──────────────────────────────────────────────
# Every benchmark ends up as one flat dict of raw series plus scalar metrics,
# the same shape used for the agent results in Cell 4.
benchmark_results = {}

for name, bt in [("SPY B&H", None), ("Equal-Weight", bm_ew_bt),
                  ("Fixed-Weight", bm_fw_bt), ("Mean-Variance", bm_mv_bt)]:
    if name == "SPY B&H":
        # SPY was computed from raw prices (no Env), so it has no backtest
        # dict; buy-and-hold turnover is set to zero.
        net, eq = spy_net, spy_eq
        avg_turn = 0.0
        bt_data = {"net_returns": net, "equity": eq, "equity_daily": spy_eq_daily}
    else:
        net, eq = bt["net_returns"], bt["equity"]
        avg_turn = bt["turnover_oneway"].mean()
        bt_data = bt

    metrics = {
        "sharpe":   sharpe(net, step_size=STEP_SIZE),
        "cagr":     cagr(eq, step_size=STEP_SIZE),
        "max_dd":   max_drawdown(eq),
        "ann_vol":  ann_vol(net, step_size=STEP_SIZE),
        "final_eq": eq[-1],
        "avg_turn": avg_turn,
    }
    # Calmar = CAGR / |max drawdown|; NaN when there was no drawdown.
    metrics["calmar"] = metrics["cagr"] / abs(metrics["max_dd"]) if metrics["max_dd"] != 0 else np.nan
    benchmark_results[name] = {**bt_data, **metrics}

# ── Print benchmark summary ───────────────────────────────────────────────
print("\n=== Benchmark Results (all step_size=5) ===")
header = f"{'Benchmark':<16} {'Sharpe':>7}  {'CAGR':>8}  {'MaxDD':>8}  {'AnnVol':>8}  {'Calmar':>7}  {'FinalEq':>8}  {'AvgTurn':>8}"
print(header)
print("-" * len(header))
for name, r in benchmark_results.items():
    print(f"  {name:<14} {r['sharpe']:>7.3f}  {r['cagr']:>7.2%}  "
          f"{r['max_dd']:>7.2%}  {r['ann_vol']:>7.2%}  {r['calmar']:>7.2f}  "
          f"{r['final_eq']:>8.3f}  {r['avg_turn']:>7.4f}")

print("\nCell 5 complete — benchmarks done")

SPY B&H: 139 stepped returns, final equity = 1.468
EW:  140 returns, final equity = 1.557
FW:  140 returns, final equity = 1.290
MV:  pre-computed 141 weight vectors
MV:  140 returns, final equity = 1.372

=== Benchmark Results (all step_size=5) ===
Benchmark         Sharpe      CAGR     MaxDD    AnnVol   Calmar   FinalEq   AvgTurn
-----------------------------------------------------------------------------------
  SPY B&H          1.372   14.93%   -8.64%   10.57%     1.73     1.468   0.0000
  Equal-Weight     1.366   17.27%   -9.74%   12.22%     1.77     1.557   0.0124
  Fixed-Weight     1.038    9.61%  -10.06%    9.26%     0.96     1.290   0.0086
  Mean-Variance    1.037   12.07%  -11.06%   11.64%     1.09     1.372   0.0927

Cell 5 complete — benchmarks done


In [27]:
# ══════════════════════════════════════════════════════════════════════
# Cell 6 — Tables
# ══════════════════════════════════════════════════════════════════════
# Metric keys in table column order.
METRICS = ["sharpe", "cagr", "max_dd", "ann_vol", "calmar", "final_eq", "avg_turn"]
# Column headers shown in the printed/exported tables.
METRIC_LABELS = {
    "sharpe":   "Sharpe",
    "cagr":     "CAGR",
    "max_dd":   "Max DD",
    "ann_vol":  "Ann. Vol",
    "calmar":   "Calmar",
    "final_eq": "Final Eq.",
    "avg_turn": "Avg. Turnover",
}
# Metrics rendered as percentages; everything else uses three decimals.
PCT_METRICS = {"cagr", "max_dd", "ann_vol", "avg_turn"}


def _metric_spec(metric):
    """Format spec for ``metric``: two-decimal percent or three-decimal float."""
    return ".2%" if metric in PCT_METRICS else ".3f"


def fmt_val(val, metric):
    """Render one metric value as text using the metric's display format."""
    return format(val, _metric_spec(metric))


def fmt_mean_std(mean, std, metric):
    """Render ``mean ± std`` with both numbers in the metric's display format."""
    spec = _metric_spec(metric)
    return f"{mean:{spec}} ± {std:{spec}}"

# -- helper: collect per-seed metric arrays --------------------------------
def cluster_metrics(cluster):
    """Collect each metric across seeds for one cluster.

    Returns a dict ``{metric: ndarray}`` with one array entry per seed, in
    ``SEEDS`` order, for every key in ``METRICS``.
    """
    collected = {}
    for metric in METRICS:
        per_seed = [agent_results[cluster][seed][metric] for seed in SEEDS]
        collected[metric] = np.array(per_seed)
    return collected

# Per-seed metric arrays for each cluster.
c1_vals = cluster_metrics("cluster_1")
c3_vals = cluster_metrics("cluster_3")

# Display names
D_C1 = CLUSTER_DISPLAY["C1"]  # "SAC-Dir-HMM"
D_C3 = CLUSTER_DISPLAY["C3"]  # "SAC-Dir-Base"

# Determine best cluster by mean Sharpe
# (a tie goes to cluster_1 because of the >= comparison)
best_cluster = "cluster_1" if c1_vals["sharpe"].mean() >= c3_vals["sharpe"].mean() else "cluster_3"
best_vals = c1_vals if best_cluster == "cluster_1" else c3_vals
best_label = D_C1 if best_cluster == "cluster_1" else D_C3
print(f"Best cluster: {best_label} (mean Sharpe = {best_vals['sharpe'].mean():.3f})")

# ══════════════════════════════════════════════════════════════════════
# Table 5.1 — Best SAC Cluster vs Benchmarks
# ══════════════════════════════════════════════════════════════════════
# First row: best cluster as mean ± std over seeds; remaining rows: one
# single-run value per benchmark. All cells are pre-formatted strings.
rows_51 = {}
rows_51[best_label] = {
    METRIC_LABELS[m]: fmt_mean_std(best_vals[m].mean(), best_vals[m].std(), m) for m in METRICS
}
for bm_name, bm in benchmark_results.items():
    rows_51[bm_name] = {METRIC_LABELS[m]: fmt_val(bm[m], m) for m in METRICS}

df_51 = pd.DataFrame(rows_51).T
df_51.index.name = "Strategy"

print("\n" + "=" * 80)
print("Table 5.1 — Best SAC Cluster vs Benchmarks")
print("=" * 80)
print(df_51.to_string())

df_51.to_csv(os.path.join(OUT_TBL, "table_5_1_best_vs_benchmarks.csv"))
# escape=True is passed explicitly: the cells contain literal "%" signs, and
# since pandas 2.0 to_latex no longer escapes by default, which would turn the
# rest of every row into a LaTeX comment. The caption is written as-is.
# NOTE(review): cells still contain the Unicode "±"; the thesis preamble must
# accept UTF-8 input for it to render.
df_51.to_latex(os.path.join(OUT_TBL, "table_5_1_best_vs_benchmarks.tex"),
               caption="Best SAC cluster (mean $\\pm$ std over 5 seeds) vs benchmark strategies.",
               label="tab:best_vs_benchmarks",
               escape=True)

# ══════════════════════════════════════════════════════════════════════
# Table 5.2 — Cluster Comparison
# ══════════════════════════════════════════════════════════════════════
# Rows: each cluster as mean ± std, the difference of means, and the
# coefficient of variation of the Sharpe ratio.
rows_52 = {}
rows_52[D_C1] = {
    METRIC_LABELS[m]: fmt_mean_std(c1_vals[m].mean(), c1_vals[m].std(), m) for m in METRICS
}
rows_52[D_C3] = {
    METRIC_LABELS[m]: fmt_mean_std(c3_vals[m].mean(), c3_vals[m].std(), m) for m in METRICS
}

# Delta row (cluster_1 mean minus cluster_3 mean, always with an explicit sign)
delta_row = {}
for m in METRICS:
    d = c1_vals[m].mean() - c3_vals[m].mean()
    if m in PCT_METRICS:
        delta_row[METRIC_LABELS[m]] = f"{d:+.2%}"
    else:
        delta_row[METRIC_LABELS[m]] = f"{d:+.3f}"
rows_52[f"\u0394 ({D_C1} \u2212 {D_C3})"] = delta_row

# CV(Sharpe) row: std / mean across seeds; NaN when the mean is exactly zero.
# Only the "Sharpe" column is filled, the other cells stay empty.
cv_c1 = c1_vals["sharpe"].std() / c1_vals["sharpe"].mean() if c1_vals["sharpe"].mean() != 0 else np.nan
cv_c3 = c3_vals["sharpe"].std() / c3_vals["sharpe"].mean() if c3_vals["sharpe"].mean() != 0 else np.nan
cv_row = {METRIC_LABELS[m]: "" for m in METRICS}
cv_row["Sharpe"] = f"{D_C1}={cv_c1:.3f}, {D_C3}={cv_c3:.3f}"
rows_52["CV(Sharpe)"] = cv_row

df_52 = pd.DataFrame(rows_52).T
df_52.index.name = "Cluster"

print("\n" + "=" * 80)
print(f"Table 5.2 — {D_C1} vs {D_C3}")
print("=" * 80)
print(df_52.to_string())

df_52.to_csv(os.path.join(OUT_TBL, "table_5_2_cluster_comparison.csv"))
# escape=True is passed explicitly: the cells contain literal "%" signs, and
# since pandas 2.0 to_latex no longer escapes by default, which would turn the
# rest of every row into a LaTeX comment. The caption is written as-is.
# NOTE(review): the row label uses Unicode "Δ" and "−" and the cells use "±";
# these need a Unicode-capable LaTeX setup to compile.
df_52.to_latex(os.path.join(OUT_TBL, "table_5_2_cluster_comparison.tex"),
               caption="Cluster comparison: mean $\\pm$ std over 5 seeds, delta, and coefficient of variation.",
               label="tab:cluster_comparison",
               escape=True)

# ══════════════════════════════════════════════════════════════════════
# Table 5.3 — All 10 Individual Seed Results
# ══════════════════════════════════════════════════════════════════════
# One pre-formatted row per (cluster, seed); built as a list of records and
# converted to a DataFrame once.
rows_53 = []
for cluster in CLUSTERS:
    label = CLUSTER_DISPLAY[CLUSTER_LABELS[cluster]]
    for seed in SEEDS:
        r = agent_results[cluster][seed]
        row = {"Cluster": label, "Seed": seed}
        for m in METRICS:
            row[METRIC_LABELS[m]] = fmt_val(r[m], m)
        rows_53.append(row)

df_53 = pd.DataFrame(rows_53).set_index(["Cluster", "Seed"])

print("\n" + "=" * 80)
print("Table 5.3 — All 10 Individual Seed Results")
print("=" * 80)
print(df_53.to_string())

df_53.to_csv(os.path.join(OUT_TBL, "table_5_3_all_seeds.csv"))
# escape=True is passed explicitly: the cells contain literal "%" signs, and
# since pandas 2.0 to_latex no longer escapes by default, which would turn the
# rest of every row into a LaTeX comment.
df_53.to_latex(os.path.join(OUT_TBL, "table_5_3_all_seeds.tex"),
               caption="Per-seed backtest results for reproducibility.",
               label="tab:all_seeds",
               escape=True)

print(f"\nTables saved to: {OUT_TBL}/")
print("Cell 6 complete — tables done")

Best cluster: SAC-Dir-HMM (mean Sharpe = 1.839)

Table 5.1 — Best SAC Cluster vs Benchmarks
                      Sharpe            CAGR          Max DD        Ann. Vol         Calmar      Final Eq.   Avg. Turnover
Strategy                                                                                                                  
SAC-Dir-HMM    1.839 ± 0.152  40.92% ± 4.20%  -8.87% ± 2.02%  19.76% ± 1.31%  4.902 ± 1.365  2.599 ± 0.219  19.49% ± 1.59%
SPY B&H                1.372          14.93%          -8.64%          10.57%          1.729          1.468           0.00%
Equal-Weight           1.366          17.27%          -9.74%          12.22%          1.772          1.557           1.24%
Fixed-Weight           1.038           9.61%         -10.06%           9.26%          0.955          1.290           0.86%
Mean-Variance          1.037          12.07%         -11.06%          11.64%          1.091          1.372           9.27%

Table 5.2 — SAC-Dir-HMM vs SAC-Dir-Base
      

In [34]:
# ══════════════════════════════════════════════════════════════════════
# Cell 7 — Group 1: Headline Performance (F01–F03)
# ══════════════════════════════════════════════════════════════════════
import matplotlib.dates as mdates

# -- Equity date axes ------------------------------------------------------
test_dates = prices_test.index

def equity_dates(n_eq_points):
    """Return a DatetimeIndex with one date per point of an equity curve.

    Point ``k`` is mapped to position ``LAG - 1 + k * LAG`` of the
    module-level ``test_dates``; positions beyond the last available date
    are clamped to that last date.
    """
    final_pos = len(test_dates) - 1
    stamps = []
    for k in range(n_eq_points):
        raw_pos = LAG - 1 + k * LAG
        # Clamp so a curve longer than the calendar never indexes past the end.
        stamps.append(test_dates[raw_pos if raw_pos < final_pos else final_pos])
    return pd.DatetimeIndex(stamps)

# -- Cluster equity statistics ---------------------------------------------
def cluster_equity_stats(cluster):
    """Return the point-wise ``(mean, std)`` of one cluster's equity curves.

    The curves of every seed in ``SEEDS`` are stacked row-wise and reduced
    along the seed axis; the standard deviation uses NumPy's default
    (population, ``ddof=0``).
    """
    runs = agent_results[cluster]
    stacked = np.vstack([runs[seed]["equity"] for seed in SEEDS])
    mean_curve = stacked.mean(axis=0)
    std_curve = stacked.std(axis=0)
    return mean_curve, std_curve

# Seed-averaged equity curve and its spread for each cluster.
c1_eq_mean, c1_eq_std = cluster_equity_stats("cluster_1")
c3_eq_mean, c3_eq_std = cluster_equity_stats("cluster_3")
# Date axes are built from the curve lengths; the SPY curve gets its own axis
# because its length is taken separately from the agents' curves.
env_dates = equity_dates(len(c1_eq_mean))
spy_dates = equity_dates(len(spy_eq))

# -- Benchmark style definitions -------------------------------------------
# The three lists are parallel: entry i of each belongs to the same benchmark.
BM_GREY = ["#555555", "#777777", "#999999", "#444444"]
BM_LS   = ["--", "-.", ":", (0, (3, 1, 1, 1))]
BM_NAMES = ["SPY B&H", "Equal-Weight", "Fixed-Weight", "Mean-Variance"]

# ══════════════════════════════════════════════════════════════════════
# F01 — Equity Curves: Cluster Means ± 1σ + Benchmarks
# ══════════════════════════════════════════════════════════════════════
# F01: seed-averaged cluster equity with ±1σ bands, overlaid with the four
# benchmark curves, saved as a 300-dpi PNG under OUT_FIG.
fig, ax = plt.subplots(figsize=(12, 6))

# Cluster bands + means
# Only the mean lines carry a label, so each cluster appears once in the legend.
ax.fill_between(env_dates, c1_eq_mean - c1_eq_std, c1_eq_mean + c1_eq_std,
                color=CLUSTER_COLORS["C1"], alpha=0.15)
ax.plot(env_dates, c1_eq_mean, color=CLUSTER_COLORS["C1"], lw=2.5,
        label=f"{CLUSTER_DISPLAY['C1']} mean \u00b1 1\u03c3")

ax.fill_between(env_dates, c3_eq_mean - c3_eq_std, c3_eq_mean + c3_eq_std,
                color=CLUSTER_COLORS["C3"], alpha=0.15)
ax.plot(env_dates, c3_eq_mean, color=CLUSTER_COLORS["C3"], lw=2.5,
        label=f"{CLUSTER_DISPLAY['C3']} mean \u00b1 1\u03c3")

# Benchmark lines
# Order must match BM_NAMES / BM_GREY / BM_LS, which are zipped positionally
# below. SPY is plotted against its own date axis; the others reuse env_dates.
bm_curves = [
    (spy_dates, spy_eq),
    (env_dates, benchmark_results["Equal-Weight"]["equity"]),
    (env_dates, benchmark_results["Fixed-Weight"]["equity"]),
    (env_dates, benchmark_results["Mean-Variance"]["equity"]),
]
for (dates, eq), name, grey, ls in zip(bm_curves, BM_NAMES, BM_GREY, BM_LS):
    ax.plot(dates, eq, color=grey, ls=ls, lw=1.5, label=name)

# Reference line at the starting equity of 1.0.
ax.axhline(1.0, color="#CCCCCC", lw=0.8, zorder=0)
ax.set_ylabel("Equity ($1 initial)")
ax.set_title("F01 — Equity Curves: SAC Clusters vs Benchmarks")
ax.legend(loc="upper left", fontsize=9, ncol=2)
# Quarterly ticks labelled like "Jan 2024".
ax.xaxis.set_major_formatter(mdates.DateFormatter("%b %Y"))
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=3))
fig.autofmt_xdate()
plt.tight_layout()
fig.savefig(os.path.join(OUT_FIG, "F01_equity_curves.png"), dpi=300)
plt.show()

# ══════════════════════════════════════════════════════════════════════
# F02 — Per-Seed Equity Curves with Cluster Means
# ══════════════════════════════════════════════════════════════════════
# F02: every individual seed's equity curve drawn faintly, with the two
# cluster means drawn boldly on top.
fig, ax = plt.subplots(figsize=(12, 6))

# Faint per-seed lines; unlabeled so they stay out of the legend.
for cluster, ckey in [("cluster_1", "C1"), ("cluster_3", "C3")]:
    color = CLUSTER_COLORS[ckey]
    for seed in SEEDS:
        eq = agent_results[cluster][seed]["equity"]
        ax.plot(env_dates, eq, color=color, alpha=0.25, lw=1.0)

# Bold cluster means on top
ax.plot(env_dates, c1_eq_mean, color=CLUSTER_COLORS["C1"], lw=3.0,
        label=f"{CLUSTER_DISPLAY['C1']} mean")
ax.plot(env_dates, c3_eq_mean, color=CLUSTER_COLORS["C3"], lw=3.0,
        label=f"{CLUSTER_DISPLAY['C3']} mean")

# Reference line at the starting equity of 1.0.
ax.axhline(1.0, color="#CCCCCC", lw=0.8, zorder=0)
ax.set_ylabel("Equity ($1 initial)")
# NOTE(review): the seed count in the title is a literal, not derived from
# SEEDS — update it by hand if the seed list changes.
ax.set_title("F02 — Per-Seed Equity Curves (10 seeds)")
ax.legend(loc="upper left", fontsize=10)
# Quarterly ticks labelled like "Jan 2024".
ax.xaxis.set_major_formatter(mdates.DateFormatter("%b %Y"))
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=3))
fig.autofmt_xdate()
plt.tight_layout()
fig.savefig(os.path.join(OUT_FIG, "F02_per_seed_equity.png"), dpi=300)
plt.show()

# ══════════════════════════════════════════════════════════════════════
# F03 — Grouped Bar Chart: Sharpe, CAGR, MaxDD
# ══════════════════════════════════════════════════════════════════════
# F03: one panel per headline metric, one bar per strategy. Cluster bars show
# the across-seed mean with a ±1 std error bar; benchmarks are single runs and
# therefore get a zero-length error bar.

# Metric keys, panel titles and label formats — parallel lists, zipped below.
bar_metrics = ["sharpe", "cagr", "max_dd"]
bar_titles  = ["Sharpe Ratio", "CAGR", "Max Drawdown"]
bar_fmts    = ["{:.2f}", "{:.1%}", "{:.1%}"]

# Bar order: the two clusters first, then the benchmarks in BM_NAMES order.
strategy_names = [CLUSTER_DISPLAY["C1"], CLUSTER_DISPLAY["C3"]] + BM_NAMES
strategy_colors = [
    CLUSTER_COLORS["C1"], CLUSTER_COLORS["C3"],
    BM_GREY[0], BM_GREY[1], BM_GREY[2], BM_GREY[3],
]

fig, axes = plt.subplots(1, 3, figsize=(15, 5))

for ax, metric, title, fmt in zip(axes, bar_metrics, bar_titles, bar_fmts):
    means = [c1_vals[metric].mean(), c3_vals[metric].mean()]
    errs  = [c1_vals[metric].std(),  c3_vals[metric].std()]
    for bm_name in BM_NAMES:
        means.append(benchmark_results[bm_name][metric])
        errs.append(0.0)

    x = np.arange(len(strategy_names))
    bars = ax.bar(x, means, color=strategy_colors, edgecolor="white", lw=0.5,
                  yerr=errs, capsize=4, error_kw={"lw": 1.2, "capthick": 1.2})

    ax.set_xticks(x)
    ax.set_xticklabels(strategy_names, rotation=35, ha="right", fontsize=8)
    ax.set_title(title, fontsize=12, fontweight="bold")

    # Value labels on bars.
    # bar_label anchors each label at the outer end of the bar's error bar
    # (above for positive bars, below for negative ones) and pads in points,
    # so the spacing is the same on all three panels regardless of y-scale
    # and the text no longer sits on top of the error-bar whiskers.
    ax.bar_label(bars, labels=[fmt.format(val) for val in means],
                 padding=3, fontsize=8)
    # Leave headroom so the outermost labels stay inside the axes area.
    ax.margins(y=0.12)

    ax.axhline(0, color="#CCCCCC", lw=0.8, zorder=0)

plt.suptitle("F03 — Performance Comparison: Clusters vs Benchmarks",
             fontsize=14, fontweight="bold", y=1.02)
plt.tight_layout()
# bbox_inches="tight" keeps the raised suptitle (y=1.02) inside the saved image.
fig.savefig(os.path.join(OUT_FIG, "F03_grouped_bar_chart.png"), dpi=300,
            bbox_inches="tight")
plt.show()

print("Cell 7 complete — F01, F02, F03 saved")

  plt.show()
  plt.show()


Cell 7 complete — F01, F02, F03 saved


  plt.show()


In [35]:
# ══════════════════════════════════════════════════════════════════════
# Cell 8 — Group 2: Risk Analysis (F04–F06)
# ══════════════════════════════════════════════════════════════════════

# -- Drawdown helper -------------------------------------------------------
def drawdown_series(eq):
    """Compute the running drawdown of an equity curve.

    Each output element is the fractional distance of the curve below its
    running maximum, so values are <= 0 (up to the tiny epsilon below).
    """
    curve = np.asarray(eq, dtype=np.float64)
    running_peak = np.maximum.accumulate(curve)
    # The 1e-12 in the denominator guards against division by a zero peak.
    ratio = curve / (running_peak + 1e-12)
    return ratio - 1.0

# ══════════════════════════════════════════════════════════════════════
# F04 — Drawdown Underwater Plot
# ══════════════════════════════════════════════════════════════════════
# F04: underwater plot of the two clusters (filled) against SPY (dashed line).
fig, ax = plt.subplots(figsize=(12, 5))

# Note: the cluster series are the drawdown of the seed-averaged equity curve,
# not the average of the per-seed drawdown curves.
dd_c1 = drawdown_series(c1_eq_mean)
dd_c3 = drawdown_series(c3_eq_mean)
dd_spy = drawdown_series(spy_eq)

# Fill between the drawdown curve and the zero line.
ax.fill_between(env_dates, dd_c1, 0, color=CLUSTER_COLORS["C1"], alpha=0.35,
                label=CLUSTER_DISPLAY["C1"])
ax.fill_between(env_dates, dd_c3, 0, color=CLUSTER_COLORS["C3"], alpha=0.35,
                label=CLUSTER_DISPLAY["C3"])
ax.plot(spy_dates, dd_spy, color=BM_GREY[0], ls="--", lw=1.5, label="SPY B&H")

ax.set_ylabel("Drawdown")
ax.set_title("F04 — Drawdown Underwater Plot")
# Drawdowns are fractions; xmax=1 renders them as whole percentages.
ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=1, decimals=0))
ax.legend(loc="lower left", fontsize=9)
# Quarterly ticks labelled like "Jan 2024".
ax.xaxis.set_major_formatter(mdates.DateFormatter("%b %Y"))
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=3))
fig.autofmt_xdate()
plt.tight_layout()
fig.savefig(os.path.join(OUT_FIG, "F04_drawdown.png"), dpi=300)
plt.show()

# ══════════════════════════════════════════════════════════════════════
# F05 — Rolling Sharpe (30-step ≈ 6 months)
# ══════════════════════════════════════════════════════════════════════
ROLL_WIN = 30  # 30 weekly steps ≈ 6 months
# NOTE(review): 30 weekly steps is closer to 7 months (6 months ≈ 26 weeks) —
# confirm which of the window length or the "6-month" wording is intended.

def rolling_sharpe(rets, window=ROLL_WIN, periods_per_year=None):
    """Compute a trailing-window annualized Sharpe ratio.

    Parameters
    ----------
    rets : array-like of float
        Per-step returns. Lists and other sequences are accepted and
        converted to a float array.
    window : int
        Number of consecutive steps in each window.
    periods_per_year : float, optional
        Number of return steps per year used for annualization. When
        omitted, ``252 / STEP_SIZE`` is used, with ``STEP_SIZE`` read from
        the notebook's global namespace.

    Returns
    -------
    numpy.ndarray
        Array of ``len(rets)`` values. Entries before index ``window - 1``
        are NaN (not enough history). A window whose sample standard
        deviation (``ddof=1``) is at most 1e-12 yields 0.0 rather than a
        division by (near) zero.
    """
    rets = np.asarray(rets, dtype=np.float64)
    if periods_per_year is None:
        periods_per_year = 252 / STEP_SIZE
    annualizer = np.sqrt(periods_per_year)  # loop-invariant, hoisted

    n = len(rets)
    out = np.full(n, np.nan)
    for i in range(window - 1, n):
        chunk = rets[i - window + 1 : i + 1]
        sd = chunk.std(ddof=1)
        out[i] = (chunk.mean() / sd) * annualizer if sd > 1e-12 else 0.0
    return out

# Per-seed rolling Sharpes → cluster stats
def cluster_rolling_sharpe(cluster):
    """Return the across-seed ``(mean, std)`` of the rolling Sharpe ratio.

    ``rolling_sharpe`` is evaluated on each seed's ``"net_returns"`` and the
    results are stacked with one row per seed. The warm-up columns, where
    every seed is still NaN, stay NaN in both outputs. They are excluded
    from the reductions so that ``nanmean``/``nanstd`` do not emit
    "Mean of empty slice" / "Degrees of freedom <= 0" RuntimeWarnings.
    """
    arr = np.vstack([
        rolling_sharpe(agent_results[cluster][seed]["net_returns"])
        for seed in SEEDS
    ])

    # Columns with at least one finite-or-non-NaN seed value.
    has_data = ~np.isnan(arr).all(axis=0)

    rs_mean = np.full(arr.shape[1], np.nan)
    rs_std = np.full(arr.shape[1], np.nan)
    rs_mean[has_data] = np.nanmean(arr[:, has_data], axis=0)
    rs_std[has_data] = np.nanstd(arr[:, has_data], axis=0)
    return rs_mean, rs_std

# Across-seed mean and spread of the rolling Sharpe ratio for each cluster.
c1_rs_mean, c1_rs_std = cluster_rolling_sharpe("cluster_1")
c3_rs_mean, c3_rs_std = cluster_rolling_sharpe("cluster_3")

# Date axis for rolling Sharpe (aligned with net_returns, so env_dates[1:])
rs_dates = env_dates[1:]  # equity has n+1 points, returns has n

# F05: mean rolling Sharpe per cluster with a ±1σ band.
fig, ax = plt.subplots(figsize=(12, 5))

# Only the mean lines carry a label, so each cluster appears once in the legend.
ax.fill_between(rs_dates, c1_rs_mean - c1_rs_std, c1_rs_mean + c1_rs_std,
                color=CLUSTER_COLORS["C1"], alpha=0.15)
ax.plot(rs_dates, c1_rs_mean, color=CLUSTER_COLORS["C1"], lw=2,
        label=f"{CLUSTER_DISPLAY['C1']} mean \u00b1 1\u03c3")

ax.fill_between(rs_dates, c3_rs_mean - c3_rs_std, c3_rs_mean + c3_rs_std,
                color=CLUSTER_COLORS["C3"], alpha=0.15)
ax.plot(rs_dates, c3_rs_mean, color=CLUSTER_COLORS["C3"], lw=2,
        label=f"{CLUSTER_DISPLAY['C3']} mean \u00b1 1\u03c3")

# Zero line separates positive from negative risk-adjusted performance.
ax.axhline(0, color="#CCCCCC", lw=0.8, zorder=0)
ax.set_ylabel("Rolling Sharpe Ratio")
ax.set_title("F05 — Rolling Sharpe (6-month window)")
ax.legend(loc="upper left", fontsize=9)
# Quarterly ticks labelled like "Jan 2024".
ax.xaxis.set_major_formatter(mdates.DateFormatter("%b %Y"))
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=3))
fig.autofmt_xdate()
plt.tight_layout()
fig.savefig(os.path.join(OUT_FIG, "F05_rolling_sharpe.png"), dpi=300)
plt.show()

# ══════════════════════════════════════════════════════════════════════
# F06 — Risk-Return Scatter with Iso-Sharpe Lines
# ══════════════════════════════════════════════════════════════════════
# F06: CAGR against annualized volatility for every seed, cluster mean and
# benchmark, over a backdrop of constant-ratio guide lines.
fig, ax = plt.subplots(figsize=(8, 7))

# Iso-Sharpe diagonal lines
# Each guide is the straight line y = sr * x over the volatility range, with
# its label placed just right of the line's end point.
vol_range = np.linspace(0.001, 0.25, 200)
for sr in [0.5, 1.0, 1.5, 2.0]:
    ax.plot(vol_range, sr * vol_range, color="#E0E0E0", lw=0.8, ls="--", zorder=0)
    ax.text(vol_range[-1] + 0.002, sr * vol_range[-1],
            f"SR={sr:.1f}", fontsize=7, color="#AAAAAA", va="center")

# Per-seed dots
# Unlabeled, so individual seeds do not appear in the legend.
for cluster, ckey in [("cluster_1", "C1"), ("cluster_3", "C3")]:
    for seed in SEEDS:
        r = agent_results[cluster][seed]
        ax.scatter(r["ann_vol"], r["cagr"], color=CLUSTER_COLORS[ckey],
                   s=60, alpha=0.7, edgecolors="white", lw=0.5, zorder=3)

# Cluster mean markers (larger)
for cluster, ckey in [("cluster_1", "C1"), ("cluster_3", "C3")]:
    vals = cluster_metrics(cluster)
    ax.scatter(vals["ann_vol"].mean(), vals["cagr"].mean(),
               color=CLUSTER_COLORS[ckey], s=180, marker="D",
               edgecolors="black", lw=1.2, zorder=4,
               label=CLUSTER_DISPLAY[ckey])

# Benchmark markers
# Markers are paired with benchmarks by the iteration order of
# benchmark_results; zip stops at the shorter of the two, so at most four
# benchmarks are drawn. All share one grey and differ only by marker shape.
bm_markers = ["s", "^", "v", "P"]
for (name, r), mkr in zip(benchmark_results.items(), bm_markers):
    ax.scatter(r["ann_vol"], r["cagr"], color=BM_GREY[0], marker=mkr,
               s=100, edgecolors="black", lw=0.8, zorder=4, label=name)

ax.set_xlabel("Annualized Volatility")
ax.set_ylabel("CAGR")
# Both axes hold fractions; xmax=1 renders them as whole percentages.
ax.xaxis.set_major_formatter(mticker.PercentFormatter(xmax=1, decimals=0))
ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=1, decimals=0))
ax.set_title("F06 — Risk-Return Scatter")
ax.legend(loc="lower right", fontsize=8, ncol=3)
plt.tight_layout()
fig.savefig(os.path.join(OUT_FIG, "F06_risk_return_scatter.png"), dpi=300)
plt.show()

print("Cell 8 complete — F04, F05, F06 saved")

  plt.show()
  return np.nanmean(arr, axis=0), np.nanstd(arr, axis=0)
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  plt.show()


Cell 8 complete — F04, F05, F06 saved


  plt.show()


In [36]:
# ══════════════════════════════════════════════════════════════════════
# Cell 9 — Group 3: Portfolio Behavior (F07–F09)
# ══════════════════════════════════════════════════════════════════════

# -- Average weights per cluster -------------------------------------------
def cluster_avg_weights(cluster):
    """Return the mean weight per asset for one cluster.

    The weight arrays of every seed in ``SEEDS`` are stacked row-wise, so the
    mean along axis 0 pools all seeds and all steps together.
    """
    runs = agent_results[cluster]
    pooled = np.vstack([runs[seed]["weights"] for seed in SEEDS])
    return pooled.mean(axis=0)

# Mean weight per asset, pooled over seeds and steps, for each cluster.
c1_avg_w = cluster_avg_weights("cluster_1")
c3_avg_w = cluster_avg_weights("cluster_3")

# ══════════════════════════════════════════════════════════════════════
# F07 — Average Weight Grouped Bar Chart
# ══════════════════════════════════════════════════════════════════════
fig, ax = plt.subplots(figsize=(10, 5))

# One slot per asset; the two clusters sit side by side within each slot.
x = np.arange(len(ASSET_LABELS))
width = 0.35

bars_c1 = ax.bar(x - width / 2, c1_avg_w, width, color=CLUSTER_COLORS["C1"],
                 edgecolor="white", lw=0.5, label=CLUSTER_DISPLAY["C1"])
bars_c3 = ax.bar(x + width / 2, c3_avg_w, width, color=CLUSTER_COLORS["C3"],
                 edgecolor="white", lw=0.5, label=CLUSTER_DISPLAY["C3"])

# Value labels
# Bars at or below 1% are left unlabeled to avoid clutter along the baseline.
for bars in [bars_c1, bars_c3]:
    for bar in bars:
        h = bar.get_height()
        if h > 0.01:
            ax.text(bar.get_x() + bar.get_width() / 2, h + 0.005,
                    f"{h:.1%}", ha="center", va="bottom", fontsize=8)

ax.set_xticks(x)
ax.set_xticklabels(ASSET_LABELS, fontsize=10)
ax.set_ylabel("Average Weight")
ax.set_title("F07 — Average Portfolio Weights by Cluster")
ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=1, decimals=0))
ax.legend(loc="upper left", fontsize=10)

# Color-coded bottom bar for each asset
# The strip occupies y in [-0.02, -0.005], just under the zero baseline. It
# needs a non-zero height: a zero-height bar is a degenerate rectangle and is
# never rendered. Assets missing from ASSET_COLORS fall back to grey.
strip_bottom = -0.02
strip_height = 0.015
for i, label in enumerate(ASSET_LABELS):
    color = ASSET_COLORS.get(label, "#999999")
    ax.bar(i, strip_height, bottom=strip_bottom, width=0.8, color=color, alpha=0.6)

plt.tight_layout()
fig.savefig(os.path.join(OUT_FIG, "F07_avg_weights.png"), dpi=300)
plt.show()

# ══════════════════════════════════════════════════════════════════════
# F08 — Stacked Area: Weight Evolution (Best Seed, Winning Cluster)
# ══════════════════════════════════════════════════════════════════════
# Identify best seed from winning cluster
# Resolve the winning cluster's display name, then pick the seed with the
# highest Sharpe ratio inside that cluster.
winning_cluster = best_cluster
winning_ckey = CLUSTER_LABELS[winning_cluster]
winning_display = CLUSTER_DISPLAY[winning_ckey]
best_seed = max(SEEDS, key=lambda s: agent_results[winning_cluster][s]["sharpe"])
best_weights = agent_results[winning_cluster][best_seed]["weights"]  # (n_steps, 6)

# Date axis for weights (one per step)
weight_dates = env_dates[1:]  # weights correspond to steps, not equity start

# F08: stacked area of the best seed's weights over time.
fig, ax = plt.subplots(figsize=(12, 6.5))

# Build stacked area with asset colors
# Assets missing from ASSET_COLORS fall back to grey. stackplot expects one
# row per series, hence the transpose.
asset_color_list = [ASSET_COLORS.get(a, "#999999") for a in ASSET_LABELS]
ax.stackplot(weight_dates, best_weights.T, labels=ASSET_LABELS,
             colors=asset_color_list, alpha=0.85)

ax.set_ylabel("Portfolio Weight")
ax.set_ylim(0, 1)
# Extra title padding leaves room for the legend placed just above the axes.
ax.set_title(f"F08 — Weight Evolution ({winning_display} seed={best_seed}, Sharpe={agent_results[winning_cluster][best_seed]['sharpe']:.3f})",
             pad=25)
ax.legend(loc="upper center", fontsize=8, ncol=6,
          bbox_to_anchor=(0.5, 1.06))
# Quarterly ticks labelled like "Jan 2024".
ax.xaxis.set_major_formatter(mdates.DateFormatter("%b %Y"))
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=3))
fig.autofmt_xdate()
plt.tight_layout()
# bbox_inches="tight" keeps the legend above the axes inside the saved image.
fig.savefig(os.path.join(OUT_FIG, "F08_weight_evolution.png"), dpi=300,
            bbox_inches="tight")
plt.show()

# ══════════════════════════════════════════════════════════════════════
# F09 — Turnover Box Plot
# ══════════════════════════════════════════════════════════════════════
# F09: distribution of per-seed average turnover, one box per cluster, with
# the individual seed values overlaid as points.
fig, ax = plt.subplots(figsize=(7, 5))

# Collect average turnover per seed for each cluster
turn_c1 = [agent_results["cluster_1"][s]["avg_turn"] for s in SEEDS]
turn_c3 = [agent_results["cluster_3"][s]["avg_turn"] for s in SEEDS]

positions = [1, 2]
# showmeans adds a white diamond at each box's mean; the median is the black line.
bp = ax.boxplot([turn_c1, turn_c3], positions=positions, widths=0.5,
                patch_artist=True, showmeans=True,
                meanprops=dict(marker="D", markerfacecolor="white",
                               markeredgecolor="black", markersize=6),
                medianprops=dict(color="black", lw=1.5))

# Color the boxes
bp["boxes"][0].set_facecolor(CLUSTER_COLORS["C1"])
bp["boxes"][0].set_alpha(0.6)
bp["boxes"][1].set_facecolor(CLUSTER_COLORS["C3"])
bp["boxes"][1].set_alpha(0.6)

# Overlay individual seed points (jittered)
# The generator is re-created with seed 42 on every iteration, so the figure
# is reproducible and both clusters receive the same horizontal offsets.
for i, (data, pos) in enumerate(zip([turn_c1, turn_c3], positions)):
    jitter = np.random.default_rng(42).uniform(-0.1, 0.1, len(data))
    color = CLUSTER_COLORS["C1"] if i == 0 else CLUSTER_COLORS["C3"]
    ax.scatter(np.full(len(data), pos) + jitter, data,
               color=color, s=50, edgecolors="black", lw=0.5, zorder=3)

ax.set_xticks(positions)
ax.set_xticklabels([CLUSTER_DISPLAY["C1"], CLUSTER_DISPLAY["C3"]], fontsize=11)
ax.set_ylabel("Average One-Way Turnover")
ax.set_title("F09 — Turnover Distribution by Cluster")
plt.tight_layout()
fig.savefig(os.path.join(OUT_FIG, "F09_turnover_boxplot.png"), dpi=300)
plt.show()

print("Cell 9 complete — F07, F08, F09 saved")

  plt.show()
  plt.show()
  plt.show()


Cell 9 complete — F07, F08, F09 saved


In [37]:
# ══════════════════════════════════════════════════════════════════════
# Cell 10 — Group 4: Cross-Cluster Comparison (F10–F11)
# ══════════════════════════════════════════════════════════════════════

# ══════════════════════════════════════════════════════════════════════
# F10 — Per-Seed Strip/Dot Plot with Cluster Mean Bars
# ══════════════════════════════════════════════════════════════════════
# Metric keys, panel titles and annotation formats — parallel lists, zipped below.
strip_metrics = ["sharpe", "cagr", "max_dd"]
strip_titles  = ["Sharpe Ratio", "CAGR", "Max Drawdown"]
strip_fmts    = ["{:.3f}", "{:.2%}", "{:.2%}"]

# F10: one panel per metric; in each, a translucent horizontal bar marks the
# cluster mean and dots mark the individual seeds.
fig, axes = plt.subplots(1, 3, figsize=(14, 5))

for ax, metric, title, fmt in zip(axes, strip_metrics, strip_titles, strip_fmts):
    # ci doubles as the y position of the cluster's row (0 = C1, 1 = C3).
    for ci, (cluster, ckey) in enumerate([("cluster_1", "C1"), ("cluster_3", "C3")]):
        vals_arr = np.array([agent_results[cluster][s][metric] for s in SEEDS])
        mean_val = vals_arr.mean()

        # Horizontal mean bar
        ax.barh(ci, mean_val, height=0.4, color=CLUSTER_COLORS[ckey],
                alpha=0.3, edgecolor=CLUSTER_COLORS[ckey], lw=1.5)

        # Individual seed dots
        # Evenly spaced (deterministic) vertical offsets keep dots with
        # similar values from overlapping.
        jitter = np.linspace(-0.12, 0.12, len(SEEDS))
        for j, (seed, val) in enumerate(zip(SEEDS, vals_arr)):
            ax.scatter(val, ci + jitter[j], color=CLUSTER_COLORS[ckey],
                       s=50, edgecolors="black", lw=0.5, zorder=3)

        # Mean value annotation
        ax.text(mean_val, ci + 0.25, fmt.format(mean_val),
                ha="center", va="bottom", fontsize=8, fontweight="bold",
                color=CLUSTER_COLORS[ckey])

    ax.set_yticks([0, 1])
    ax.set_yticklabels([CLUSTER_DISPLAY["C1"], CLUSTER_DISPLAY["C3"]], fontsize=10)
    ax.set_title(title, fontsize=12, fontweight="bold", pad=15)
    ax.axvline(0, color="#CCCCCC", lw=0.8, zorder=0)

plt.suptitle("F10 — Per-Seed Metric Comparison",
             fontsize=14, fontweight="bold", y=1.02)
plt.tight_layout()
# bbox_inches="tight" keeps the raised suptitle (y=1.02) inside the saved image.
fig.savefig(os.path.join(OUT_FIG, "F10_strip_dot_plot.png"), dpi=300,
            bbox_inches="tight")
plt.show()

# ══════════════════════════════════════════════════════════════════════
# F11 — Annotated Heatmap (Seeds + Benchmarks × Metrics)
# ══════════════════════════════════════════════════════════════════════
# F11: heatmap with one row per (cluster, seed) run followed by one row per
# benchmark, and one column per metric. Colour encodes a per-column min-max
# score; the cell text shows the raw value.
hm_metrics = ["sharpe", "cagr", "max_dd", "ann_vol", "calmar", "final_eq"]
hm_labels  = ["Sharpe", "CAGR", "Max DD", "Ann. Vol", "Calmar", "Final Eq."]

# Build data matrix: agent rows first, benchmark rows after
row_labels = []
hm_data = []

for cluster in CLUSTERS:
    ckey = CLUSTER_LABELS[cluster]
    short = CLUSTER_SHORT[ckey]
    for seed in SEEDS:
        row_labels.append(f"{short} {seed}")
        hm_data.append([agent_results[cluster][seed][m] for m in hm_metrics])

# Number of agent rows; used to place the seeds/benchmarks separator so it
# stays correct if the cluster or seed lists change.
n_agent_rows = len(row_labels)

for bm_name, bm in benchmark_results.items():
    row_labels.append(bm_name)
    hm_data.append([bm[m] for m in hm_metrics])

hm_arr = np.array(hm_data)

# Normalize each column to [0, 1] for coloring (higher score = better).
# Drawdowns are scored on their magnitude, so the shallowest drawdown gets
# the highest score whether max_dd is stored as a negative or a positive
# number. (Min-max scaling of negative drawdowns already ranks "less
# negative" highest, so no extra inversion may be applied on top.)
# NOTE(review): ann_vol is still scored as "higher = better", which may not
# match the colorbar label — confirm the intended direction for volatility.
hm_norm = np.zeros_like(hm_arr)
for j in range(hm_arr.shape[1]):
    col = hm_arr[:, j]
    if hm_metrics[j] == "max_dd":
        col = -np.abs(col)
    col_min, col_max = col.min(), col.max()
    if col_max - col_min > 1e-12:
        hm_norm[:, j] = (col - col_min) / (col_max - col_min)
    else:
        # Constant column: use a neutral mid-scale colour.
        hm_norm[:, j] = 0.5

fig, ax = plt.subplots(figsize=(10, 9))

# Disable default grid for heatmap (global rcParams has grid=True)
ax.grid(False)

im = ax.imshow(hm_norm, cmap="teal_seq", aspect="auto", vmin=0, vmax=1)

# Annotate cells with actual values
for i in range(hm_arr.shape[0]):
    for j in range(hm_arr.shape[1]):
        val = hm_arr[i, j]
        if hm_metrics[j] in PCT_METRICS:
            txt = f"{val:.2%}"
        else:
            txt = f"{val:.3f}"
        # Choose text color based on brightness
        text_color = "white" if hm_norm[i, j] > 0.6 else "black"
        ax.text(j, i, txt, ha="center", va="center", fontsize=8, color=text_color)

# Subtle cell dividers
n_rows, n_cols = hm_arr.shape
for i in range(n_rows + 1):
    ax.axhline(i - 0.5, color="#2D3436", lw=0.3, alpha=0.4)
for j in range(n_cols + 1):
    ax.axvline(j - 0.5, color="#2D3436", lw=0.3, alpha=0.4)

ax.set_xticks(np.arange(len(hm_labels)))
ax.set_xticklabels(hm_labels, fontsize=10, rotation=30, ha="right")
ax.set_yticks(np.arange(len(row_labels)))
ax.set_yticklabels(row_labels, fontsize=9)

# Bold separator line between seeds and benchmarks
ax.axhline(n_agent_rows - 0.5, color="black", lw=2)

ax.set_title("F11 — Performance Heatmap: All Seeds & Benchmarks")
fig.colorbar(im, ax=ax, shrink=0.6, label="Normalized Score (higher = better)")
plt.tight_layout()
fig.savefig(os.path.join(OUT_FIG, "F11_heatmap.png"), dpi=300)
plt.show()

print("Cell 10 complete — F10, F11 saved")

  plt.show()


Cell 10 complete — F10, F11 saved


  plt.show()


In [38]:
# ══════════════════════════════════════════════════════════════════════
# Cell 11 — Group 5: Regime-Conditional (F12–F13)
# ══════════════════════════════════════════════════════════════════════

# -- Regime data from cluster_1 (HMM enabled) -----------------------------
# regime_probs: DataFrame with columns [RegimeP_bull, RegimeP_caution, RegimeP_stress, RegimeP_crisis]
# Map HMM states to 3 regime labels: Stable, Transition, Crisis
# With 4 HMM states, map: bull→Stable, caution→Transition, stress→Crisis, crisis→Crisis

# Regime label mapping: argmax across 4 probability columns → 3 categories
# Collapse the column positions of regime_probs into three display regimes;
# positions 2 and 3 both map to "Crisis".
# NOTE(review): this relies on the columns being ordered bull, caution,
# stress, crisis, as the comment above states — verify against the loader.
regime_state_map = {0: "Stable", 1: "Transition", 2: "Crisis", 3: "Crisis"}
# Display regime -> theme colour.
regime_color_map = {"Stable": REGIME_COLORS["stable"],
                    "Transition": REGIME_COLORS["transition"],
                    "Crisis": REGIME_COLORS["crisis"]}

# Per row, the column position with the highest probability, then its label.
regime_dominant = regime_probs.values.argmax(axis=1)  # (n_daily,)
regime_labels_daily = np.array([regime_state_map[s] for s in regime_dominant])

# ══════════════════════════════════════════════════════════════════════
# F12 — Best Seed Equity + Regime Background + Regime Prob Subplot
# ══════════════════════════════════════════════════════════════════════
# Best seed from HMM-enabled cluster (cluster_1)
# Best seed (highest Sharpe) of the HMM-enabled cluster and its equity curve.
hmm_best_seed = max(SEEDS, key=lambda s: agent_results["cluster_1"][s]["sharpe"])
hmm_best_eq = agent_results["cluster_1"][hmm_best_seed]["equity"]

# Two stacked panels sharing the date axis: equity on top (2/3 of the
# height), regime probabilities below.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8), height_ratios=[2, 1],
                                sharex=True, gridspec_kw={"hspace": 0.08})

# Top: equity curve with regime background shading
daily_dates = canonical_dates

# Draw regime bands, one axvspan per run of identical consecutive labels.
# Each band runs from the first date of its run to the first date of the NEXT
# run, so neighbouring bands are contiguous and a one-day regime still has
# visible width. The final run ends on the last available date.
prev_regime = regime_labels_daily[0]
start_idx = 0
for i in range(1, len(regime_labels_daily)):
    if regime_labels_daily[i] != prev_regime or i == len(regime_labels_daily) - 1:
        # end_idx is the index one past the run: the change point, or the
        # array length when the run reaches the final label.
        end_idx = i if regime_labels_daily[i] != prev_regime else i + 1
        ax1.axvspan(daily_dates[start_idx], daily_dates[min(end_idx, len(daily_dates) - 1)],
                    color=regime_color_map[prev_regime], alpha=0.15, zorder=0)
        start_idx = i
        prev_regime = regime_labels_daily[i]

ax1.plot(env_dates, hmm_best_eq, color=CLUSTER_COLORS["C1"], lw=2.5,
         label=f"{CLUSTER_DISPLAY['C1']} seed={hmm_best_seed}")
ax1.axhline(1.0, color="#CCCCCC", lw=0.8, zorder=0)
ax1.set_ylabel("Equity ($1 initial)")
ax1.set_title(f"F12 — Best {CLUSTER_DISPLAY['C1']} Seed with Regime Shading (seed={hmm_best_seed})")

# Legends: regime patches top-left, equity line lower-right
# The second legend() call replaces the first on the axes, so the first is
# re-attached afterwards with add_artist to show both.
from matplotlib.patches import Patch
regime_patches = [Patch(facecolor=regime_color_map[r], alpha=0.3, label=r)
                  for r in ["Stable", "Transition", "Crisis"]]
leg1 = ax1.legend(handles=regime_patches, loc="upper left", fontsize=8, title="Regime")
leg2 = ax1.legend(loc="lower right", fontsize=9)
ax1.add_artist(leg1)

# Bottom: stacked regime probability subplot
regime_vals = regime_probs.values  # (n_daily, 4)

# Stack: Stable (bull) on bottom, Crisis on top
# The last two series share the crisis colour and differ only by legend label.
stack_colors = [REGIME_COLORS["stable"], REGIME_COLORS["transition"],
                REGIME_COLORS["crisis"], REGIME_COLORS["crisis"]]
stack_labels = ["Stable (bull)", "Transition (caution)", "Crisis (stress)", "Crisis"]

# stackplot expects one row per series, hence the transpose; labels are
# trimmed to the number of probability columns actually present.
ax2.stackplot(daily_dates, regime_vals.T,
              colors=stack_colors, alpha=0.7,
              labels=stack_labels[:regime_vals.shape[1]])
ax2.set_ylabel("Regime Probability")
ax2.set_ylim(0, 1)
ax2.legend(loc="upper right", fontsize=7, ncol=2)
# Quarterly ticks labelled like "Jan 2024".
ax2.xaxis.set_major_formatter(mdates.DateFormatter("%b %Y"))
ax2.xaxis.set_major_locator(mdates.MonthLocator(interval=3))
fig.autofmt_xdate()
plt.tight_layout()
fig.savefig(os.path.join(OUT_FIG, "F12_regime_equity.png"), dpi=300)
plt.show()

# ══════════════════════════════════════════════════════════════════════
# F13 — Regime-Conditional Returns Box Plot
# ══════════════════════════════════════════════════════════════════════

# Assign regime labels to each rebalance step (weekly returns)
# Number of return steps, taken from the first seed of cluster_1.
n_returns = len(agent_results["cluster_1"][SEEDS[0]]["net_returns"])

# Label step i with the daily regime found at index LAG - 1 + i * LAG;
# indices past the end of the daily labels reuse the last label.
step_regime_labels = []
for i in range(n_returns):
    day_idx = LAG - 1 + i * LAG
    if day_idx < len(regime_labels_daily):
        step_regime_labels.append(regime_labels_daily[day_idx])
    else:
        step_regime_labels.append(regime_labels_daily[-1])
step_regime_labels = np.array(step_regime_labels)

# Collect returns by regime for each cluster
regime_order = ["Stable", "Transition", "Crisis"]

fig, ax = plt.subplots(figsize=(10, 6))

# Layout: regimes are spaced gap_between_regimes apart along x, and the two
# cluster boxes within a regime sit group_width apart.
pos = 0
group_width = 0.35
gap_between_regimes = 1.5

# Parallel lists, one entry per box, in drawing order.
box_data = []
box_positions = []
box_colors = []

for ri, regime in enumerate(regime_order):
    # Boolean mask over steps that fall in this regime.
    mask = step_regime_labels == regime
    for ci, (cluster, ckey) in enumerate([("cluster_1", "C1"), ("cluster_3", "C3")]):
        # Pool this regime's returns across all seeds of the cluster.
        rets_regime = []
        for seed in SEEDS:
            seed_rets = agent_results[cluster][seed]["net_returns"]
            rets_regime.extend(seed_rets[mask])
        box_data.append(rets_regime)
        box_positions.append(pos + ci * group_width)
        box_colors.append(CLUSTER_COLORS[ckey])
    pos += gap_between_regimes

# Outliers are hidden (showfliers=False); the white diamond marks the mean
# and the black line the median.
bp = ax.boxplot(box_data, positions=box_positions, widths=0.3,
                patch_artist=True, showfliers=False, showmeans=True,
                meanprops=dict(marker="D", markerfacecolor="white",
                               markeredgecolor="black", markersize=5),
                medianprops=dict(color="black", lw=1.5))

for i, patch in enumerate(bp["boxes"]):
    patch.set_facecolor(box_colors[i])
    patch.set_alpha(0.6)

# X-axis: regime labels centered between the two clusters
regime_centers = []
pos = 0
for ri in range(len(regime_order)):
    regime_centers.append(pos + group_width / 2)
    pos += gap_between_regimes

ax.set_xticks(regime_centers)
ax.set_xticklabels(regime_order, fontsize=12)
ax.axhline(0, color="#CCCCCC", lw=0.8, zorder=0)
ax.set_ylabel("Weekly Net Return")
# Returns are fractions; xmax=1 renders them as percentages with one decimal.
ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=1, decimals=1))
ax.set_title(f"F13 — Returns by Dominant Regime: {CLUSTER_DISPLAY['C1']} vs {CLUSTER_DISPLAY['C3']}")

# Manual legend
# Boxes carry no labels of their own, so proxy patches stand in for them.
from matplotlib.patches import Patch as LPatch
ax.legend(handles=[
    LPatch(facecolor=CLUSTER_COLORS["C1"], alpha=0.6, label=CLUSTER_DISPLAY["C1"]),
    LPatch(facecolor=CLUSTER_COLORS["C3"], alpha=0.6, label=CLUSTER_DISPLAY["C3"]),
], loc="upper left", fontsize=10)

plt.tight_layout()
fig.savefig(os.path.join(OUT_FIG, "F13_regime_returns_boxplot.png"), dpi=300)
plt.show()

print("Cell 11 complete — F12, F13 saved")

  plt.tight_layout()
  plt.show()


Cell 11 complete — F12, F13 saved


  plt.show()


In [39]:
# ══════════════════════════════════════════════════════════════════════
# Cell 12 — Summary
# ══════════════════════════════════════════════════════════════════════

# -- Determine winner and compute deltas -----------------------------------
# The winner is the cluster with the higher mean Sharpe across seeds (a tie goes to C1).
c1_sharpe_mean = c1_vals["sharpe"].mean()
c3_sharpe_mean = c3_vals["sharpe"].mean()
winner = CLUSTER_DISPLAY["C1"] if c1_sharpe_mean >= c3_sharpe_mean else CLUSTER_DISPLAY["C3"]
loser  = CLUSTER_DISPLAY["C3"] if winner == CLUSTER_DISPLAY["C1"] else CLUSTER_DISPLAY["C1"]
winner_sharpe = max(c1_sharpe_mean, c3_sharpe_mean)
loser_sharpe  = min(c1_sharpe_mean, c3_sharpe_mean)

# Every delta is oriented as (C1 minus C3), independent of which cluster won.
sharpe_delta = c1_sharpe_mean - c3_sharpe_mean
cagr_delta   = c1_vals["cagr"].mean() - c3_vals["cagr"].mean()
dd_delta     = c1_vals["max_dd"].mean() - c3_vals["max_dd"].mean()
vol_delta    = c1_vals["ann_vol"].mean() - c3_vals["ann_vol"].mean()

# Best individual seed overall: flatten to (display name, seed, Sharpe, CAGR)
# tuples and pick the one with the highest Sharpe.
all_seeds_flat = []
for cluster in CLUSTERS:
    for seed in SEEDS:
        r = agent_results[cluster][seed]
        ckey = CLUSTER_LABELS[cluster]
        all_seeds_flat.append((CLUSTER_DISPLAY[ckey], seed, r["sharpe"], r["cagr"]))
best_overall = max(all_seeds_flat, key=lambda x: x[2])

# -- Data-driven takeaways --------------------------------------------------
# Takeaway 1: the winning cluster's mean Sharpe against the best benchmark Sharpe.
best_bm_sharpe = max(bm["sharpe"] for bm in benchmark_results.values())
bm_verdict = "outperform" if winner_sharpe > best_bm_sharpe else "underperform"

# Takeaway 2: only credit HMM conditioning (C1) with an advantage when C1's
# mean Sharpe is actually higher; otherwise report the signed gap instead of
# presenting abs(delta) as an "improvement".
if sharpe_delta > 0:
    hmm_takeaway = (
        f"  2. HMM regime conditioning provides a "
        f"{'meaningful' if abs(sharpe_delta) > 0.1 else 'modest'} advantage "
        f"({abs(sharpe_delta):.3f} Sharpe improvement)"
    )
else:
    hmm_takeaway = (
        f"  2. HMM regime conditioning provides no Sharpe advantage "
        f"({D_C1} - {D_C3} = {sharpe_delta:+.3f})"
    )

# Takeaway 4: count the seeds with a positive Sharpe instead of asserting it.
n_seeds_total = len(all_seeds_flat)
n_seeds_positive = sum(1 for entry in all_seeds_flat if entry[2] > 0)
if n_seeds_positive == n_seeds_total:
    seeds_takeaway = f"  4. All {n_seeds_total} agent seeds achieved positive risk-adjusted returns"
else:
    seeds_takeaway = (
        f"  4. {n_seeds_positive} of {n_seeds_total} agent seeds achieved "
        f"positive risk-adjusted returns"
    )

# -- Build summary text ---------------------------------------------------
# NOTE(review): the config description lines (HMM, features, gamma, steps) and
# the artefact counts near the end are hard-coded literals; nothing in this
# cell reads them from the configs or the output folders — confirm they are current.
summary_lines = [
    "=" * 70,
    "CHAPTER 5 ANALYSIS — FINAL SUMMARY",
    "=" * 70,
    "",
    f"Test period: {canonical_dates[0].date()} -> {canonical_dates[-1].date()} "
    f"({len(canonical_dates)} trading days, {n_returns} weekly steps)",
    "",
    "--- Cluster Comparison ---",
    f"  Winner: {winner} (mean Sharpe = {winner_sharpe:.3f})",
    f"  Loser:  {loser}  (mean Sharpe = {loser_sharpe:.3f})",
    f"  Sharpe delta ({D_C1} - {D_C3}): {sharpe_delta:+.3f}",
    f"  CAGR delta:             {cagr_delta:+.2%}",
    f"  MaxDD delta:            {dd_delta:+.2%} (more negative = worse)",
    f"  AnnVol delta:           {vol_delta:+.2%}",
    "",
    f"  CV(Sharpe) — {D_C1}: {cv_c1:.3f}, {D_C3}: {cv_c3:.3f}",
    f"  -> {D_C1 if cv_c1 < cv_c3 else D_C3} is more consistent (lower CV = less seed variance)",
    "",
    f"  Best individual seed: {best_overall[0]} seed={best_overall[1]} "
    f"(Sharpe={best_overall[2]:.3f}, CAGR={best_overall[3]:.2%})",
    "",
    "--- Key Config Differences That May Explain the Gap ---",
    f"  {D_C1}: HMM ON (3-state), yield curve features, 11 macro features, gamma=0.995, 900k steps",
    f"  {D_C3}: HMM OFF, no yield curve, 9 macro features, gamma=0.99, 690k steps",
    "",
    "  Potential explanations:",
    "  - HMM regime awareness may help adapt portfolio to market conditions",
    "  - Yield curve features provide macro insight (recession/expansion signals)",
    "  - Higher gamma (0.995 vs 0.99) favors longer-horizon planning",
    "  - More training steps (900k vs 690k) allows better policy convergence",
    "",
    "--- vs Benchmarks ---",
]

# One line per benchmark: the winning cluster's mean Sharpe minus the benchmark's Sharpe.
for bm_name, bm in benchmark_results.items():
    delta_bm = winner_sharpe - bm["sharpe"]
    summary_lines.append(f"  {winner} vs {bm_name}: Sharpe {delta_bm:+.3f}")

summary_lines += [
    "",
    "--- Key Takeaways ---",
    f"  1. SAC-Dirichlet agents {bm_verdict} "
    f"all benchmarks on risk-adjusted returns",
    hmm_takeaway,
    f"  3. Both clusters show {'high' if max(cv_c1, cv_c3) > 0.15 else 'low'} "
    f"seed sensitivity (CV: {D_C1}={cv_c1:.3f}, {D_C3}={cv_c3:.3f})",
    seeds_takeaway,
    "",
    f"Figures saved: {OUT_FIG}/ (13 PNGs @ 300 dpi)",
    f"Tables saved:  {OUT_TBL}/ (3 CSVs + 3 LaTeX)",
    "=" * 70,
]

summary_text = "\n".join(summary_lines)
print(summary_text)

# -- Save summary.txt ---------------------------------------------------
summary_path = os.path.join("analysis_outputs", "summary.txt")
# Create the target folder if needed so a fresh checkout does not fail here.
os.makedirs(os.path.dirname(summary_path), exist_ok=True)
# The text contains non-ASCII characters (em dashes); pin UTF-8 so the file
# does not depend on the platform's default encoding.
with open(summary_path, "w", encoding="utf-8") as f:
    f.write(summary_text)
print(f"\nSummary saved to: {summary_path}")
print("\nCell 12 complete — analysis finished")

CHAPTER 5 ANALYSIS — FINAL SUMMARY

Test period: 2023-01-29 -> 2024-12-30 (702 trading days, 140 weekly steps)

--- Cluster Comparison ---
  Winner: SAC-Dir-HMM (mean Sharpe = 1.839)
  Loser:  SAC-Dir-Base  (mean Sharpe = 1.590)
  Sharpe delta (SAC-Dir-HMM - SAC-Dir-Base): +0.250
  CAGR delta:             +8.54%
  MaxDD delta:            +1.47% (more negative = worse)
  AnnVol delta:           +0.91%

  CV(Sharpe) — SAC-Dir-HMM: 0.083, SAC-Dir-Base: 0.100
  -> SAC-Dir-HMM is more consistent (lower CV = less seed variance)

  Best individual seed: SAC-Dir-HMM seed=456 (Sharpe=2.033, CAGR=48.50%)

--- Key Config Differences That May Explain the Gap ---
  SAC-Dir-HMM: HMM ON (3-state), yield curve features, 11 macro features, gamma=0.995, 900k steps
  SAC-Dir-Base: HMM OFF, no yield curve, 9 macro features, gamma=0.99, 690k steps

  Potential explanations:
  - HMM regime awareness may help adapt portfolio to market conditions
  - Yield curve features provide macro insight (recession/expan