In [7]:
# =========================
# 0) Imports & Config
# =========================
import requests
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from dataclasses import dataclass
from typing import List, Dict, Optional

FMP_API_BASE_URL = "https://financialmodelingprep.com/api/v3"
API_KEY = "ELnS9z5XsFT6Ne7ovmqrgxY9WOP3aFgS"  # TODO: set me

# =========================
# 1) (Optional) Screener & Fetch
# =========================
@dataclass
class Company:
    ticker: str
    name: str
    market_cap: Optional[int] = None
    country: Optional[str] = None
    sector: Optional[str] = None
    industry: Optional[str] = None

def get_companies_smallcap_ai_semis(limit: int = 1000) -> List[Company]:
    """
    Example screener: US, NASDAQ/NYSE, 'AI/semis/autonomy'-ish sectors/industries,
    small/mid caps: 300M–20B (adjust as needed).
    """
    params = {
        "marketCapMoreThan": 300_000_000,
        "marketCapLowerThan": 20_000_000_000,
        "country": "US",
        "exchange": "NASDAQ,NYSE",
        "isActivelyTrading": "true",
        "limit": limit,
        "apikey": API_KEY,
    }
    r = requests.get(f"{FMP_API_BASE_URL}/stock-screener", params=params)
    r.raise_for_status()
    data = r.json() if isinstance(r.json(), list) else []

    def looks_relevant(row):
        text = " ".join(str(row.get(k, "")) for k in ["sector","industry","companyName"]).lower()
        keys = ["semi", "chip", "ai", "autonom", "ev", "vision", "sensor"]
        return any(k in text for k in keys)

    comps = []
    for row in data:
        if row.get("exchangeShortName") in {"NASDAQ","NYSE"} and looks_relevant(row):
            comps.append(Company(
                ticker=row.get("symbol"),
                name=row.get("companyName"),
                market_cap=row.get("marketCap"),
                country=row.get("country"),
                sector=row.get("sector"),
                industry=row.get("industry"),
            ))
    return comps

def get_ohlcv(ticker: str, days: int = 750) -> pd.DataFrame:
    """Fetch OHLCV; returns columns [date, open, high, low, close, volume, ticker]."""
    url = f"{FMP_API_BASE_URL}/historical-price-full/{ticker}"
    params = {"timeseries": days, "apikey": API_KEY}
    r = requests.get(url, params=params); r.raise_for_status()
    data = r.json()
    if "historical" not in data:
        return pd.DataFrame()
    df = pd.DataFrame(data["historical"])[["date","open","high","low","close","volume"]]
    df["ticker"] = ticker
    df["date"] = pd.to_datetime(df["date"])
    return df.sort_values("date").reset_index(drop=True)

def merge_wide(frames: List[pd.DataFrame]) -> pd.DataFrame:
    """
    Merge per-ticker frames into a wide table: date + T_open/T_close/T_volume ...
    """
    out = None
    for df in frames:
        t = df["ticker"].iloc[0]
        slim = df[["date","open","close","volume"]].copy()
        slim = slim.rename(columns={"open":f"{t}_open","close":f"{t}_close","volume":f"{t}_volume"})
        out = slim if out is None else out.merge(slim, on="date", how="outer")
    if out is not None:
        out = out.sort_values("date").reset_index(drop=True)
    return out

def ensure_long_panel(all_price_wide: pd.DataFrame) -> pd.DataFrame:
    """Wide → long: ['date','ticker','open','close','volume']"""
    cols = [c for c in all_price_wide.columns if c != "date"]
    tickers = sorted({c.split("_")[0] for c in cols})
    frames = []
    for t in tickers:
        frames.append(pd.DataFrame({
            "date": all_price_wide["date"],
            "ticker": t,
            "open": all_price_wide.get(f"{t}_open"),
            "close": all_price_wide.get(f"{t}_close"),
            "volume": all_price_wide.get(f"{t}_volume"),
        }))
    df = pd.concat(frames, ignore_index=True).dropna(subset=["open","close"])
    df["date"] = pd.to_datetime(df["date"])
    return df.sort_values(["ticker","date"]).reset_index(drop=True)

# =========================
# 2) Indicators & Strategy
# =========================
def add_indicators(g: pd.DataFrame) -> pd.DataFrame:
    g = g.copy()
    g["ret_o2o"] = g["open"].pct_change().fillna(0.0)  # execution horizon: open→open
    for w in [5,10,20,50,200,250]:
        g[f"ma{w}"] = g["close"].rolling(w).mean()
    if g["volume"].notna().any():
        g["vol_ma20"] = g["volume"].rolling(20).mean()
        g["vol_x_ma"] = g["volume"] / g["vol_ma20"]
    else:
        g["vol_ma20"] = np.nan; g["vol_x_ma"] = np.nan
    g["chg_60d"] = g["close"] / g["close"].shift(60) - 1.0
    g["ret_3d"] = g["close"].pct_change(3)
    g["ret_5d"] = g["close"].pct_change(5)
    g["up_trend"] = (g["ma50"] > g["ma200"]) & (g["ma50"].diff() > 0) & (g["ma200"].diff() > 0)
    g["down_trend"] = (g["ma50"] < g["ma200"]) & (g["ma50"].diff() < 0)
    g["pullback_hold"] = (g["close"] < g["ma20"]) & (g["close"] > g["ma50"]) & g["up_trend"]
    win = 30
    g["hh30"] = g["close"].rolling(win).max()
    g["breakout_30"] = (g["close"] > g["hh30"].shift(1))
    g["base_break_vol"] = g["vol_x_ma"] > 1.5
    return g

@dataclass
class StrategyParams:
    bull_reduce_3d10: float = 0.10
    bear_probe_5d15: float = -0.15
    high_run_50: float = 0.50
    low_drop_30: float = -0.30
    vol_big_up: float = 2.0
    vol_big_down: float = 1.5
    max_pos_bull: float = 0.4
    max_pos_bear: float = 0.1
    step_small: float = 0.10
    step_medium: float = 0.20
    step_large: float = 0.30

def target_position(g: pd.DataFrame, p: StrategyParams) -> pd.Series:
    pos = np.zeros(len(g), dtype=float)
    for i in range(1, len(g)):
        pos[i] = pos[i-1]
        cap = p.max_pos_bull if bool(g.iloc[i]["up_trend"]) else p.max_pos_bear

        if pd.notna(g.iloc[i]["ret_3d"]) and g.iloc[i]["ret_3d"] >= p.bull_reduce_3d10:
            pos[i] -= p.step_medium
        if pd.notna(g.iloc[i]["ret_5d"]) and g.iloc[i]["ret_5d"] <= p.bear_probe_5d15:
            pos[i] += p.step_medium

        vx = g.iloc[i]["vol_x_ma"]
        if pd.notna(vx):
            prev_vol = g["vol_x_ma"].iloc[max(0,i-3):i].mean()
            if prev_vol < 1.0 and (g.iloc[i]["ret_o2o"] > 0) and vx >= p.vol_big_up:
                pos[i] -= p.step_medium
            if (g.iloc[i]["ret_o2o"] < 0) and vx >= p.vol_big_down:
                pos[i] -= p.step_medium

        if pd.notna(g.iloc[i]["chg_60d"]) and (g.iloc[i]["chg_60d"] >= p.high_run_50) and (g.iloc[i]["ret_o2o"] < 0):
            if pd.notna(vx) and vx >= p.vol_big_down:
                pos[i] = min(pos[i], p.max_pos_bear)

        if pd.notna(g.iloc[i]["chg_60d"]) and (g.iloc[i]["chg_60d"] <= p.low_drop_30) and (g.iloc[i]["ret_o2o"] > 0):
            if pd.notna(vx) and vx >= p.vol_big_down:
                pos[i] += p.step_medium

        if bool(g.iloc[i]["up_trend"]) and bool(g.iloc[i]["breakout_30"]) and bool(g.iloc[i]["base_break_vol"]):
            pos[i] += p.step_large

        if bool(g.iloc[i]["pullback_hold"]):
            pos[i] += p.step_small
        if bool(g.iloc[i]["down_trend"]) and (g.iloc[i]["close"] < g.iloc[i]["ma50"]):
            pos[i] -= p.step_medium

        pos[i] = float(np.clip(pos[i], 0.0, cap))
    return pd.Series(pos, index=g.index, name="tgt_pos")

# =========================
# 3) Backtest
# =========================
@dataclass
class BTParams:
    fee_bps: float = 5.0
    slip_bps: float = 5.0

def backtest(panel_long: pd.DataFrame,
             strat_params: StrategyParams = StrategyParams(),
             bt_params: BTParams = BTParams(),
             start_date: Optional[str] = None,
             end_date: Optional[str] = None) -> Dict[str, pd.DataFrame]:
    df = panel_long.sort_values(["ticker","date"]).copy()
    df = df.groupby("ticker", group_keys=False).apply(add_indicators)
    df["tgt_pos"] = df.groupby("ticker", group_keys=False).apply(lambda g: target_position(g, strat_params))
    df["pos_exec"] = df.groupby("ticker")["tgt_pos"].shift(1).fillna(0.0)
    fee = (bt_params.fee_bps + bt_params.slip_bps) / 1e4
    df["turnover"] = df.groupby("ticker")["pos_exec"].diff().abs().fillna(df["pos_exec"])
    df["cost"] = df["turnover"] * fee
    df["daily_pnl"] = df["pos_exec"] * df["ret_o2o"]
    df["daily_pnl_net"] = df["daily_pnl"] - df["cost"]
    df["equity"] = (1 + df["daily_pnl_net"]).groupby(df["ticker"]).cumprod()

    if start_date: df = df[df["date"] >= pd.to_datetime(start_date)]
    if end_date:   df = df[df["date"] <= pd.to_datetime(end_date)]

    port = df.groupby("date")["daily_pnl_net"].mean().to_frame("ret")
    port["equity"] = (1 + port["ret"]).cumprod()

    ann = 252
    def max_dd(s):
        pk = s.cummax()
        return (s/pk - 1.0).min()

    stats = {
        "Total Return": (port["equity"].iloc[-1] - 1.0) if len(port) else np.nan,
        "CAGR": (port["equity"].iloc[-1] ** (ann/len(port)) - 1.0) if len(port) else np.nan,
        "Vol (ann)": port["ret"].std() * np.sqrt(ann) if len(port)>1 else np.nan,
        "Sharpe (ann)": (port["ret"].mean()/port["ret"].std()*np.sqrt(ann)) if port["ret"].std()>0 else np.nan,
        "Max Drawdown": max_dd(port["equity"]) if len(port)>1 else np.nan,
        "Turnover (daily avg)": df.groupby("date")["turnover"].mean().mean() if len(df)>0 else np.nan,
    }
    return {"panel": df, "portfolio": port, "stats": pd.Series(stats, name="Portfolio")}

# =========================
# 4) Benchmark & Metrics
# =========================
def get_benchmark_price(ticker: str, days: int = 1500) -> pd.DataFrame:
    url = f"{FMP_API_BASE_URL}/historical-price-full/{ticker}"
    params = {"timeseries": days, "apikey": API_KEY}
    r = requests.get(url, params=params); r.raise_for_status()
    data = r.json()
    if "historical" not in data:
        raise ValueError(f"No historical data for {ticker}")
    df = pd.DataFrame(data["historical"])[["date","open","close"]].copy()
    df["date"] = pd.to_datetime(df["date"])
    return df.sort_values("date").reset_index(drop=True)

def universe_equal_weight_benchmark(panel_long: pd.DataFrame) -> pd.DataFrame:
    df = panel_long.sort_values(["ticker","date"]).copy()
    df["ret_cc"] = df.groupby("ticker")["close"].pct_change()
    bench = df.groupby("date")["ret_cc"].mean().to_frame("bench_ret").fillna(0.0)
    bench["equity"] = (1 + bench["bench_ret"]).cumprod()
    return bench.reset_index()

def benchmark_metrics(port: pd.DataFrame,
                      bench_df: pd.DataFrame,
                      start_date: Optional[str] = None,
                      end_date: Optional[str] = None,
                      use_open_to_open: bool = True) -> Dict[str, pd.Series]:
    p = port.reset_index().rename(columns={"index":"date"}) if "date" not in port.columns else port.copy()
    p["date"] = pd.to_datetime(p["date"])
    if start_date: p = p[p["date"] >= pd.to_datetime(start_date)]
    if end_date:   p = p[p["date"] <= pd.to_datetime(end_date)]

    b = bench_df.copy()
    b["date"] = pd.to_datetime(b["date"])
    if start_date: b = b[b["date"] >= pd.to_datetime(start_date)]
    if end_date:   b = b[b["date"] <= pd.to_datetime(end_date)]

    if "bench_ret" not in b.columns:
        b["bench_ret"] = (b["open"].pct_change() if use_open_to_open else b["close"].pct_change()).fillna(0.0)
    b = b[["date","bench_ret"]]

    df = p.merge(b, on="date", how="inner").dropna(subset=["ret","bench_ret"])
    if len(df) < 2:
        return {"msg": "Not enough overlap"}

    port_eq = (1 + df["ret"]).cumprod()
    bench_eq = (1 + df["bench_ret"]).cumprod()

    ann = 252
    def max_dd(s):
        pk = s.cummax()
        return (s/pk - 1.0).min()

    cov = np.cov(df["ret"], df["bench_ret"], ddof=1)
    beta = cov[0,1] / cov[1,1] if cov[1,1] != 0 else np.nan

    stats = {
        "Port Total Ret": port_eq.iloc[-1] - 1,
        "Bench Total Ret": bench_eq.iloc[-1] - 1,
        "Port CAGR": port_eq.iloc[-1] ** (ann/len(df)) - 1,
        "Bench CAGR": bench_eq.iloc[-1] ** (ann/len(df)) - 1,
        "Port Vol (ann)": df["ret"].std()*np.sqrt(ann),
        "Bench Vol (ann)": df["bench_ret"].std()*np.sqrt(ann),
        "Port Sharpe": (df["ret"].mean()/df["ret"].std())*np.sqrt(ann) if df["ret"].std()>0 else np.nan,
        "Bench Sharpe": (df["bench_ret"].mean()/df["bench_ret"].std())*np.sqrt(ann) if df["bench_ret"].std()>0 else np.nan,
        "Tracking Error (ann)": (df["ret"]-df["bench_ret"]).std()*np.sqrt(ann),
        "Information Ratio": ((df["ret"]-df["bench_ret"]).mean()/ (df["ret"]-df["bench_ret"]).std())*np.sqrt(ann) if (df["ret"]-df["bench_ret"]).std()>0 else np.nan,
        "Beta": beta,
        "Corr": np.corrcoef(df["ret"], df["bench_ret"])[0,1],
        "Alpha (ann)": (df["ret"].mean() - beta*df["bench_ret"].mean())*ann if not np.isnan(beta) else np.nan,
        "Port MaxDD": max_dd(port_eq),
        "Bench MaxDD": max_dd(bench_eq),
    }
    return {"joined": df, "stats": pd.Series(stats, name="Bench vs Port"),
            "port_equity": port_eq, "bench_equity": bench_eq}

# =========================
# 5) Plots (single-plot, no colors set)
# =========================
import matplotlib.pyplot as plt
import pandas as pd

def plot_equity_vs_benchmark(portfolio_df, bench_res, title="Portfolio vs Benchmark",
                             show=True, savepath=None, ax=None):
    # 准备数据
    p = portfolio_df.copy()
    if "date" in p.columns:
        p = p.set_index("date")
    p.index = pd.to_datetime(p.index)
    if "equity" not in p.columns or p["equity"].empty:
        raise ValueError("portfolio_df 缺少 'equity' 列或为空")

    port_eq = p["equity"].dropna()

    bench_eq = bench_res.get("bench_equity", None)
    if bench_eq is None:
        j = bench_res["joined"].copy()
        if len(j) == 0:
            raise ValueError("benchmark joined 为空，没有可绘制的基准数据（检查日期对齐）")
        j["date"] = pd.to_datetime(j["date"])
        j = j.sort_values("date")
        j["bench_equity"] = (1 + j["bench_ret"]).cumprod()
        bench_eq = j.set_index("date")["bench_equity"]
    bench_eq = bench_eq.dropna()

    # 对齐
    idx = port_eq.index.intersection(bench_eq.index)
    if len(idx) == 0:
        raise ValueError("组合与基准没有重叠日期，请检查 start_date / end_date / 数据源")

    # 画图
    created_fig = False
    if ax is None:
        fig, ax = plt.subplots()
        created_fig = True
    else:
        fig = ax.figure

    ax.plot(port_eq.loc[idx].index, port_eq.loc[idx].values, label="Portfolio")
    ax.plot(bench_eq.loc[idx].index, bench_eq.loc[idx].values, label="Benchmark")
    ax.set_title(title)
    ax.set_xlabel("Date")
    ax.set_ylabel("Equity")
    ax.legend()

    if savepath:
        fig.savefig(savepath, bbox_inches="tight", dpi=150)
    if show and created_fig:
        plt.show()
    return fig, ax

def plot_drawdowns(portfolio_df, bench_res, title="Drawdowns",
                   show=True, savepath=None, ax=None):
    def dd(s):
        pk = s.cummax()
        return s/pk - 1.0

    p = portfolio_df.copy()
    if "date" in p.columns:
        p = p.set_index("date")
    p.index = pd.to_datetime(p.index)
    if "equity" not in p.columns or p["equity"].empty:
        raise ValueError("portfolio_df 缺少 'equity' 列或为空")
    port_dd = dd(p["equity"].dropna())

    bench_eq = bench_res.get("bench_equity", None)
    if bench_eq is None:
        j = bench_res["joined"].copy()
        if len(j) == 0:
            raise ValueError("benchmark joined 为空，没有可绘制的基准数据（检查日期对齐）")
        j["date"] = pd.to_datetime(j["date"])
        j = j.sort_values("date")
        j["bench_equity"] = (1 + j["bench_ret"]).cumprod()
        bench_eq = j.set_index("date")["bench_equity"]
    bench_eq = bench_eq.dropna()
    bench_dd = dd(bench_eq)

    idx = port_dd.index.intersection(bench_dd.index)
    if len(idx) == 0:
        raise ValueError("组合与基准没有重叠日期，请检查 start_date / end_date / 数据源")

    created_fig = False
    if ax is None:
        fig, ax = plt.subplots()
        created_fig = True
    else:
        fig = ax.figure

    ax.plot(port_dd.loc[idx].index, port_dd.loc[idx].values, label="Portfolio")
    ax.plot(bench_dd.loc[idx].index, bench_dd.loc[idx].values, label="Benchmark")
    ax.set_title(title)
    ax.set_xlabel("Date")
    ax.set_ylabel("Drawdown")
    ax.legend()

    if savepath:
        fig.savefig(savepath, bbox_inches="tight", dpi=150)
    if show and created_fig:
        plt.show()
    return fig, ax

def plot_rolling_corr_beta(bench_joined: pd.DataFrame, window=60, title_prefix="Rolling"):
    j = bench_joined.copy()
    j["date"] = pd.to_datetime(j["date"])
    j = j.set_index("date").sort_index()
    r = j["ret"]; b = j["bench_ret"]

    roll_corr = r.rolling(window).corr(b)
    # rolling beta via Cov/Var
    beta = []
    idx = r.index
    for i in range(len(idx)):
        j0 = max(0, i-window+1)
        rv, bv = r.iloc[j0:i+1], b.iloc[j0:i+1]
        if len(rv) < 2 or bv.var() == 0: beta.append(np.nan)
        else: beta.append(np.cov(rv, bv, ddof=1)[0,1]/bv.var())
    roll_beta = pd.Series(beta, index=idx)

    plt.figure(); plt.plot(roll_corr.index, roll_corr.values)
    plt.title(f"{title_prefix} Correlation ({window}D)"); plt.xlabel("Date"); plt.ylabel("Correlation"); plt.show()

    plt.figure(); plt.plot(roll_beta.index, roll_beta.values)
    plt.title(f"{title_prefix} Beta ({window}D)"); plt.xlabel("Date"); plt.ylabel("Beta"); plt.show()

def plot_calendar_returns(portfolio: pd.DataFrame, title="Calendar-Year Returns"):
    p = portfolio.copy()
    if "date" in p.columns: p = p.set_index("date")
    p.index = pd.to_datetime(p.index)
    yearly = (1 + p["ret"]).groupby(pd.Grouper(freq="A")).apply(lambda x: (1+x).prod() - 1.0)
    yearly = yearly.dropna()
    plt.figure(); plt.bar(yearly.index.year, yearly.values)
    plt.title(title); plt.xlabel("Year"); plt.ylabel("Return"); plt.show()

# =========================
# 6) Example: minimal run
# =========================
if __name__ == "__main__":
    # (A) pick tickers manually (recommended for control), or use screener above
    tickers = ["AMD","NVDA","AVGO","TSM"]  # edit as you like

    frames = []
    for t in tickers:
        df_t = get_ohlcv(t, days=1000)
        if not df_t.empty:
            frames.append(df_t)
    if not frames:
        raise SystemExit("No data fetched.")

    wide = merge_wide(frames)
    panel = ensure_long_panel(wide)

    # Backtest (open→open model), with date window
    res = backtest(panel, start_date="2024-01-01", end_date=None)
    print("\n=== Portfolio Stats ===")
    print(res["stats"])

    # Benchmark: QQQ (open→open to match model)
    bench_px = get_benchmark_price("TSLA", days=1500)
    bench_res = benchmark_metrics(res["portfolio"], bench_px, start_date="2024-01-01", use_open_to_open=True)
    print("\n=== Benchmark vs Portfolio ===")
    print(bench_res["stats"])

    # Plots
    fig, ax = plot_equity_vs_benchmark(res["portfolio"], bench_res, title="Portfolio vs QQQ")
    fig, ax = plot_drawdowns(res["portfolio"], bench_res, title="Drawdown: Portfolio vs QQQ")
    plot_rolling_corr_beta(bench_res["joined"], window=60, title_prefix="Rolling 60D")
    plot_calendar_returns(res["portfolio"], title="Calendar Returns (Portfolio)")

  df = df.groupby("ticker", group_keys=False).apply(add_indicators)



=== Portfolio Stats ===
Total Return            0.077424
CAGR                    0.046097
Vol (ann)               0.079360
Sharpe (ann)            0.607581
Max Drawdown           -0.096261
Turnover (daily avg)    0.012230
Name: Portfolio, dtype: float64

=== Benchmark vs Portfolio ===
Port Total Ret          0.077424
Bench Total Ret         0.388476
Port CAGR               0.046097
Bench CAGR              0.219378
Port Vol (ann)          0.079360
Bench Vol (ann)         0.670414
Port Sharpe             0.607581
Bench Sharpe            0.626704
Tracking Error (ann)    0.653936
Information Ratio      -0.568762
Beta                    0.031284
Corr                    0.264283
Alpha (ann)             0.035073
Port MaxDD             -0.096261
Bench MaxDD            -0.529775
Name: Bench vs Port, dtype: float64


  df["tgt_pos"] = df.groupby("ticker", group_keys=False).apply(lambda g: target_position(g, strat_params))


ValueError: 组合与基准没有重叠日期，请检查 start_date / end_date / 数据源

In [4]:
print("portfolio len:", len(res["portfolio"]))
print("bench joined len:", len(bench_res["joined"]))
if len(bench_res["joined"]) > 0:
    print("date range:",
          bench_res["joined"]["date"].min(),
          "->",
          bench_res["joined"]["date"].max())

portfolio len: 918
bench joined len: 918
date range: 2022-01-03 00:00:00 -> 2025-08-29 00:00:00
