In [None]:
!pip -q install alpaca-py

import os
import numpy as np
import pandas as pd
from datetime import time

from alpaca.data.historical import StockHistoricalDataClient
from alpaca.data.requests import StockBarsRequest
from alpaca.data.timeframe import TimeFrame


In [None]:
# Fail fast, with a readable message, when either Alpaca credential is absent
# from the environment (credentials are never hardcoded in the notebook).
assert os.environ.get("APCA_API_KEY_ID") is not None, "Missing APCA_API_KEY_ID env var"
assert os.environ.get("APCA_API_SECRET_KEY") is not None, "Missing APCA_API_SECRET_KEY env var"

# Shared historical-data client used by the download helper further down.
client = StockHistoricalDataClient(
    api_key=os.environ["APCA_API_KEY_ID"],
    secret_key=os.environ["APCA_API_SECRET_KEY"],
)


In [None]:
def fetch_minute_df(symbol: str, start="2024-01-01", end="2024-06-01") -> pd.DataFrame:
    """Download one-minute bars for ``symbol`` and return them as a flat DataFrame.

    Args:
        symbol: Ticker passed to the request as ``symbol_or_symbols``.
        start: Start of the requested range, forwarded unchanged to ``StockBarsRequest``.
        end: End of the requested range, forwarded unchanged to ``StockBarsRequest``.

    Returns:
        The ``.df`` frame of the response with its index moved into ordinary
        columns and a ``symbol`` column set to ``symbol``.
    """
    request = StockBarsRequest(
        symbol_or_symbols=symbol,
        timeframe=TimeFrame.Minute,
        start=start,
        end=end,
    )
    # Uses the module-level `client`; reset_index() turns the index levels into columns.
    minute_bars = client.get_stock_bars(request).df.reset_index()
    return minute_bars.assign(symbol=symbol)

# Download both tickers over fetch_minute_df's default date range.
df_spy = fetch_minute_df(symbol="SPY")
df_qqq = fetch_minute_df(symbol="QQQ")

# Row counts as a quick check that both downloads returned data.
print("SPY rows:", len(df_spy), "| QQQ rows:", len(df_qqq))
df_spy.head(n=5)


In [None]:
def build_pm_window_with_vwap(df: pd.DataFrame) -> pd.DataFrame:
    """Attach a cumulative VWAP to minute bars and keep the 6:00-7:00am CT window.

    Timestamps are parsed as UTC and converted to America/Chicago. For every
    calendar date in Chicago time (and, when a ``symbol`` column is present,
    for every symbol separately) the VWAP is

        cumsum(typical * volume) / cumsum(volume),  typical = (high + low + close) / 3

    accumulated from 4:00am CT onward. Bars before 4:00am CT get ``tpv`` and
    ``volume`` set to 0 in the working copy so they add nothing to either sum.

    Args:
        df: Minute bars with ``timestamp``, ``high``, ``low``, ``close`` and
            ``volume`` columns, optionally ``symbol``. The input is not modified.

    Returns:
        The rows whose Chicago time is in [06:00, 07:00) and whose VWAP is
        defined, sorted by ``ts_ct``, with the helper columns ``ts_ct``,
        ``date_ct``, ``time_ct``, ``in_vwap``, ``typical``, ``tpv``,
        ``cum_tpv``, ``cum_vol`` and ``vwap`` added.
    """
    d = df.copy()
    d["timestamp"] = pd.to_datetime(d["timestamp"], utc=True)
    d["ts_ct"] = d["timestamp"].dt.tz_convert("America/Chicago")
    d["date_ct"] = d["ts_ct"].dt.date
    d["time_ct"] = d["ts_ct"].dt.time
    d = d.sort_values("ts_ct")

    # VWAP starts at 4:00am CT
    d["in_vwap"] = d["time_ct"] >= time(4, 0)

    d["typical"] = (d["high"] + d["low"] + d["close"]) / 3.0
    d["tpv"] = d["typical"] * d["volume"]
    d.loc[~d["in_vwap"], ["tpv", "volume"]] = 0

    # Accumulate per symbol as well as per day: grouping on the date alone
    # would blend the sums of different tickers if the frame holds several.
    session_keys = ["symbol", "date_ct"] if "symbol" in d.columns else ["date_ct"]
    d["cum_tpv"] = d.groupby(session_keys)["tpv"].cumsum()
    d["cum_vol"] = d.groupby(session_keys)["volume"].cumsum()
    # Zero cumulative volume would divide by zero; NaN marks the VWAP as undefined.
    d["vwap"] = d["cum_tpv"] / d["cum_vol"].replace(0, np.nan)

    # Trade window: 6:00-7:00am CT
    pm = d[(d["time_ct"] >= time(6, 0)) & (d["time_ct"] < time(7, 0))].copy()
    return pm.dropna(subset=["vwap"])

# Reduce each ticker's minute bars to the trade window with VWAP attached.
spy_pm = build_pm_window_with_vwap(df=df_spy)
qqq_pm = build_pm_window_with_vwap(df=df_qqq)

# Row and distinct-day counts per ticker, then a peek at the key columns.
print("SPY pm rows:", len(spy_pm), "| days:", spy_pm["date_ct"].nunique())
print("QQQ pm rows:", len(qqq_pm), "| days:", qqq_pm["date_ct"].nunique())
spy_pm.loc[:, ["ts_ct", "close", "vwap", "volume"]].head()


In [None]:
def strength_to_multiplier(score: float) -> float:
    """Translate a signal-strength score into a position-size multiplier.

    Scores below 0.5 map to 0.75, scores below 1.2 map to 1.0, and every
    other score (anything that is not below 1.2) maps to 1.25.
    """
    # (exclusive upper bound, multiplier) tiers, checked from weakest to strongest.
    tiers = ((0.5, 0.75), (1.2, 1.0))
    for upper_bound, multiplier in tiers:
        if score < upper_bound:
            return multiplier
    return 1.25


In [None]:
def backtest_vwap_reclaim_trades_only(
    pm: pd.DataFrame,
    symbol: str,
    min_below_minutes=3,
    vwap_buffer=0.0001,
    take_profit=0.0015,
    stop_loss=0.0010,
    cost_bps_per_side=1.0
) -> pd.DataFrame:
    """Backtest a long-only "VWAP reclaim" entry, at most one trade per day.

    Signal: a bar is a reclaim when the previous bar of the same day closed
    below VWAP, the streak of consecutive below-VWAP closes ending at that
    previous bar is at least ``min_below_minutes`` bars long, and the current
    close is above ``vwap * (1 + vwap_buffer)``.

    Execution: the position is opened at the close of the reclaim bar and
    closed at the first later close of the same day that is at or above
    ``entry * (1 + take_profit)`` ("take_profit") or at or below
    ``entry * (1 - stop_loss)`` ("stop_loss"). Exits are evaluated on closes
    only, so the realised move can exceed either threshold. A position still
    open on the day's last bar is closed at that bar's close ("time_exit"),
    so no position is ever carried into another day. A reclaim on the day's
    last bar is ignored because no later bar exists to exit on.

    Args:
        pm: Bars with ``ts_ct``, ``date_ct``, ``close``, ``vwap`` and
            ``volume`` columns. Rows with a missing ``vwap`` are ignored.
        symbol: Label stored in the ``symbol`` column of the result.
        min_below_minutes: Minimum length (in bars) of the below-VWAP streak.
        vwap_buffer: Fractional margin above VWAP required for a reclaim.
        take_profit: Fractional gain that triggers the profit exit.
        stop_loss: Fractional loss that triggers the stop exit.
        cost_bps_per_side: Cost in basis points charged on entry and on exit.

    Returns:
        One row per trade with columns ``symbol``, ``date_ct``, ``entry_time``,
        ``exit_time``, ``entry``, ``exit``, ``reason``, ``ret_net`` (return net
        of round-trip cost), ``multiplier`` and ``score``. The columns are
        present even when there are no trades.
    """
    trade_columns = [
        "symbol", "date_ct", "entry_time", "exit_time", "entry",
        "exit", "reason", "ret_net", "multiplier", "score",
    ]

    d = pm.sort_values("ts_ct").copy()
    d = d.dropna(subset=["vwap"]).copy()
    if d.empty:
        # Keep the schema so callers can concat / sort on "exit_time" safely.
        return pd.DataFrame(columns=trade_columns)

    d["below_vwap"] = d["close"] < d["vwap"]
    d["above_vwap"] = d["close"] > d["vwap"] * (1 + vwap_buffer)

    # A new run starts whenever below_vwap flips or a new day begins (the
    # per-day shift yields NaN on each day's first bar), so run ids never span
    # two days. Counting within a run gives the below-VWAP streak length;
    # bars that are not below VWAP contribute 0.
    prev_flag = d.groupby("date_ct")["below_vwap"].shift(1)
    run_id = (d["below_vwap"] != prev_flag).cumsum()
    d["below_streak"] = d["below_vwap"].astype(int).groupby(run_id).cumsum()
    d["prev_streak"] = d.groupby("date_ct")["below_streak"].shift(1).fillna(0)
    # The previous bar was below VWAP exactly when its streak count is positive.
    d["prev_below"] = d["prev_streak"] > 0

    prev_close = d.groupby("date_ct")["close"].shift(1)
    prev_vwap = d.groupby("date_ct")["vwap"].shift(1)
    d["vwap_dist_prev"] = ((prev_vwap - prev_close) / prev_vwap).fillna(0)
    d["reclaim_impulse"] = ((d["close"] - d["vwap"]) / d["vwap"]).fillna(0)

    # Volume z-score against the bars seen so far that day. Whole-day
    # statistics would leak volume from bars after the decision point.
    day_volume = d.groupby("date_ct")["volume"]
    vol_mean = day_volume.transform(lambda s: s.expanding().mean())
    vol_std = day_volume.transform(lambda s: s.expanding().std()).replace(0, np.nan)
    d["volume_z"] = ((d["volume"] - vol_mean) / vol_std).fillna(0)

    d["reclaim"] = d["prev_below"] & d["above_vwap"] & (d["prev_streak"] >= min_below_minutes)
    rt_cost = (2 * cost_bps_per_side) / 10000.0

    signal_cols = [
        "ts_ct", "close", "reclaim", "prev_streak",
        "vwap_dist_prev", "reclaim_impulse", "volume_z",
    ]
    trades = []

    # d is sorted by ts_ct, so sort=False keeps days and bars in time order.
    for date_ct, day in d.groupby("date_ct", sort=False):
        bars = list(day[signal_cols].itertuples(index=False, name=None))
        last_idx = len(bars) - 1

        in_pos = False
        entry_price = None
        entry_time = None
        mult = 1.0
        score = 0.0

        for i, (ts_ct, close, is_reclaim, prev_streak, dist_prev, impulse, vol_z) in enumerate(bars):
            price = float(close)

            if not in_pos:
                # Entering on the final bar would leave nothing to exit on.
                if bool(is_reclaim) and i < last_idx:
                    score = (
                        0.5 * min(float(prev_streak) / 5.0, 2.0) +
                        2.0 * min(float(dist_prev) / 0.001, 2.0) +
                        2.0 * min(float(impulse) / 0.001, 2.0) +
                        0.3 * min(max(float(vol_z), 0.0), 2.0)
                    )
                    mult = strength_to_multiplier(score)
                    entry_price = price
                    entry_time = ts_ct
                    in_pos = True
                continue

            if price >= entry_price * (1 + take_profit):
                reason = "take_profit"
            elif price <= entry_price * (1 - stop_loss):
                reason = "stop_loss"
            elif i == last_idx:
                # End of the day's bars: flatten instead of holding overnight.
                reason = "time_exit"
            else:
                continue

            ret_net = ((price - entry_price) / entry_price) - rt_cost

            trades.append({
                "symbol": symbol,
                "date_ct": date_ct,
                "entry_time": entry_time,
                "exit_time": ts_ct,
                "entry": entry_price,
                "exit": price,
                "reason": reason,
                "ret_net": ret_net,
                "multiplier": float(mult),
                "score": float(score),
            })
            # 1 trade/day per ticker
            break

    return pd.DataFrame(trades, columns=trade_columns)


In [None]:
# Run the same backtest configuration on each ticker.
spy_trades = backtest_vwap_reclaim_trades_only(pm=spy_pm, symbol="SPY", cost_bps_per_side=1.0)
qqq_trades = backtest_vwap_reclaim_trades_only(pm=qqq_pm, symbol="QQQ", cost_bps_per_side=1.0)

# Merge both trade logs into one list ordered by exit time with a fresh 0..n-1 index.
all_trades = pd.concat([spy_trades, qqq_trades], ignore_index=True).sort_values(
    "exit_time", ignore_index=True
)

print("SPY trades:", len(spy_trades), "| QQQ trades:", len(qqq_trades), "| Combined:", len(all_trades))
all_trades.head()


In [None]:
def simulate_portfolio_with_guardrails_pct(
    trades: pd.DataFrame,
    starting_equity: float = 10000.0,
    base_risk_per_trade: float = 100.0,
    stop_loss_frac: float = 0.0010,
    max_leverage: float = 1.0,
    daily_max_loss_pct: float = 0.02,   # 2% of start-of-day equity
) -> pd.DataFrame:
    """Size and book trades sequentially with a leverage cap and a daily loss lock.

    Trades are processed in ``exit_time`` order. For each trade:

    * dollar risk is ``base_risk_per_trade * multiplier``;
    * shares are ``floor(risk / (entry * stop_loss_frac))``, then reduced so
      that ``shares * entry`` does not exceed ``equity * max_leverage``
      (never below zero shares, even if equity is zero or negative);
    * PnL is ``notional * ret_net`` and is added to equity.

    Once the PnL accumulated within one ``date_ct`` reaches
    ``-daily_max_loss_pct`` of that day's starting equity, the remaining
    trades of the day are recorded as skipped with zero size and zero PnL.

    Args:
        trades: Trade log with ``exit_time``, ``date_ct``, ``entry``,
            ``multiplier`` and ``ret_net`` columns.
        starting_equity: Account value before the first trade.
        base_risk_per_trade: Dollar risk of a trade with multiplier 1.0.
        stop_loss_frac: Stop distance as a fraction of entry, used for sizing.
        max_leverage: Cap on notional as a multiple of current equity.
        daily_max_loss_pct: Daily loss limit as a fraction of start-of-day equity.

    Returns:
        The input rows (sorted by ``exit_time``) with ``risk_$``, ``shares``,
        ``notional_$``, ``pnl_$``, ``equity_$``, ``skipped`` and
        ``skip_reason`` added. Empty input yields an empty frame that still
        carries these columns.
    """
    added_columns = ["risk_$", "shares", "notional_$", "pnl_$", "equity_$", "skipped", "skip_reason"]
    if trades.empty:
        # Nothing to simulate; keep the schema so downstream groupby/indexing works.
        return pd.DataFrame(columns=list(dict.fromkeys([*trades.columns, *added_columns])))

    t = trades.sort_values("exit_time").reset_index(drop=True).copy()

    equity = starting_equity
    current_day = None
    day_start_equity = None
    day_pnl = 0.0
    day_locked = False

    out = []
    for row in t.to_dict("records"):
        date_ct = row["date_ct"]

        # First trade of a new day: reset the daily loss tracking.
        if current_day != date_ct:
            current_day = date_ct
            day_start_equity = equity
            day_pnl = 0.0
            day_locked = False

        daily_max_loss_dollars = day_start_equity * daily_max_loss_pct

        if day_locked:
            out.append({**row, "risk_$": 0.0, "shares": 0, "notional_$": 0.0,
                        "pnl_$": 0.0, "equity_$": equity, "skipped": True,
                        "skip_reason": "daily_max_loss_locked"})
            continue

        entry = float(row["entry"])
        mult = float(row["multiplier"])
        risk_dollars = base_risk_per_trade * mult

        risk_per_share = entry * stop_loss_frac
        shares = int(np.floor(risk_dollars / risk_per_share)) if risk_per_share > 0 else 0

        notional = shares * entry
        # Clamp at zero: a non-positive equity must yield no position rather
        # than a negative share count (which would flip the sign of the PnL).
        max_notional = max(equity, 0.0) * max_leverage
        if notional > max_notional and entry > 0:
            shares = int(np.floor(max_notional / entry))
            notional = shares * entry

        pnl = notional * float(row["ret_net"])
        equity += pnl
        day_pnl += pnl

        # The lock is evaluated after booking, so it affects later trades only.
        if day_pnl <= -daily_max_loss_dollars:
            day_locked = True

        out.append({**row, "risk_$": risk_dollars, "shares": shares, "notional_$": notional,
                    "pnl_$": pnl, "equity_$": equity, "skipped": False, "skip_reason": ""})

    return pd.DataFrame(out)


In [None]:
def summarize_portfolio(portfolio_trades: pd.DataFrame, starting_equity: float = 10000.0):
    """Print headline statistics for a simulated portfolio.

    Trade statistics (count, win rate, PnL, profit factor, best/worst) are
    computed over executed rows only: rows flagged ``skipped`` carry zero PnL
    and would otherwise inflate the trade count and dilute the win rate.
    Drawdown is measured on the equity curve starting at ``starting_equity``,
    so a loss on the very first trade counts as a drawdown.

    Args:
        portfolio_trades: Rows in chronological order with ``pnl_$`` and
            ``equity_$`` columns and, optionally, a boolean ``skipped`` column.
        starting_equity: Account value before the first row; the first point
            of the equity curve used for the drawdown.

    Returns:
        None. Results are printed; "No trades." is printed when there is
        nothing to summarise.
    """
    t = portfolio_trades.copy()
    if t.empty:
        print("No trades.")
        return

    if "skipped" in t.columns:
        executed = t.loc[~t["skipped"].astype(bool)]
    else:
        executed = t
    if executed.empty:
        print("No trades.")
        return

    total_pnl = executed["pnl_$"].sum()
    # Skipped rows still carry the running equity, so the last row is the end state.
    end_eq = t["equity_$"].iloc[-1]
    win_rate = (executed["pnl_$"] > 0).mean()

    gains = executed.loc[executed["pnl_$"] > 0, "pnl_$"].sum()
    losses = -executed.loc[executed["pnl_$"] <= 0, "pnl_$"].sum()
    pf = gains / losses if losses > 0 else float("inf")

    # Seed the curve with the starting balance so the first peak is the
    # initial equity rather than the equity after the first trade.
    eq = pd.concat(
        [pd.Series([float(starting_equity)]), t["equity_$"].astype(float)],
        ignore_index=True,
    )
    peak = eq.cummax()
    dd = (eq - peak) / peak
    max_dd = dd.min() * 100

    print("Trades:", len(executed))
    print("Win rate:", round(win_rate * 100, 2), "%")
    print("Total PnL: $", round(total_pnl, 2))
    print("End equity: $", round(end_eq, 2))
    print("Profit factor ($):", round(pf, 3))
    print("Worst trade: $", round(executed["pnl_$"].min(), 2))
    print("Best trade: $", round(executed["pnl_$"].max(), 2))
    print("Max drawdown:", round(max_dd, 2), "%")

# Size and book the combined trade list with the guardrail settings below.
portfolio = simulate_portfolio_with_guardrails_pct(
    trades=all_trades,
    starting_equity=10000.0,
    base_risk_per_trade=100.0,
    stop_loss_frac=0.0010,
    max_leverage=1.0,
    daily_max_loss_pct=0.02,
)

summarize_portfolio(portfolio_trades=portfolio, starting_equity=10000.0)

# Per-ticker PnL breakdown.
print("\nPnL by symbol:")
print(
    portfolio.groupby("symbol")["pnl_$"]
    .agg(["count", "sum", "mean", "min", "max"])
    .round(2)
)

# How often each skip reason occurred (empty string = not skipped).
print("\nSkip reasons:")
print(portfolio["skip_reason"].value_counts(dropna=False))


In [None]:
# Write the simulated portfolio rows to CSV in the working directory, without the index column.
portfolio.to_csv(path_or_buf="spy_qqq_portfolio_results.csv", index=False)
print("Saved spy_qqq_portfolio_results.csv")
