In [6]:
# ================================
# Global Parameters & Imports
# ================================
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as mticker
import pytz
import math
import time
import yfinance as yf
from datetime import datetime, timedelta
from IPython.display import display, Markdown

# ===== Configuration =====
TICKER = "TQQQ"
START_DATE = "2016-01-01"
END_DATE = "2023-02-17"
OUTPUT_FILE = f"{TICKER}_unadjusted.csv"

utc_tz = pytz.timezone('UTC')
nyc_tz = pytz.timezone('America/New_York')

# ================================
# Data Download Functions (Yahoo Finance)
# ================================
# ================================
# Modified Data Download Function
# ================================
def get_yfinance_data(ticker=TICKER, start_date=START_DATE, end_date=END_DATE):
    """Fetch intraday data from Yahoo Finance with automatic interval adjustment."""
    print(f"Downloading {ticker} data from {start_date} to {end_date}...")

    # Determine if we need both 1m and 5m data
    end_dt = datetime.strptime(end_date, '%Y-%m-%d')
    cutoff_date = datetime.now() - timedelta(days=30)

    if end_dt <= cutoff_date:
        # Entire range is older than 30 days - use 5m data
        print("Using 5m data for full historical range")
        return get_yfinance_chunked(ticker, start_date, end_date, "5m")
    elif datetime.strptime(start_date, '%Y-%m-%d') >= cutoff_date:
        # Entire range is within last 30 days - use 1m data
        print("Using 1m data for recent range")
        return get_yfinance_chunked(ticker, start_date, end_date, "1m")
    else:
        # Need to combine 5m (historical) and 1m (recent) data
        print("Combining 5m (historical) and 1m (recent) data")
        historical = get_yfinance_chunked(ticker, start_date, cutoff_date.strftime('%Y-%m-%d'), "5m")
        recent = get_yfinance_chunked(ticker, cutoff_date.strftime('%Y-%m-%d'), end_date, "1m")
        return pd.concat([historical, recent]).sort_index()

def get_yfinance_chunked(ticker, start_date, end_date, interval):
    """Fetch data in chunks to handle Yahoo limitations."""
    data_chunks = []
    current_start = datetime.strptime(start_date, '%Y-%m-%d')
    end_dt = datetime.strptime(end_date, '%Y-%m-%d')
    chunk_size = timedelta(days=7) if interval == "1m" else timedelta(days=59)

    while current_start < end_dt:
        current_end = min(current_start + chunk_size, end_dt)
        print(f"Fetching {interval} data for {current_start.date()} to {current_end.date()}...")

        try:
            chunk = yf.download(
                tickers=ticker,
                start=current_start,
                end=current_end,
                interval=interval,
                progress=False,
                auto_adjust=False
            )
            if not chunk.empty:
                data_chunks.append(chunk)
        except Exception as e:
            print(f"Error fetching {current_start} to {current_end}: {str(e)}")

        current_start = current_end + timedelta(days=1)
        time.sleep(1)  # Rate limiting

    return pd.concat(data_chunks).sort_index() if data_chunks else pd.DataFrame()

# ================================
# Update the download_and_merge_data function
# ================================
def download_and_merge_data():
    """Download and merge intraday and daily data using Yahoo Finance."""
    # Get daily data for ATR
    daily_data = get_daily_adjusted_data()
    if daily_data.empty:
        print("Unable to get daily data. Exiting.")
        return None

    # Get intraday data (automatically handles 1m/5m selection)
    print(f"Downloading intraday data for {TICKER} from {START_DATE}...")
    intraday_data = get_yfinance_data()

    if intraday_data.empty:
        print("No intraday data found.")
        return None

    # Process and merge
    final_df = process_data(intraday_data)
    if not final_df.empty:
        final_df = pd.merge(final_df, daily_data, on='day', how='left')
        final_df['caldt'] += pd.Timedelta(minutes=1)
        cols = ['caldt', 'open', 'high', 'low', 'close', 'volume', 'day', 'dOpen', 'ATR']
        final_df[cols].to_csv(OUTPUT_FILE, index=False)
        print(f"Data saved to {OUTPUT_FILE}")
        print(f"Total records: {len(final_df)}")
        return final_df
    else:
        print("No data after processing.")
        return None

# The rest of your code remains the same...

# ================================
# Performance Analysis & Backtesting Functions
# ================================
def price2return(price, n=1):
    """Convert a series of prices into returns."""
    price = np.array(price)
    T = len(price)
    y = np.full_like(price, np.nan, dtype=float)
    if T > n:
        y[n:] = price[n:] / price[:T-n] - 1
    return y

def summary_statistics(dailyReturns):
    """Calculate performance metrics and return a summary table."""
    riskFreeRate = 0
    tradingDays = 252
    dailyReturns = np.array(dailyReturns)
    dailyReturns = dailyReturns[~np.isnan(dailyReturns)]
    totalReturn = np.prod(1 + dailyReturns) - 1
    numYears = len(dailyReturns) / tradingDays
    CAGR = (1 + totalReturn)**(1/numYears) - 1
    volatility = np.std(dailyReturns, ddof=0) * np.sqrt(tradingDays)
    sharpeRatio = (np.mean(dailyReturns) - riskFreeRate/tradingDays) / np.std(dailyReturns, ddof=0) * np.sqrt(tradingDays)
    nav = np.cumprod(1 + dailyReturns)
    peak = np.maximum.accumulate(nav)
    drawdown = (nav - peak) / peak
    MDD = np.min(drawdown)
    metrics = ["Total Return (%)", "CAGR (%)", "Volatility (%)", "Sharpe Ratio", "Max Drawdown (%)"]
    values = [totalReturn*100, CAGR*100, volatility*100, sharpeRatio, MDD*100]
    formatted_values = [f"{v:.4f}" if i < 3 or i == 4 else f"{v:.6f}" for i,v in enumerate(values)]
    performance_table = pd.DataFrame({'Metric': metrics, 'Value': formatted_values})
    return performance_table

def monthly_performance_table(returns, dates):
    """Create a table of monthly returns."""
    returns_series = pd.Series(returns, index=pd.DatetimeIndex(dates))
    returns_series = returns_series[~np.isnan(returns_series)]
    df = pd.DataFrame({'return': returns_series,
                       'year': returns_series.index.year,
                       'month': returns_series.index.month})
    monthly_returns = df.groupby(['year', 'month'])['return'].apply(lambda x: np.prod(1 + x) - 1).reset_index()
    pivot_table = monthly_returns.pivot(index='year', columns='month', values='return')
    pivot_table['Year Total'] = pivot_table.apply(lambda row: np.prod(1 + row.dropna()) - 1
                                                   if not row.dropna().empty else np.nan, axis=1)
    formatted_table = pivot_table.apply(lambda col: col.map(lambda x: f"{x*100:.2f}%" if not pd.isna(x) else ""))
    month_names = {1: 'Jan', 2: 'Feb', 3: 'Mar', 4: 'Apr', 5: 'May', 6: 'Jun',
                   7: 'Jul', 8: 'Aug', 9: 'Sep', 10: 'Oct', 11: 'Nov', 12: 'Dec'}
    formatted_table = formatted_table.rename(columns=month_names)
    return formatted_table

def backtest(days, p, orb_m, target_R, risk, max_Lev, AUM_0, commission):
    """Perform an optimized backtest for the ORB strategy."""
    start_time = time.time()
    str_df = pd.DataFrame()
    str_df['Date'] = days
    str_df['AUM'] = np.nan
    str_df.loc[0, 'AUM'] = AUM_0
    str_df['pnl_R'] = np.nan
    or_candles = orb_m
    day_groups = dict(tuple(p.groupby(p['day'].dt.date)))
    trade_id = 0
    trade_log = []

    for t in range(1, len(days)):
        current_day = days[t].date()
        if current_day not in day_groups:
            str_df.loc[t, 'pnl_R'] = 0
            str_df.loc[t, 'AUM'] = str_df.loc[t-1, 'AUM']
            continue

        day_data = day_groups[current_day]
        if len(day_data) <= or_candles:
            str_df.loc[t, 'pnl_R'] = 0
            str_df.loc[t, 'AUM'] = str_df.loc[t-1, 'AUM']
            continue

        OHLC = day_data[['open', 'high', 'low', 'close']].values
        split_adj = OHLC[0, 0] / day_data['dOpen'].iloc[0]
        atr_raw = day_data['ATR'].iloc[0] * split_adj
        side = np.sign(OHLC[or_candles-1, 3] - OHLC[0, 0])
        entry = OHLC[or_candles, 0] if len(OHLC) > or_candles else np.nan

        if side == 1:
            stop = abs(np.min(OHLC[:or_candles, 2]) / entry - 1)
        elif side == -1:
            stop = abs(np.max(OHLC[:or_candles, 1]) / entry - 1)
        else:
            stop = np.nan

        if side == 0 or math.isnan(stop) or math.isnan(entry):
            str_df.loc[t, 'pnl_R'] = 0
            str_df.loc[t, 'AUM'] = str_df.loc[t-1, 'AUM']
            continue

        if entry == 0 or stop == 0:
            shares = 0
        else:
            shares = math.floor(min(str_df.loc[t-1, 'AUM'] * risk / (entry * stop),
                                    max_Lev * str_df.loc[t-1, 'AUM'] / entry))

        if shares == 0:
            str_df.loc[t, 'pnl_R'] = 0
            str_df.loc[t, 'AUM'] = str_df.loc[t-1, 'AUM']
            continue

        OHLC_post_entry = OHLC[or_candles:, :]

        exit_reason = None
        exit_idx    = None
        exit_price  = None

        if side == 1:  # Long trade
            stop_price   = entry * (1 - stop)
            target_price = entry * (1 + target_R * stop) if np.isfinite(target_R) else float('inf')

            stop_hits   = OHLC_post_entry[:, 2] <= stop_price
            target_hits = OHLC_post_entry[:, 1]  > target_price

            if np.any(stop_hits) and np.any(target_hits):
                idx_stop, idx_target = np.argmax(stop_hits), np.argmax(target_hits)
                if idx_target < idx_stop:
                    exit_idx   = idx_target
                    exit_price = max(target_price, OHLC_post_entry[idx_target, 0])
                    exit_reason = 'Target'
                else:
                    exit_idx   = idx_stop
                    exit_price = min(stop_price, OHLC_post_entry[idx_stop, 0])
                    exit_reason = 'Stop'
            elif np.any(stop_hits):
                exit_idx   = np.argmax(stop_hits)
                exit_price = min(stop_price, OHLC_post_entry[exit_idx, 0])
                exit_reason = 'Stop'
            elif np.any(target_hits):
                exit_idx   = np.argmax(target_hits)
                exit_price = max(target_price, OHLC_post_entry[exit_idx, 0])
                exit_reason = 'Target'
            else:  # EOD exit
                exit_idx   = -1
                exit_price = OHLC_post_entry[-1, 3]
                exit_reason = 'Close'

            PnL_T = exit_price - entry

        elif side == -1:  # Short trade
            stop_price   = entry * (1 + stop)
            target_price = entry * (1 - target_R * stop) if np.isfinite(target_R) else 0

            stop_hits   = OHLC_post_entry[:, 1] >= stop_price
            target_hits = OHLC_post_entry[:, 2]  < target_price

            if np.any(stop_hits) and np.any(target_hits):
                idx_stop, idx_target = np.argmax(stop_hits), np.argmax(target_hits)
                if idx_target < idx_stop:
                    exit_idx   = idx_target
                    exit_price = min(target_price, OHLC_post_entry[idx_target, 0])
                    exit_reason = 'Target'
                else:
                    exit_idx   = idx_stop
                    exit_price = max(stop_price, OHLC_post_entry[idx_stop, 0])
                    exit_reason = 'Stop'
            elif np.any(stop_hits):
                exit_idx   = np.argmax(stop_hits)
                exit_price = max(stop_price, OHLC_post_entry[exit_idx, 0])
                exit_reason = 'Stop'
            elif np.any(target_hits):
                exit_idx   = np.argmax(target_hits)
                exit_price = min(target_price, OHLC_post_entry[exit_idx, 0])
                exit_reason = 'Target'
            else:  # EOD exit
                exit_idx   = -1
                exit_price = OHLC_post_entry[-1, 3]
                exit_reason = 'Close'

            PnL_T = entry - exit_price

        str_df.loc[t, 'AUM'] = str_df.loc[t-1, 'AUM'] + shares * PnL_T - shares * commission * 2
        str_df.loc[t, 'pnl_R'] = (str_df.loc[t, 'AUM'] - str_df.loc[t-1, 'AUM']) / (risk * str_df.loc[t-1, 'AUM'])

        entry_ts = day_data['caldt'].iloc[or_candles]
        exit_ts  = day_data['caldt'].iloc[or_candles + exit_idx]

        trade_id += 1
        trade_log.append({
            'TradeID'     : trade_id,
            'Date'        : current_day,
            'Side'        : 'Long' if side == 1 else 'Short',
            'EntryTime'   : entry_ts,
            'EntryPx'     : entry,
            'StopPx'      : stop_price,
            'TargetPx'    : target_price if np.isfinite(target_R) else np.nan,
            'ExitTime'    : exit_ts,
            'ExitPx'      : exit_price,
            'ExitReason'  : exit_reason,
            'Shares'      : shares,
            'PnL_$'       : shares * PnL_T,
            'PnL_R'       : (shares * PnL_T) / (risk * str_df.loc[t-1, 'AUM']),
            'AUM_After'   : str_df.loc[t, 'AUM']
        })

    end_time = time.time()
    print(f"******** Optimized Backtest Completed in {round(end_time - start_time, 2)} seconds! ********")
    print(f"Starting AUM: ${AUM_0:,.2f}")
    print(f"Final AUM: ${str_df['AUM'].iloc[-1]:,.2f}")
    print(f"Total Return: {(str_df['AUM'].iloc[-1]/AUM_0 - 1)*100:.4f}%")

    trades_df = pd.DataFrame(trade_log)

    return str_df, trades_df

def plot_equity_curve(str_df, AUM_0, orb_m, target_R, ticker):
    """Plot the equity curve with weekly resampling and highlight out-of-sample period."""
    fig, ax = plt.subplots(figsize=(12, 7))
    df_plot = str_df.copy()
    if 'Date' in df_plot.columns:
        df_plot = df_plot.set_index('Date')
    try:
        weekly_data = df_plot['AUM'].resample('W').last().dropna()
    except Exception as e:
        print("Resampling failed, using original data.", e)
        weekly_data = df_plot['AUM'].dropna()

    p1, = ax.plot(weekly_data.index, weekly_data.values, 'r-', linewidth=2, label='Equity')
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %y'))
    ax.xaxis.set_major_locator(mdates.MonthLocator(interval=3))
    plt.xticks(rotation=90)
    ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: f'${x:,.0f}'))
    ax.grid(True, linestyle=':')

    min_val = weekly_data.min() if not weekly_data.empty else AUM_0
    max_val = weekly_data.max() if not weekly_data.empty else AUM_0
    ax.set_ylim([0.9 * min_val, 1.25 * max_val])

    target_str = f"Target {target_R}R" if np.isfinite(target_R) else "No Target"
    ax.set_title(f"{orb_m}m-ORB - Stop @ OR High/Low - {target_str}\nFull Period - Ticker = {ticker}", fontsize=12)

    # Highlight out-of-sample period starting from a specific date
    start_date = datetime(2023, 2, 17)
    if not weekly_data.empty and start_date >= weekly_data.index[0] and start_date <= weekly_data.index[-1]:
        p2 = ax.axvspan(start_date, weekly_data.index[-1], alpha=0.1, color='green', label='Out-of-Sample')
        ax.legend(handles=[p1, p2], loc='upper left')
    else:
        ax.legend(loc='upper left')

    ax.set_yscale('log')
    ax.yaxis.set_major_locator(mticker.LogLocator(base=10.0, subs=None))
    ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: f'${x:,.0f}'))
    return fig, ax

# ================================
# Main Execution
# ================================
if __name__ == "__main__":
    # Download & merge data
    data = download_and_merge_data()
    if data is None:
        raise Exception("Data download failed.")

    # Load the exported intraday data
    p = pd.read_csv(OUTPUT_FILE, parse_dates=['caldt', 'day'])
    days = pd.to_datetime(p['day'].unique())
    days = pd.DatetimeIndex(sorted(days))

    # Backtest parameters
    orb_m = 5             # Opening Range (minutes)
    target_R = float('inf')  # Profit target (use inf for no target)
    commission = 0.0005   # Commission per share
    risk = 0.01           # Equity risk per trade (1% of AUM)
    max_Lev = 4           # Maximum leverage
    AUM_0 = 25000         # Starting capital

    # Run the backtest
    str_df, trade_df = backtest(days, p, orb_m, target_R, risk, max_Lev, AUM_0, commission)

    # Save trade details
    trade_df.to_csv('trade_details.csv', index=False)

    # Performance analysis
    returns = price2return(str_df['AUM'].values)

    display(Markdown("### Performance Summary"))
    display(summary_statistics(returns))

    display(Markdown("### Monthly Performance"))
    display(monthly_performance_table(returns, str_df['Date']))

    # Plot equity curve
    fig, ax = plot_equity_curve(str_df, AUM_0, orb_m, target_R, TICKER)
    plt.tight_layout()
    plt.show()

Fetching daily adjusted data for ATR calculation...


TypeError: get_yfinance_data() got an unexpected keyword argument 'interval'