# Get Index ETF Data

In [1]:
def get_data(tickers: str = None):
    import yfinance as yf
    import pandas as pd

    df = yf.download(tickers,
                     period='max',
                     interval='1d',
                     # start="2020-12-01",
                     # end="2023-12-31",
                     group_by="ticker",
                     back_adjust=True,
                     progress=False)

    df = df.stack(level=0, future_stack=True).reset_index()
    df.columns = [col.lower() for col in df.columns]
    df = df.dropna()
    df = df[["date", "ticker", "close"]]
    df["date"] = pd.to_datetime(df["date"].dt.date)
    df = df.sort_values(["ticker", "date"], ascending=[True, True])
    df = df.reset_index(drop=True)
    return df

spy_df = get_data(tickers = ['SPY'])
spy_df

Unnamed: 0,date,ticker,close
0,1993-01-29,SPY,43.937500
1,1993-02-01,SPY,44.250000
2,1993-02-02,SPY,44.343750
3,1993-02-03,SPY,44.812500
4,1993-02-04,SPY,45.000000
...,...,...,...
8010,2024-11-20,SPY,590.500000
8011,2024-11-21,SPY,593.669983
8012,2024-11-22,SPY,595.510010
8013,2024-11-25,SPY,597.530029


### Risk Free Rate

In [2]:
# 10 year
0.04304

0.04304

In [3]:
daily_risk_free_rate = 0.0000167222

((1 + daily_risk_free_rate) ** (252*10)) - 1

0.043040068291641864

# Quants Combined

In [6]:
def calc_backtest(df):

    #roi
    df["cumulative_returns"] = ( 1 + df['close'].pct_change() ).cumprod() - 1
    
    # mdd
    df['rolling_max_drawdown'] = ((df['close'] - (df['close'].cummax())) / df['close'].cummax()).cummin()
    
    # sharpe
    df['cumulative_sharpe'] = ( df['close'].pct_change() - daily_risk_free_rate  ).expanding().mean() / df['close'].pct_change().expanding().std()

    return df

In [5]:
calc_backtest(spy_df)

Unnamed: 0,date,ticker,close,cumulative_returns,rolling_max_drawdown,cumulative_sharpe
0,1993-01-29,SPY,43.937500,,0.000000,
1,1993-02-01,SPY,44.250000,0.007112,0.000000,
2,1993-02-02,SPY,44.343750,0.009246,0.000000,1.302366
3,1993-02-03,SPY,44.812500,0.019915,0.000000,1.549419
4,1993-02-04,SPY,45.000000,0.024182,0.000000,1.627643
...,...,...,...,...,...,...
8010,2024-11-20,SPY,590.500000,12.439545,-0.564737,0.032057
8011,2024-11-21,SPY,593.669983,12.511692,-0.564737,0.032111
8012,2024-11-22,SPY,595.510010,12.553571,-0.564737,0.032142
8013,2024-11-25,SPY,597.530029,12.599545,-0.564737,0.032176


# Loop over windows sizes 1 year to 32.5 year

In [8]:
investment_window_sizes = list(range(252, 252*32, 252 // 2))
print(investment_window_sizes)

[252, 378, 504, 630, 756, 882, 1008, 1134, 1260, 1386, 1512, 1638, 1764, 1890, 2016, 2142, 2268, 2394, 2520, 2646, 2772, 2898, 3024, 3150, 3276, 3402, 3528, 3654, 3780, 3906, 4032, 4158, 4284, 4410, 4536, 4662, 4788, 4914, 5040, 5166, 5292, 5418, 5544, 5670, 5796, 5922, 6048, 6174, 6300, 6426, 6552, 6678, 6804, 6930, 7056, 7182, 7308, 7434, 7560, 7686, 7812, 7938]


### Start at beginning Jan 29, 1993 and shift window, 1 day at a time

In [9]:
investment_windows = []

# loop over various investment periods 
for investment_window_size in investment_window_sizes:
    
    # loop over day ranges in entire history
    for i in range(0, len(spy_df)-investment_window_size + 1):

        investment_windows.append(dict(start_idx = i, end_idx = i + investment_window_size, window_size = investment_window_size))

In [10]:
len(investment_windows)

243102

In [11]:
investment_windows[0]

{'start_idx': 0, 'end_idx': 252, 'window_size': 252}

In [12]:
investment_windows[1]

{'start_idx': 1, 'end_idx': 253, 'window_size': 252}

In [13]:
investment_windows[-1]

{'start_idx': 77, 'end_idx': 8015, 'window_size': 7938}

# Run Backtests (naive)
### total time: 10:01
### per backtest: 403.94 it/s

In [14]:
len(spy_df)

8015

In [15]:
from tqdm import tqdm 

naive_bt_results = []

# loop over various investment periods 
for investment_window in tqdm(investment_windows):

    start_idx = investment_window['start_idx']
    end_idx = investment_window['end_idx']
    window_df = spy_df.iloc[start_idx: end_idx].copy()
    
    window_df = calc_backtest(window_df)
    
    naive_bt_results.append({
        
        "win_sz": investment_window_size,
        
        "start_date": window_df['date'].iloc[0],
        
        "end_date": window_df['date'].iloc[-1],
        
        "roi": window_df['rolling_max_drawdown'].iloc[-1],        
        "mdd": window_df['rolling_max_drawdown'].min(),
        "sharpe": window_df['cumulative_sharpe'].iloc[-1]
        
    })

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 243102/243102 [20:37<00:00, 196.38it/s]


# Faster Backtests
### total time: 00:49
### per backtest: 162.25 it/s

In [None]:
from tqdm import tqdm

fast_bt_results = []
calc_count = 0

spy_df['pct_change'] = spy_df['close'].pct_change()

for start_idx in tqdm(range(0,len(spy_df)-1)):
    
    # roi
    gains  = ( 1 +  spy_df.loc[start_idx:, 'pct_change'] ).cumprod() - 1
    
    # mdd
    mdd    = ((spy_df.loc[start_idx:, 'close'] - (spy_df.loc[start_idx:, 'close'].cummax())) / spy_df.loc[start_idx:, 'close'].cummax()).cummin()
    
    # sharpe
    sharpe = (spy_df.loc[start_idx:, 'pct_change'] - daily_risk_free_rate).expanding().mean() / spy_df.loc[start_idx:, 'pct_change'].expanding().std()
    
    
    for investment_winow_size in investment_winow_sizes:
        
        if start_idx+investment_winow_size-1 >= len(spy_df):
            continue                
            
        last_gains = gains.loc[start_idx+investment_winow_size-1].item()

        fast_bt_results.append({
            "win_sz" : investment_winow_size,
            "start_date" : spy_df.loc[start_idx]['date'],
            "end_date" : spy_df.loc[start_idx+investment_winow_size-1]['date'],
            "roi" : gains.loc[start_idx+investment_winow_size-1].item(),
            "mdd": mdd.loc[start_idx+investment_winow_size-1].item(),
            "sharpe": sharpe.loc[start_idx+investment_winow_size-1].item()
        })

# Compare  

In [None]:
pd.DataFrame(fast_bt_results)

In [None]:
pd.DataFrame(naive_bt_results)

In [None]:
import yfinance as yf
import numpy as np
import pandas as pd

# 1. Download SPY data
data = yf.download("SPY", start="1993-01-29", end="2024-01-01")
data = data[['Adj Close']].rename(columns={'Adj Close': 'Close'})
data['Returns'] = data['Close'].pct_change()
data

In [None]:
# 3. Calculate cumulative returns and rolling statistics
df = data.copy()
df['LogClose'] = np.log(df['Close'])
cumulative_returns = pd.DataFrame(index=df.index)

In [None]:
for period in investment_periods:
    
    # Rolling returns (log returns for stability)
    df[f'Return_{period}d'] = (df['LogClose'].shift(-period) - df['LogClose']).fillna(0)
    
    # # Rolling drawdowns
    df[f'MaxClose_{period}d'] = df['Close'].rolling(window=period).max()
    # df[f'Drawdown_{period}d'] = df['Close'] / df[f'MaxClose_{period}d'] - 1

    # # Rolling Sharpe ratio
    # rolling_return = df['Returns'].rolling(window=period).mean()
    # rolling_std = df['Returns'].rolling(window=period).std()
    # df[f'Sharpe_{period}d'] = (rolling_return / rolling_std) * np.sqrt(252)

In [None]:
df

In [None]:
# 4. Organize results for analysis
results = []
for period in investment_periods:
    max_dd = df[f'Drawdown_{period}d'].min()
    avg_return = df[f'Return_{period}d'].mean()
    avg_sharpe = df[f'Sharpe_{period}d'].mean()
    
    results.append({
        'Investment Period (Years)': period / 252,
        'Max Drawdown': max_dd,
        'Average Return': avg_return,
        'Average Sharpe Ratio': avg_sharpe
    })

results_df = pd.DataFrame(results)

# Display final result
print(results_df)

### start at beginning of DF and calculate for each day - 8,014 calculations instead of 243,040

In [None]:
spy_df

### Investment Window (10 Days)

In [None]:
INVESTMENT_PERIOD = 10 # 252
df = df.head(INVESTMENT_PERIOD).copy().sort_values("date")
df

# Calculate Returns and Risks Metrics

### Cumulative Returns  (expanded code)

In [None]:
df.loc[:, 'daily_returns'] = df['close'].pct_change()
df

In [None]:
df['daily_returns_plus_one'] = df['daily_returns'] + 1
df

In [None]:
df["returns_pls_1_cumprod"] = df['daily_returns_plus_one'].cumprod()
df

In [None]:
df["cum_returns"] = df['returns_pls_1_cumprod'] - 1
df

### Cumulative Returns  (1 liner)

In [None]:
df = get_data(tickers = ['SPY'])
df = df.head(10)

In [None]:
df["cum_returns"] = ( 1 + df['close'].pct_change() ).cumprod() - 1
df

### Maximum Drawdown (expanded)

In [None]:
df = get_data(tickers = ['SPY'])
df = df.head(10)

### Cumulative Max

In [None]:
df['rolling_max'] = df['close'].cummax()
df

### Close - Max

In [None]:
df['close_max_diff'] = df['close'] - df['rolling_max']
df

### Drawdown

In [None]:
df['drawdown'] = df['close_max_diff'] / df['rolling_max']
df

### Max Drawdown

In [None]:
df['rolling_max_drawdown'] = df['drawdown'].cummin()
df

### Maximum Drawdown (1-liner)

In [None]:
# rolling max
df['rolling_max'] = df['close'].cummax()
# close - max
df['close_max_diff'] = df['close'] - df['rolling_max']
# drawdown
df['drawdown'] = df['close_max_diff'] / df['rolling_max']
# max drawdown
df['rolling_max_drawdown'] = df['drawdown'].cummin()
df

In [None]:
df = get_data(tickers = ['SPY'])
df = df.head(10)

In [None]:
# rolling max
# df['rolling_max'] = (df['close'].cummax())
# close - max
# df['close_max_diff'] = (df['close'] - df['rolling_max'])
# drawdown
# df['drawdown'] = (df['close_max_diff'] / df['rolling_max'])
# max drawdown
df['rolling_max_drawdown'] = ((df['close'] - (df['close'].cummax())) / df['close'].cummax()).cummin()
df

### Sharpe Ratio

In [None]:
df = get_data(tickers = ['SPY'])
df = df.head(10)

### Rolling Average Returns

In [None]:
df['daily_returns'] = df['close'].pct_change()
df['returns_rolling_mean'] = df['daily_returns'].expanding().mean()
df


### Rolling Standard Deviation

In [None]:
df['returns_rolling_std'] = df['daily_returns'].expanding().std()
df


### Sharpe Ratio

In [None]:
df["sharpe"] = df['returns_rolling_mean'] / df['returns_rolling_std']
df

### Sharpe Ratio (1-liner)

In [None]:
df = get_data(tickers = ['SPY'])
df = df.head(10)

In [None]:
df['sharpe'] = ( 
                    df['close'].pct_change() - daily_risk_free_rate 
               ).expanding().mean() / df['close'].pct_change().expanding().std()
df

# Quants Combined

In [None]:
def calc_backtest(df):

    #roi
    df["cumulative_returns"] = ( 1 + df['close'].pct_change() ).cumprod() - 1
    
    # mdd
    df['rolling_max_drawdown'] = ((df['close'] - (df['close'].cummax())) / df['close'].cummax()).cummin()
    
    # sharpe
    df['cumulative_sharpe'] = ( df['close'].pct_change() - daily_risk_free_rate  ).expanding().mean() / df['close'].pct_change().expanding().std()

    return df

In [None]:
df = get_data(tickers = ['SPY'])
df = df.head(10)
df = calc_backtest(df)
df

# Sliding Window

In [None]:
TRADING_YR = 252
window_addend = TRADING_YR // 2
TRADING_YR, window_addend

In [None]:
df = get_data(tickers = ['SPY'])
df.shape

In [None]:
investment_winow_sizes = list(range(TRADING_YR, len(df), window_addend))
investment_winow_sizes[0:3], investment_winow_sizes[-4:-1]

In [None]:
backtest_results = []

# loop over various investment periods 
for investment_winow_size in tqdm(investment_winow_sizes):
    
    # loop over day ranges in entire history
    for i in range(0, len(df)-investment_winow_size + 1):
        
        start_idx = i
        end_idx = i + investment_winow_size                        
                    
        window_df = df.iloc[start_idx: end_idx].copy()
        window_df = calc_backtest(window_df)
        
        backtest_results.append({
            "holding_days": investment_winow_size,
            "start_date": window_df['date'].iloc[0],
            "end_date": window_df['date'].iloc[-1],
            "roi": window_df['rolling_max_drawdown'].iloc[-1],
            "sharpe": window_df['cumulative_sharpe'].iloc[-1],
            "mdd": window_df['rolling_max_drawdown'].min()
        })

In [None]:
import pandas as pd
pd.DataFrame(backtest_results)