In [6]:
#Downloading and formatting the dataset

from statsmodels.regression.rolling import RollingOLS
import pandas_datareader.data as web
import matplotlib.pyplot as plt
import statsmodels.api as sm
import pandas as pd
import numpy as np
import datetime as dt
import yfinance as yf
import pandas_ta
import warnings
warnings.filterwarnings('ignore')

sp500 = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]
sp500['Symbol'] = sp500['Symbol'].str.replace('.', '-')
symbols_list = sp500['Symbol'].unique().tolist()

end_date = dt.datetime.now()
start_date = pd.to_datetime(end_date) - pd.DateOffset(365*8)

df = yf.download(tickers = symbols_list, start = start_date, end = end_date)
df = df.stack()
df.index.names = ['date', 'ticker']
df.columns = df.columns.str.lower()

[*********************100%***********************]  503 of 503 completed


In [7]:
#Computing the technical indicators

df['garman_klass_vol'] = ((np.log(df['high']) - np.log(df['low'])) ** 2) / 2 - (2 * np.log(2) - 1) * ((np.log(df['adj close']) - np.log(df['open'])) ** 2)
df['rsi'] = df.groupby(level = 1)['adj close'].transform(lambda x: pandas_ta.rsi(close = x, length = 20))

df['bb_low'] = df.groupby(level = 1)['adj close'].transform(lambda x: pandas_ta.bbands(close = np.log1p(x), length = 20).iloc[:,0])
df['bb_mid'] = df.groupby(level = 1)['adj close'].transform(lambda x: pandas_ta.bbands(close = np.log1p(x), length = 20).iloc[:,1]) 
df['bb_high'] = df.groupby(level = 1)['adj close'].transform(lambda x: pandas_ta.bbands(close = np.log1p(x), length = 20).iloc[:,2])

def compute_atr(stock_data):
    atr = pandas_ta.atr(high = stock_data['high'],
                        low = stock_data['low'],
                        close = stock_data['close'],
                        length = 14)
    return atr.sub(atr.mean()).div(atr.std())

df['atr'] = df.groupby(level = 1, group_keys = False).apply(compute_atr)

def compute_macd(close):
    macd = pandas_ta.macd(close=close, length=20)
    if macd is not None:
        return macd.iloc[:, 0].sub(macd.iloc[:, 0].mean()).div(macd.iloc[:, 0].std())
    else:
        return pd.Series([np.nan] * len(close), index=close.index)

df['macd'] = df.groupby(level = 1, group_keys = False)['adj close'].apply(compute_macd)
df

Unnamed: 0_level_0,Price,adj close,close,high,low,open,volume,garman_klass_vol,rsi,bb_low,bb_mid,bb_high,atr,macd
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2016-11-07 00:00:00+00:00,A,42.965031,45.709999,45.759998,44.950001,44.990002,1724000.0,-0.000660,,,,,,
2016-11-07 00:00:00+00:00,AAPL,25.591118,27.602501,27.627501,27.365000,27.520000,130240000.0,-0.001994,,,,,,
2016-11-07 00:00:00+00:00,ABBV,41.782856,58.910000,61.160000,56.639999,56.810001,17763100.0,-0.033514,,,,,,
2016-11-07 00:00:00+00:00,ABT,34.488918,39.860001,39.959999,39.540001,39.639999,6458800.0,-0.007429,,,,,,
2016-11-07 00:00:00+00:00,ACGL,26.170000,26.170000,26.196667,25.713333,25.776667,972900.0,0.000085,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-11-01 00:00:00+00:00,XYL,119.459999,119.459999,122.440002,119.360001,121.440002,1925000.0,0.000220,27.233404,4.820718,4.891695,4.962671,0.843935,-1.636832
2024-11-01 00:00:00+00:00,YUM,132.339996,132.339996,133.339996,131.820007,132.199997,1888600.0,0.000065,45.493509,4.888754,4.903457,4.918160,0.228566,-0.602281
2024-11-01 00:00:00+00:00,ZBH,107.269997,107.269997,109.500000,107.139999,107.400002,1905700.0,0.000237,52.567678,4.624335,4.659489,4.694642,-0.532082,-0.002549
2024-11-01 00:00:00+00:00,ZBRA,384.640015,384.640015,386.119995,381.040009,381.970001,523300.0,0.000069,62.624332,5.886588,5.924520,5.962452,0.037315,0.550093
