In [46]:
import yfinance as yf
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Get the S&P 500 constituents
sp500_constituents_url = "https://en.wikipedia.org/wiki/List_of_S&P_500_companies"
sp500_constituents_df = pd.read_html(sp500_constituents_url)[0]

#symbols = sp500_constituents_df["Symbol"].tolist()

# Get the market cap for each constituent
market_caps = []
for symbol in sp500_constituents_df["Symbol"]:
    # Get the market cap from Yahoo Finance
    market_cap_url = f"https://finance.yahoo.com/quote/{symbol}?p={symbol}"
    market_cap_response = requests.get(market_cap_url)
    market_cap_data = market_cap_response.content.decode()

    # Find the market cap in the HTML response
    market_cap_start_index = market_cap_data.find("Market Cap:")
    market_cap_end_index = market_cap_data.find(
        "</span>", market_cap_start_index)
    market_cap = market_cap_data[market_cap_start_index +
                                 12:market_cap_end_index].strip()

    market_caps.append(market_cap)

# Add the market cap to the S&P 500 constituents DataFrame
sp500_constituents_df["Market Cap"] = market_caps

# Sort the DataFrame by market cap in descending order
sp500_constituents_df = sp500_constituents_df.sort_values(
    by=["Market Cap"], ascending=False)

# Get the top 100 stock tickers by market cap
top_100_sp500_tickers = sp500_constituents_df["Symbol"][:100]

# Print the top 100 stock tickers by market cap
print(top_100_sp500_tickers)

# Download historical price data for S&P 500 stocks
stock_data = yf.download(top_100_sp500_tickers.tolist(),
                         start="2011-01-01", end="2015-01-01")['Adj Close']

# Calculate the covariance matrix
cov_matrix = stock_data.pct_change().cov()

sum_of_covariances = cov_matrix.sum(axis=1)
sorted_tickers = sum_of_covariances.sort_values()

top_50_stocks = sorted_tickers[sorted_tickers != 0.0][:50].index.tolist()

#print(top_50_stocks)

165    EBAY
502     ZTS
501    ZION
500     ZBH
499    ZBRA
       ... 
419    SWKS
418     SPG
416     NOW
415     SRE
414     SEE
Name: Symbol, Length: 100, dtype: object
[*********************100%%**********************]  100 of 100 completed


5 Failed downloads:
['WRK', 'VLTO', 'VICI', 'SEDG', 'KHC']: Exception("%ticker%: Data doesn't exist for startDate = 1293858000, endDate = 1420088400")





In [49]:
top_50_stocks


['SO',
 'ZTS',
 'WMT',
 'SYF',
 'PG',
 'CLX',
 'HSY',
 'XEL',
 'WEC',
 'VZ',
 'KO',
 'SYY',
 'TGT',
 'SJM',
 'KR',
 'SRE',
 'WBA',
 'XYL',
 'WELL',
 'WM',
 'UPS',
 'TJX',
 'TSN',
 'VTR',
 'TRV',
 'SNPS',
 'SHW',
 'HD',
 'YUM',
 'PGR',
 'UNH',
 'SPG',
 'ZBH',
 'SYK',
 'VRSN',
 'VFC',
 'ALL',
 'NOW',
 'V',
 'SBUX',
 'GWW',
 'EL',
 'TRGP',
 'CI',
 'BA',
 'TDG',
 'ULTA',
 'DIS',
 'WBD',
 'TSCO']

In [48]:
import pandas_ta as ta
import yfinance as yf

for ticker in top_50_stocks:
    print(f"Processing {ticker}")
    df = yf.download(ticker, start="2015-01-01", end="2021-01-01")

    #df["RSI(2)"] = ta.rsi(df['Close'], length=2)
    df["RSI(7)"] = ta.rsi(df['Close'], length=7)
    #df["RSI(14)"] = ta.rsi(df['Close'], length=14)
    #df["CCI(30)"] = ta.cci(close=df['Close'],
                       #length=30, high=df["High"], low=df["Low"])
    #df["CCI(50)"] = ta.cci(close=df['Close'],
                       #length=50, high=df["High"], low=df["Low"])
    #df["CCI(100)"] = ta.cci(close=df['Close'],
                        #length=100, high=df["High"], low=df["Low"])
    
    #Generate signals if RSI(2) <= 30, position = 1, if RSI(2) >= 70, position = -1, else position = 0
    df["position"] = np.where(df["RSI(7)"] <= 30, 1, np.where(df["RSI(7)"] >= 70, -1, 0))
    
    #Shift position by 1 day to avoid look ahead bias
    df.position = df.position.shift(1)
    df.dropna(inplace=True)
    print(df.head())    
    df.to_csv(f"data/{ticker}.csv")

Processing SO
[*********************100%%**********************]  1 of 1 completed
                 Open       High        Low      Close  Adj Close   Volume  \
Date                                                                         
2015-01-13  50.009998  50.500000  49.520000  49.680000  33.696617  5422100   
2015-01-14  49.700001  50.200001  49.439999  50.189999  34.042549  6205300   
2015-01-15  50.410000  50.880001  50.119999  50.799999  34.456303  5685100   
2015-01-16  50.680000  51.389999  50.619999  51.349998  34.829342  5718600   
2015-01-20  51.400002  51.680000  51.169998  51.660000  35.039619  4081400   

               RSI(7)  position  
Date                             
2015-01-13  50.847719       0.0  
2015-01-14  67.258534       0.0  
2015-01-15  77.664632       0.0  
2015-01-16  83.260919      -1.0  
2015-01-20  85.628745      -1.0  
Processing ZTS
[*********************100%%**********************]  1 of 1 completed
                 Open       High        Low     