In [13]:
## Import Dependencies
import yfinance as yf
import pandas as pd
from datetime import date, timedelta, datetime

In [14]:
## Ticker list and start date
# Symbols are grouped by asset class; the flattened order below is the order
# in which the later cells download and process them.
_crypto = ("BTC-USD", "ETH-USD", "^CMC200")                      # bitcoin, ether, crypto index
_commodities = ("GLD", "SLV", "CL=F")                            # gold, silver, crude
_volatility = ("^VIX",)                                          # volatility index
_equity_indices = ("^IXIC", "^GSPC", "^DJI")                     # Nasdaq, S&P, Dow
_big_tech = ("META", "AMZN", "AAPL", "NFLX", "GOOG", "TSLA")     # FAANGs, Tesla
_sectors = ("XLF",)                                              # financial sector index
_fx = ("EURUSD=X",)                                              # USD vs EUR
_rates = ("^TNX",)                                               # 10yr

tickers = [
    *_crypto,
    *_commodities,
    *_volatility,
    *_equity_indices,
    *_big_tech,
    *_sectors,
    *_fx,
    *_rates,
]

# First calendar day requested from the data source (ISO format, YYYY-MM-DD).
start_date = "2019-10-31"

In [15]:
## Interval lengths
# Window sizes used for the look-back / look-ahead metrics; they are also
# embedded in the generated column names (e.g. 'return_last_30').
long, med, short = 30, 15, 5

In [16]:
## Look up ticker and create csv
# Downloads the daily history of every symbol in `tickers` starting at
# `start_date` and writes it to '<ticker>.csv' in the working directory.
for ticker in tickers:
    # yf.download already returns a DataFrame, so no extra wrapping is needed.
    # auto_adjust=False is requested explicitly because the metrics cell reads
    # a separate 'Adj Close' column.
    # NOTE(review): the default of auto_adjust appears to differ between
    # yfinance releases - confirm against the installed version.
    data = yf.download(ticker, start=start_date, auto_adjust=False)

    # Some yfinance releases return two-level (field, ticker) columns even for
    # a single symbol; that would be written as a multi-row CSV header which the
    # metrics cell cannot read back by column name. Keep only the field level.
    if isinstance(data.columns, pd.MultiIndex):
        data.columns = data.columns.get_level_values(0)

    # An empty frame is still written (same as before) but is now reported, so
    # a failed or delisted symbol does not go unnoticed.
    if data.empty:
        print('WARNING: no rows downloaded for ' + ticker)

    filename = ticker + '.csv'
    data.to_csv(filename)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

In [17]:
## Create comprehensive list of dates
# One row per calendar day from `start_date` through yesterday (inclusive),
# including weekends and market holidays. The metrics cell left-joins each
# asset onto this frame.
start = datetime.strptime(start_date, '%Y-%m-%d').date()
end = date.today() - timedelta(days=1)
delta = timedelta(days=1)

# pd.date_range builds the datetime column directly, so there is no
# format-to-string / re-parse round trip and `start` is no longer advanced
# past `end` as a side effect. If `end` is before `start` this yields an empty
# frame that still has a 'Date' column.
dates = pd.DataFrame({'Date': pd.date_range(start=start, end=end, freq=delta)})
dates

Unnamed: 0,Date
0,2019-10-31
1,2019-11-01
2,2019-11-02
3,2019-11-03
4,2019-11-04
...,...
1250,2023-04-03
1251,2023-04-04
1252,2023-04-05
1253,2023-04-06


In [18]:
## Loop thru asset files and add metrics
PRICE_COLS = ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']


def add_metrics(prices, all_dates, windows):
    """Return `prices` expanded to every calendar day with look-back/look-ahead metrics.

    Parameters
    ----------
    prices : pd.DataFrame
        One asset's daily data with a datetime 'Date' column plus the columns
        listed in PRICE_COLS.
    all_dates : pd.DataFrame
        Frame with a datetime 'Date' column holding every calendar day to keep.
    windows : sequence of int
        Window lengths in days, in the order their look-back columns should
        appear (the look-ahead columns are emitted in reverse order).

    Returns
    -------
    pd.DataFrame
        Columns: 'Date', PRICE_COLS, then for each window N
        'avgvol_last_N', 'hi_to_lo_last_N', 'return_last_N', then the one-day
        versions ('*_last_1'), then 'return_next_1' and 'return_next_N'.
        Rows with any missing value (the leading rows without enough history
        and the trailing rows without enough future data) are dropped.

    Notes
    -----
    For the row dated t, every '*_last_*' column only uses values up to t-1,
    and every 'return_next_*' column is measured from the close of t-1.
    """
    # Left-join onto the calendar so weekends and market holidays get a row.
    df = pd.merge(all_dates, prices, how='left', on='Date')

    # Fill weekends and market holidays using the last trading day. Assigning
    # the result back (instead of an in-place fillna on a selected column)
    # keeps working when pandas Copy-on-Write is active, and avoids the
    # deprecated fillna(method=...) form.
    df[PRICE_COLS] = df[PRICE_COLS].ffill()

    close = df['Adj Close']
    prev_close = close.shift(1)            # yesterday's adjusted close
    prev_volume = df['Volume'].shift(1)    # yesterday's volume

    metric_cols = []

    ## Past
    for n in windows:
        suffix = str(n)
        window_close = prev_close.rolling(n)
        df['avgvol_last_' + suffix] = prev_volume.rolling(n).mean()
        # Range of the adjusted close over the window (the one-day version
        # below uses the day's High/Low instead).
        df['hi_to_lo_last_' + suffix] = window_close.max() / window_close.min() - 1
        # The numerator is the close at t-1, so the denominator must be the
        # close at t-1-n for the return to span n days. The previous shift(n)
        # produced an (n-1)-day return, inconsistent with 'return_last_1'
        # (shift(2)) and with the n-day 'return_next_n' columns.
        df['return_last_' + suffix] = prev_close / close.shift(n + 1) - 1
        metric_cols += ['avgvol_last_' + suffix,
                        'hi_to_lo_last_' + suffix,
                        'return_last_' + suffix]

    df['avgvol_last_1'] = prev_volume
    df['hi_to_lo_last_1'] = df['High'].shift(1) / df['Low'].shift(1) - 1
    df['return_last_1'] = prev_close / close.shift(2) - 1
    metric_cols += ['avgvol_last_1', 'hi_to_lo_last_1', 'return_last_1']

    ## Future
    df['return_next_1'] = close / prev_close - 1
    metric_cols.append('return_next_1')
    for n in reversed(windows):
        # Close at t+n-1 relative to the close at t-1: an n-day forward return.
        df['return_next_' + str(n)] = close.shift(-(n - 1)) / prev_close - 1
        metric_cols.append('return_next_' + str(n))

    ## Keep the published columns in their published order, filter out nulls
    return df[['Date'] + PRICE_COLS + metric_cols].dropna()


for ticker in tickers:
    ## Upload csv datasets
    filename = ticker + '.csv'
    temp = pd.read_csv(filename)

    ## Format date
    temp['Date'] = pd.to_datetime(temp['Date'])

    df = add_metrics(temp, dates, (long, med, short))

    ## Write to csv
    # NOTE(review): this overwrites the raw download with the processed frame,
    # so running this cell twice without re-running the download cell feeds
    # already-processed (truncated, forward-filled) data back in. The path is
    # left unchanged here because other code may read '<ticker>.csv'.
    df.to_csv(filename)