In [1]:
#! pip install yfinance

In [2]:
## Import Dependencies
import yfinance as yf
import pandas as pd
from datetime import date, timedelta, datetime

In [3]:
## Ticker List and Start, End date, delta
tickers = ["BTC-USD", "ETH-USD", "^CMC200", "GLD", "SLV", "^VIX", "CL=F", "NQ=F", "ES=F", "XLF", "EURUSD=X", "^TNX"]
start_date = "2019-10-31"

In [4]:
## Interval lengths
long = 30
med = 15
short = 5

In [5]:
## Look up ticker and create dataset
for ticker in tickers:
    data = pd.DataFrame(yf.download(ticker, start=start_date))
    filename = ticker + '.csv'
    data.to_csv(filename)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [6]:
## Create list of dates
start = datetime.strptime(start_date, '%Y-%m-%d').date()
end = date.today()
delta = timedelta(days=1)

dates = []
while start <= end:
    dates.append(start.isoformat())
    start += delta

dates = pd.DataFrame(dates)
dates.columns =['Date']
dates['Date'] = pd.to_datetime(dates['Date'])
dates

Unnamed: 0,Date
0,2019-10-31
1,2019-11-01
2,2019-11-02
3,2019-11-03
4,2019-11-04
...,...
1250,2023-04-03
1251,2023-04-04
1252,2023-04-05
1253,2023-04-06


In [7]:
## Upload csv datasets
gld = pd.read_csv(tickers[3] + '.csv')
gld['Date'] = pd.to_datetime(gld['Date'])
gld

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2019-10-31,141.979996,142.690002,141.889999,142.429993,142.429993,7050900
1,2019-11-01,142.210007,142.899994,142.009995,142.559998,142.559998,8090000
2,2019-11-04,142.259995,142.449997,141.740005,142.149994,142.149994,4740300
3,2019-11-05,140.649994,140.889999,139.380005,139.850006,139.850006,11782300
4,2019-11-06,140.119995,140.789993,140.000000,140.449997,140.449997,6416500
...,...,...,...,...,...,...,...
859,2023-03-31,184.110001,184.419998,182.800003,183.220001,183.220001,8000400
860,2023-04-03,183.850006,185.039993,183.320007,184.539993,184.539993,9206600
861,2023-04-04,184.720001,188.229996,184.660004,187.979996,187.979996,13765400
862,2023-04-05,188.339996,188.860001,186.779999,187.830002,187.830002,11440800


In [8]:
## Merge dfs
test = pd.merge(dates, gld, how = 'left', on='Date')
test

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2019-10-31,141.979996,142.690002,141.889999,142.429993,142.429993,7050900.0
1,2019-11-01,142.210007,142.899994,142.009995,142.559998,142.559998,8090000.0
2,2019-11-02,,,,,,
3,2019-11-03,,,,,,
4,2019-11-04,142.259995,142.449997,141.740005,142.149994,142.149994,4740300.0
...,...,...,...,...,...,...,...
1250,2023-04-03,183.850006,185.039993,183.320007,184.539993,184.539993,9206600.0
1251,2023-04-04,184.720001,188.229996,184.660004,187.979996,187.979996,13765400.0
1252,2023-04-05,188.339996,188.860001,186.779999,187.830002,187.830002,11440800.0
1253,2023-04-06,186.889999,187.259995,185.949997,186.490005,186.490005,6643300.0


In [9]:
## Fill in weekends and market holidays using last trading day
test['Open'].fillna(method='ffill', inplace=True)
test['High'].fillna(method='ffill', inplace=True)
test['Low'].fillna(method='ffill', inplace=True)
test['Close'].fillna(method='ffill', inplace=True)
test['Adj Close'].fillna(method='ffill', inplace=True)
test['Volume'].fillna(method='ffill', inplace=True)
test

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2019-10-31,141.979996,142.690002,141.889999,142.429993,142.429993,7050900.0
1,2019-11-01,142.210007,142.899994,142.009995,142.559998,142.559998,8090000.0
2,2019-11-02,142.210007,142.899994,142.009995,142.559998,142.559998,8090000.0
3,2019-11-03,142.210007,142.899994,142.009995,142.559998,142.559998,8090000.0
4,2019-11-04,142.259995,142.449997,141.740005,142.149994,142.149994,4740300.0
...,...,...,...,...,...,...,...
1250,2023-04-03,183.850006,185.039993,183.320007,184.539993,184.539993,9206600.0
1251,2023-04-04,184.720001,188.229996,184.660004,187.979996,187.979996,13765400.0
1252,2023-04-05,188.339996,188.860001,186.779999,187.830002,187.830002,11440800.0
1253,2023-04-06,186.889999,187.259995,185.949997,186.490005,186.490005,6643300.0


In [10]:
## Create past and future dates
# Past
test['date_last_long'] = test['Date'] - pd.to_timedelta(long, unit='d')
test['date_last_med'] = test['Date'] - pd.to_timedelta(med, unit='d')
test['date_last_short'] = test['Date'] - pd.to_timedelta(short, unit='d')

# Future
test['date_next_1'] = test['Date'] + pd.to_timedelta(1, unit='d')
test['date_next_short'] = test['Date'] + pd.to_timedelta(short, unit='d')
test['date_next_med'] = test['Date'] + pd.to_timedelta(med, unit='d')
test['date_next_long'] = test['Date'] + pd.to_timedelta(long, unit='d')

test

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,date_last_long,date_last_med,date_last_short,date_next_1,date_next_short,date_next_med,date_next_long
0,2019-10-31,141.979996,142.690002,141.889999,142.429993,142.429993,7050900.0,2019-10-01,2019-10-16,2019-10-26,2019-11-01,2019-11-05,2019-11-15,2019-11-30
1,2019-11-01,142.210007,142.899994,142.009995,142.559998,142.559998,8090000.0,2019-10-02,2019-10-17,2019-10-27,2019-11-02,2019-11-06,2019-11-16,2019-12-01
2,2019-11-02,142.210007,142.899994,142.009995,142.559998,142.559998,8090000.0,2019-10-03,2019-10-18,2019-10-28,2019-11-03,2019-11-07,2019-11-17,2019-12-02
3,2019-11-03,142.210007,142.899994,142.009995,142.559998,142.559998,8090000.0,2019-10-04,2019-10-19,2019-10-29,2019-11-04,2019-11-08,2019-11-18,2019-12-03
4,2019-11-04,142.259995,142.449997,141.740005,142.149994,142.149994,4740300.0,2019-10-05,2019-10-20,2019-10-30,2019-11-05,2019-11-09,2019-11-19,2019-12-04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1250,2023-04-03,183.850006,185.039993,183.320007,184.539993,184.539993,9206600.0,2023-03-04,2023-03-19,2023-03-29,2023-04-04,2023-04-08,2023-04-18,2023-05-03
1251,2023-04-04,184.720001,188.229996,184.660004,187.979996,187.979996,13765400.0,2023-03-05,2023-03-20,2023-03-30,2023-04-05,2023-04-09,2023-04-19,2023-05-04
1252,2023-04-05,188.339996,188.860001,186.779999,187.830002,187.830002,11440800.0,2023-03-06,2023-03-21,2023-03-31,2023-04-06,2023-04-10,2023-04-20,2023-05-05
1253,2023-04-06,186.889999,187.259995,185.949997,186.490005,186.490005,6643300.0,2023-03-07,2023-03-22,2023-04-01,2023-04-07,2023-04-11,2023-04-21,2023-05-06


In [11]:
## Add last day returns
test['hi_to_lo_last_1'] = (test['High'] / test['Low']) - 1
test['return_last_1'] = (test['Close'] / test['Open']) - 1
test

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,date_last_long,date_last_med,date_last_short,date_next_1,date_next_short,date_next_med,date_next_long,hi_to_lo_last_1,return_last_1
0,2019-10-31,141.979996,142.690002,141.889999,142.429993,142.429993,7050900.0,2019-10-01,2019-10-16,2019-10-26,2019-11-01,2019-11-05,2019-11-15,2019-11-30,0.005638,0.003169
1,2019-11-01,142.210007,142.899994,142.009995,142.559998,142.559998,8090000.0,2019-10-02,2019-10-17,2019-10-27,2019-11-02,2019-11-06,2019-11-16,2019-12-01,0.006267,0.002461
2,2019-11-02,142.210007,142.899994,142.009995,142.559998,142.559998,8090000.0,2019-10-03,2019-10-18,2019-10-28,2019-11-03,2019-11-07,2019-11-17,2019-12-02,0.006267,0.002461
3,2019-11-03,142.210007,142.899994,142.009995,142.559998,142.559998,8090000.0,2019-10-04,2019-10-19,2019-10-29,2019-11-04,2019-11-08,2019-11-18,2019-12-03,0.006267,0.002461
4,2019-11-04,142.259995,142.449997,141.740005,142.149994,142.149994,4740300.0,2019-10-05,2019-10-20,2019-10-30,2019-11-05,2019-11-09,2019-11-19,2019-12-04,0.005009,-0.000773
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1250,2023-04-03,183.850006,185.039993,183.320007,184.539993,184.539993,9206600.0,2023-03-04,2023-03-19,2023-03-29,2023-04-04,2023-04-08,2023-04-18,2023-05-03,0.009382,0.003753
1251,2023-04-04,184.720001,188.229996,184.660004,187.979996,187.979996,13765400.0,2023-03-05,2023-03-20,2023-03-30,2023-04-05,2023-04-09,2023-04-19,2023-05-04,0.019333,0.017648
1252,2023-04-05,188.339996,188.860001,186.779999,187.830002,187.830002,11440800.0,2023-03-06,2023-03-21,2023-03-31,2023-04-06,2023-04-10,2023-04-20,2023-05-05,0.011136,-0.002708
1253,2023-04-06,186.889999,187.259995,185.949997,186.490005,186.490005,6643300.0,2023-03-07,2023-03-22,2023-04-01,2023-04-07,2023-04-11,2023-04-21,2023-05-06,0.007045,-0.002140


In [12]:
## Remove unused fields
test = test.drop(columns=['Open', 'High', 'Low', 'Close'])
test

Unnamed: 0,Date,Adj Close,Volume,date_last_long,date_last_med,date_last_short,date_next_1,date_next_short,date_next_med,date_next_long,hi_to_lo_last_1,return_last_1
0,2019-10-31,142.429993,7050900.0,2019-10-01,2019-10-16,2019-10-26,2019-11-01,2019-11-05,2019-11-15,2019-11-30,0.005638,0.003169
1,2019-11-01,142.559998,8090000.0,2019-10-02,2019-10-17,2019-10-27,2019-11-02,2019-11-06,2019-11-16,2019-12-01,0.006267,0.002461
2,2019-11-02,142.559998,8090000.0,2019-10-03,2019-10-18,2019-10-28,2019-11-03,2019-11-07,2019-11-17,2019-12-02,0.006267,0.002461
3,2019-11-03,142.559998,8090000.0,2019-10-04,2019-10-19,2019-10-29,2019-11-04,2019-11-08,2019-11-18,2019-12-03,0.006267,0.002461
4,2019-11-04,142.149994,4740300.0,2019-10-05,2019-10-20,2019-10-30,2019-11-05,2019-11-09,2019-11-19,2019-12-04,0.005009,-0.000773
...,...,...,...,...,...,...,...,...,...,...,...,...
1250,2023-04-03,184.539993,9206600.0,2023-03-04,2023-03-19,2023-03-29,2023-04-04,2023-04-08,2023-04-18,2023-05-03,0.009382,0.003753
1251,2023-04-04,187.979996,13765400.0,2023-03-05,2023-03-20,2023-03-30,2023-04-05,2023-04-09,2023-04-19,2023-05-04,0.019333,0.017648
1252,2023-04-05,187.830002,11440800.0,2023-03-06,2023-03-21,2023-03-31,2023-04-06,2023-04-10,2023-04-20,2023-05-05,0.011136,-0.002708
1253,2023-04-06,186.490005,6643300.0,2023-03-07,2023-03-22,2023-04-01,2023-04-07,2023-04-11,2023-04-21,2023-05-06,0.007045,-0.002140


In [13]:
## Merge with previous and future dates df
# Past
last_long = test[['Date', 'Adj Close']]
last_long = last_long.rename(columns={"Date": "date_last_long", "Adj Close": "close_last_long"})
test = pd.merge(test, last_long, how = 'left', on='date_last_long')
test['avgvol_last_long'] = test['Volume'].rolling(long).sum() / long
test['hi_to_lo_last_long'] = (test['Adj Close'].rolling(long).max() / test['Adj Close'].rolling(long).min()) - 1

last_med = test[['Date', 'Adj Close']]
last_med = last_med.rename(columns={"Date": "date_last_med", "Adj Close": "close_last_med"})
test = pd.merge(test, last_med, how = 'left', on='date_last_med')
test['avgvol_last_med'] = test['Volume'].rolling(med).sum() / med
test['hi_to_lo_last_med'] = (test['Adj Close'].rolling(med).max() / test['Adj Close'].rolling(med).min()) - 1


last_short = test[['Date', 'Adj Close']]
last_short = last_short.rename(columns={"Date": "date_last_short", "Adj Close": "close_last_short"})
test = pd.merge(test, last_short, how = 'left', on='date_last_short')
test['avgvol_last_short'] = test['Volume'].rolling(short).sum() / short
test['hi_to_lo_last_short'] = (test['Adj Close'].rolling(short).max() / test['Adj Close'].rolling(short).min()) - 1


# Future
next_1 = test[['Date', 'Adj Close']]
next_1 = next_1.rename(columns={"Date": "date_next_1", "Adj Close": "close_next_1"})
test = pd.merge(test, next_1, how = 'left', on='date_next_1')

next_short = test[['Date', 'Adj Close']]
next_short = next_short.rename(columns={"Date": "date_next_short", "Adj Close": "close_next_short"})
test = pd.merge(test, next_short, how = 'left', on='date_next_short')

next_med = test[['Date', 'Adj Close']]
next_med = next_med.rename(columns={"Date": "date_next_med", "Adj Close": "close_next_med"})
test = pd.merge(test, next_med, how = 'left', on='date_next_med')

next_long = test[['Date', 'Adj Close']]
next_long = next_long.rename(columns={"Date": "date_next_long", "Adj Close": "close_next_long"})
test = pd.merge(test, next_long, how = 'left', on='date_next_long')

test


Unnamed: 0,Date,Adj Close,Volume,date_last_long,date_last_med,date_last_short,date_next_1,date_next_short,date_next_med,date_next_long,...,close_last_med,avgvol_last_med,hi_to_lo_last_med,close_last_short,avgvol_last_short,hi_to_lo_last_short,close_next_1,close_next_short,close_next_med,close_next_long
0,2019-10-31,142.429993,7050900.0,2019-10-01,2019-10-16,2019-10-26,2019-11-01,2019-11-05,2019-11-15,2019-11-30,...,,,,,,,142.559998,139.850006,138.210007,137.860001
1,2019-11-01,142.559998,8090000.0,2019-10-02,2019-10-17,2019-10-27,2019-11-02,2019-11-06,2019-11-16,2019-12-01,...,,,,,,,142.559998,140.449997,138.210007,137.860001
2,2019-11-02,142.559998,8090000.0,2019-10-03,2019-10-18,2019-10-28,2019-11-03,2019-11-07,2019-11-17,2019-12-02,...,,,,,,,142.559998,138.270004,138.210007,137.789993
3,2019-11-03,142.559998,8090000.0,2019-10-04,2019-10-19,2019-10-29,2019-11-04,2019-11-08,2019-11-18,2019-12-03,...,,,,,,,142.149994,137.389999,138.619995,139.110001
4,2019-11-04,142.149994,4740300.0,2019-10-05,2019-10-20,2019-10-30,2019-11-05,2019-11-09,2019-11-19,2019-12-04,...,,,,,7212240.0,0.002884,139.850006,137.389999,138.690002,138.919998
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1250,2023-04-03,184.539993,9206600.0,2023-03-04,2023-03-19,2023-03-29,2023-04-04,2023-04-08,2023-04-18,2023-05-03,...,183.770004,8.756560e+06,0.029772,182.529999,7913620.0,0.007204,187.979996,,,
1251,2023-04-04,187.979996,13765400.0,2023-03-05,2023-03-20,2023-03-30,2023-04-05,2023-04-09,2023-04-19,2023-05-04,...,183.839996,8.884540e+06,0.042191,184.179993,9394640.0,0.025980,187.830002,,,
1252,2023-04-05,187.830002,11440800.0,2023-03-06,2023-03-21,2023-03-31,2023-04-06,2023-04-10,2023-04-20,2023-05-05,...,180.369995,8.931627e+06,0.033141,183.220001,10082720.0,0.025980,186.490005,,,
1253,2023-04-06,186.490005,6643300.0,2023-03-07,2023-03-22,2023-04-01,2023-04-07,2023-04-11,2023-04-21,2023-05-06,...,183.440002,8.576540e+06,0.033141,183.220001,9811300.0,0.025980,186.490005,,,


In [14]:
## Filter out NaN
test = test.dropna()
test

Unnamed: 0,Date,Adj Close,Volume,date_last_long,date_last_med,date_last_short,date_next_1,date_next_short,date_next_med,date_next_long,...,close_last_med,avgvol_last_med,hi_to_lo_last_med,close_last_short,avgvol_last_short,hi_to_lo_last_short,close_next_1,close_next_short,close_next_med,close_next_long
30,2019-11-30,137.860001,4516400.0,2019-10-31,2019-11-15,2019-11-25,2019-12-01,2019-12-05,2019-12-15,2019-12-30,...,138.210007,5.406727e+06,0.012773,137.080002,4395840.0,0.006204,137.860001,139.000000,139.050003,142.630005
31,2019-12-01,137.860001,4516400.0,2019-11-01,2019-11-16,2019-11-26,2019-12-02,2019-12-06,2019-12-16,2019-12-31,...,138.210007,5.034040e+06,0.012773,137.740005,4354240.0,0.006204,137.789993,137.619995,139.039993,142.899994
32,2019-12-02,137.789993,8274700.0,2019-11-02,2019-11-17,2019-11-27,2019-12-03,2019-12-07,2019-12-17,2020-01-01,...,138.210007,4.911907e+06,0.012773,137.009995,5186980.0,0.006204,139.110001,137.619995,139.009995,142.899994
33,2019-12-03,139.110001,8481700.0,2019-11-03,2019-11-18,2019-11-28,2019-12-04,2019-12-08,2019-12-18,2020-01-02,...,138.619995,5.096160e+06,0.015327,137.009995,6061120.0,0.009580,138.919998,137.619995,139.020004,143.949997
34,2019-12-04,138.919998,6322300.0,2019-11-04,2019-11-19,2019-11-29,2019-12-05,2019-12-09,2019-12-19,2020-01-03,...,138.690002,5.196020e+06,0.015327,137.860001,6422300.0,0.009580,139.000000,137.580002,139.380005,145.860001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1220,2023-03-04,172.490005,6893400.0,2023-02-02,2023-02-17,2023-02-27,2023-03-05,2023-03-09,2023-03-19,2023-04-03,...,171.259995,5.515520e+06,0.024592,169.009995,6217920.0,0.015962,172.490005,170.199997,183.770004,184.539993
1221,2023-03-05,172.490005,6893400.0,2023-02-03,2023-02-18,2023-02-28,2023-03-06,2023-03-10,2023-03-20,2023-04-04,...,171.259995,5.654427e+06,0.024592,169.779999,6244280.0,0.010723,171.619995,173.869995,183.839996,187.979996
1222,2023-03-06,171.619995,4264100.0,2023-02-04,2023-02-19,2023-03-01,2023-03-07,2023-03-11,2023-03-21,2023-04-05,...,171.259995,5.618047e+06,0.024592,170.759995,5779000.0,0.010723,168.619995,173.869995,180.369995,187.830002
1223,2023-03-07,168.619995,8713300.0,2023-02-05,2023-02-20,2023-03-02,2023-03-08,2023-03-12,2023-03-22,2023-04-06,...,171.259995,5.878280e+06,0.024592,170.660004,6731520.0,0.022951,168.539993,173.869995,183.440002,186.490005


In [15]:
## Calculated fields
test['return_last_long'] = (test['Adj Close'] / test['close_last_long']) - 1
test['return_last_med'] = (test['Adj Close'] / test['close_last_med']) - 1
test['return_last_short'] = (test['Adj Close'] / test['close_last_short']) - 1
test['return_next_1'] = (test['Adj Close'] / test['close_next_1']) - 1
test['return_next_short'] = (test['Adj Close'] / test['close_next_short']) - 1
test['return_next_med'] = (test['Adj Close'] / test['close_next_med']) - 1
test['return_next_long'] = (test['Adj Close'] / test['close_next_long']) - 1
test

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_

Unnamed: 0,Date,Adj Close,Volume,date_last_long,date_last_med,date_last_short,date_next_1,date_next_short,date_next_med,date_next_long,...,close_next_short,close_next_med,close_next_long,return_last_long,return_last_med,return_last_short,return_next_1,return_next_short,return_next_med,return_next_long
30,2019-11-30,137.860001,4516400.0,2019-10-31,2019-11-15,2019-11-25,2019-12-01,2019-12-05,2019-12-15,2019-12-30,...,139.000000,139.050003,142.630005,-0.032086,-0.002532,0.005690,0.000000,-0.008201,-0.008558,-0.033443
31,2019-12-01,137.860001,4516400.0,2019-11-01,2019-11-16,2019-11-26,2019-12-02,2019-12-06,2019-12-16,2019-12-31,...,137.619995,139.039993,142.899994,-0.032969,-0.002532,0.000871,0.000508,0.001744,-0.008487,-0.035269
32,2019-12-02,137.789993,8274700.0,2019-11-02,2019-11-17,2019-11-27,2019-12-03,2019-12-07,2019-12-17,2020-01-01,...,137.619995,139.009995,142.899994,-0.033460,-0.003039,0.005693,-0.009489,0.001235,-0.008776,-0.035759
33,2019-12-03,139.110001,8481700.0,2019-11-03,2019-11-18,2019-11-28,2019-12-04,2019-12-08,2019-12-18,2020-01-02,...,137.619995,139.020004,143.949997,-0.024200,0.003535,0.015327,0.001368,0.010827,0.000647,-0.033623
34,2019-12-04,138.919998,6322300.0,2019-11-04,2019-11-19,2019-11-29,2019-12-05,2019-12-09,2019-12-19,2020-01-03,...,137.580002,139.380005,145.860001,-0.022722,0.001658,0.007689,-0.000576,0.009740,-0.003300,-0.047580
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1220,2023-03-04,172.490005,6893400.0,2023-02-02,2023-02-17,2023-02-27,2023-03-05,2023-03-09,2023-03-19,2023-04-03,...,170.199997,183.770004,184.539993,-0.030410,0.007182,0.020591,0.000000,0.013455,-0.061381,-0.065297
1221,2023-03-05,172.490005,6893400.0,2023-02-03,2023-02-18,2023-02-28,2023-03-06,2023-03-10,2023-03-20,2023-04-04,...,173.869995,183.839996,187.979996,-0.005592,0.007182,0.015962,0.005069,-0.007937,-0.061738,-0.082402
1222,2023-03-06,171.619995,4264100.0,2023-02-04,2023-02-19,2023-03-01,2023-03-07,2023-03-11,2023-03-21,2023-04-05,...,173.869995,180.369995,187.830002,-0.010608,0.002102,0.005036,0.017791,-0.012941,-0.048511,-0.086301
1223,2023-03-07,168.619995,8713300.0,2023-02-05,2023-02-20,2023-03-02,2023-03-08,2023-03-12,2023-03-22,2023-04-06,...,173.869995,183.440002,186.490005,-0.027903,-0.015415,-0.011954,0.000475,-0.030195,-0.080789,-0.095823


In [16]:
## Drop unused fields
test = test.drop(columns=['Adj Close',
                          'date_last_long', 'date_last_med', 'date_last_short', 'date_next_1', 'date_next_long', 'date_next_med', 'date_next_short',
                          'close_last_long', 'close_last_med', 'close_last_short', 'close_next_long', 'close_next_med', 'close_next_short'])
test

Unnamed: 0,Date,Volume,hi_to_lo_last_1,return_last_1,avgvol_last_long,hi_to_lo_last_long,avgvol_last_med,hi_to_lo_last_med,avgvol_last_short,hi_to_lo_last_short,close_next_1,return_last_long,return_last_med,return_last_short,return_next_1,return_next_short,return_next_med,return_next_long
30,2019-11-30,4516400.0,0.007660,0.005543,7.074850e+06,0.040508,5.406727e+06,0.012773,4395840.0,0.006204,137.860001,-0.032086,-0.002532,0.005690,0.000000,-0.008201,-0.008558,-0.033443
31,2019-12-01,4516400.0,0.007660,0.005543,6.955730e+06,0.040508,5.034040e+06,0.012773,4354240.0,0.006204,137.789993,-0.032969,-0.002532,0.000871,0.000508,0.001744,-0.008487,-0.035269
32,2019-12-02,8274700.0,0.005754,0.003423,6.961887e+06,0.040508,4.911907e+06,0.012773,5186980.0,0.006204,139.110001,-0.033460,-0.003039,0.005693,-0.009489,0.001235,-0.008776,-0.035759
33,2019-12-03,8481700.0,0.004318,0.000504,6.974943e+06,0.037516,5.096160e+06,0.015327,6061120.0,0.009580,138.919998,-0.024200,0.003535,0.015327,0.001368,0.010827,0.000647,-0.033623
34,2019-12-04,6322300.0,0.004185,-0.001725,7.027677e+06,0.025108,5.196020e+06,0.015327,6422300.0,0.009580,139.000000,-0.022722,0.001658,0.007689,-0.000576,0.009740,-0.003300,-0.047580
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1220,2023-03-04,6893400.0,0.009475,0.006066,6.066843e+06,0.035996,5.515520e+06,0.024592,6217920.0,0.015962,172.490005,-0.030410,0.007182,0.020591,0.000000,0.013455,-0.061381,-0.065297
1221,2023-03-05,6893400.0,0.009475,0.006066,5.840810e+06,0.035996,5.654427e+06,0.024592,6244280.0,0.010723,171.619995,-0.005592,0.007182,0.015962,0.005069,-0.007937,-0.061738,-0.082402
1222,2023-03-06,4264100.0,0.004839,-0.004062,5.527133e+06,0.035996,5.618047e+06,0.024592,5779000.0,0.010723,168.619995,-0.010608,0.002102,0.005036,0.017791,-0.012941,-0.048511,-0.086301
1223,2023-03-07,8713300.0,0.011571,-0.010562,5.361763e+06,0.035996,5.878280e+06,0.024592,6731520.0,0.022951,168.539993,-0.027903,-0.015415,-0.011954,0.000475,-0.030195,-0.080789,-0.095823


In [17]:
## Rename and rearrange columns
# Rename
test = test.rename(columns={"Volume": "avgvol_last_1",
                            "avgvol_last_long": "avgvol_last_" + str(long), 
                            "hi_to_lo_last_long": "hi_to_lo_last_" + str(long),
                            "return_last_long": "return_last_" + str(long),
                            "avgvol_last_med": "avgvol_last_" + str(med), 
                            "hi_to_lo_last_med": "hi_to_lo_last_" + str(med),
                            "return_last_med": "return_last_" + str(med),
                            "avgvol_last_short": "avgvol_last_" + str(short), 
                            "hi_to_lo_last_short": "hi_to_lo_last_" + str(short),
                            "return_last_short": "return_last_" + str(short),
                            "return_next_long": "return_next_" + str(long),
                            "return_next_med": "return_next_" + str(med),
                            "return_next_short": "return_next_" + str(short)})

# Rearrange
test = test[['Date',
             'avgvol_last_' + str(long), 'hi_to_lo_last_' + str(long), 'return_last_' + str(long),
             'avgvol_last_' + str(med), 'hi_to_lo_last_' + str(med), 'return_last_' + str(med),
             'avgvol_last_' + str(short), 'hi_to_lo_last_' + str(short), 'return_last_' + str(short),
             'avgvol_last_1', 'hi_to_lo_last_1', 'return_last_1',
             'return_next_1', 'return_next_' + str(short), 'return_next_' + str(med),'return_next_' + str(long)]]
test

Unnamed: 0,Date,avgvol_last_30,hi_to_lo_last_30,return_last_30,avgvol_last_15,hi_to_lo_last_15,return_last_15,avgvol_last_5,hi_to_lo_last_5,return_last_5,avgvol_last_1,hi_to_lo_last_1,return_last_1,return_next_1,return_next_5,return_next_15,return_next_30
30,2019-11-30,7.074850e+06,0.040508,-0.032086,5.406727e+06,0.012773,-0.002532,4395840.0,0.006204,0.005690,4516400.0,0.007660,0.005543,0.000000,-0.008201,-0.008558,-0.033443
31,2019-12-01,6.955730e+06,0.040508,-0.032969,5.034040e+06,0.012773,-0.002532,4354240.0,0.006204,0.000871,4516400.0,0.007660,0.005543,0.000508,0.001744,-0.008487,-0.035269
32,2019-12-02,6.961887e+06,0.040508,-0.033460,4.911907e+06,0.012773,-0.003039,5186980.0,0.006204,0.005693,8274700.0,0.005754,0.003423,-0.009489,0.001235,-0.008776,-0.035759
33,2019-12-03,6.974943e+06,0.037516,-0.024200,5.096160e+06,0.015327,0.003535,6061120.0,0.009580,0.015327,8481700.0,0.004318,0.000504,0.001368,0.010827,0.000647,-0.033623
34,2019-12-04,7.027677e+06,0.025108,-0.022722,5.196020e+06,0.015327,0.001658,6422300.0,0.009580,0.007689,6322300.0,0.004185,-0.001725,-0.000576,0.009740,-0.003300,-0.047580
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1220,2023-03-04,6.066843e+06,0.035996,-0.030410,5.515520e+06,0.024592,0.007182,6217920.0,0.015962,0.020591,6893400.0,0.009475,0.006066,0.000000,0.013455,-0.061381,-0.065297
1221,2023-03-05,5.840810e+06,0.035996,-0.005592,5.654427e+06,0.024592,0.007182,6244280.0,0.010723,0.015962,6893400.0,0.009475,0.006066,0.005069,-0.007937,-0.061738,-0.082402
1222,2023-03-06,5.527133e+06,0.035996,-0.010608,5.618047e+06,0.024592,0.002102,5779000.0,0.010723,0.005036,4264100.0,0.004839,-0.004062,0.017791,-0.012941,-0.048511,-0.086301
1223,2023-03-07,5.361763e+06,0.035996,-0.027903,5.878280e+06,0.024592,-0.015415,6731520.0,0.022951,-0.011954,8713300.0,0.011571,-0.010562,0.000475,-0.030195,-0.080789,-0.095823


In [18]:
## Write to csv
test.to_csv('test.csv')
# for ticker in tickers:
#     data = pd.DataFrame(yf.download(ticker, start=start))
#     filename = ticker + '.csv'
#     data.to_csv(filename)