In [1]:
# Author: Michael Djaballah
# Last edited: 
# Last edited by: Michael Djaballah

import yfinance as yf
import pandas as pd
from datetime import datetime
import os
from time import sleep
from dateutil.relativedelta import relativedelta
import sklearn as sk
from sklearn.preprocessing import StandardScaler
import seaborn as sns

In [2]:
# Functions to maintain and call the S&P 500 from a current date
# Author Michael Djaballah
# Time last edited: 5:56 PM June 1, 2020
# Last edited by: Michael Djaballah

def get_snp_store(data_path='data/'):
    """Download the S&P 500 tables from Wikipedia and save them as CSV files.

    Two files are written into ``data_path`` (the directory is created when
    it does not exist yet):

    * ``snp_current.csv`` -- the first table on the page, saved unchanged.
    * ``snp_hist.csv`` -- the second table, reduced to the three columns
      ``Date``, ``Added`` and ``Removed`` (tickers only).

    Parameters
    ----------
    data_path : str, optional
        Directory to save into. A trailing path separator is optional, and an
        empty string means the current working directory.

    Returns
    -------
    None
    """
    tables = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')

    curr = tables[0]
    hist = tables[1]

    # The history table is indexed with (group, field) column pairs; keep the
    # date plus the ticker of each added / removed company.
    new_hist = pd.DataFrame(hist['Date'])
    new_hist['Added'] = hist['Added', 'Ticker']
    new_hist['Removed'] = hist['Removed', 'Ticker']

    # os.makedirs('') raises, so only create a directory when one was named.
    if data_path:
        os.makedirs(data_path, exist_ok=True)

    # os.path.join (not string concatenation) so 'data' and 'data/' both
    # put the files inside the directory.
    curr.to_csv(os.path.join(data_path, 'snp_current.csv'), index=False)
    new_hist.to_csv(os.path.join(data_path, 'snp_hist.csv'), index=False)
    return None


# Date format tried for snp_hist.csv when the caller's own format does not
# match a stored date (e.g. 'January 1, 2020').
_SNP_HIST_DATE_FORMAT = '%B %d, %Y'


def _parse_hist_date(raw_date, date_format):
    """Parse one stored history date, trying the caller's format first."""
    try:
        return datetime.strptime(raw_date, date_format)
    except ValueError:
        return datetime.strptime(raw_date, _SNP_HIST_DATE_FORMAT)


def build_snp(date, date_format, data_path='data/'):
    """Reconstruct the S&P 500 membership as of ``date``.

    Starts from the current constituents (``snp_current.csv``) and undoes
    every recorded change dated on or after ``date`` (``snp_hist.csv``),
    newest first: tickers that were added are taken out again and tickers
    that were removed are put back.

    Parameters
    ----------
    date : str
        Target date, e.g. ``'January 1, 2020'``.
    date_format : str
        ``strptime`` format of ``date``, e.g. ``'%B %d, %Y'``. It is also
        tried first on the stored history dates; if it does not match them,
        ``'%B %d, %Y'`` is used instead.
    data_path : str, optional
        Directory holding the two CSV files; a trailing separator is optional.

    Returns
    -------
    list
        Ticker symbols in the index at ``date``, in no particular order.

    Raises
    ------
    ValueError
        If ``date`` does not match ``date_format``, or a stored history date
        matches neither format.
    """
    curr = pd.read_csv(os.path.join(data_path, 'snp_current.csv'))
    hist = pd.read_csv(os.path.join(data_path, 'snp_hist.csv'))

    start_date = datetime.strptime(date, date_format)

    # Collect the changes that happened on or after the target date.
    changes = []
    for raw_date, added, removed in zip(hist['Date'], hist['Added'], hist['Removed']):
        changed_on = _parse_hist_date(raw_date, date_format)
        if changed_on >= start_date:
            changes.append((changed_on, added, removed))

    # Undo newest first. The sort is stable, so a file that is already
    # newest-first is walked in exactly its stored order.
    changes.sort(key=lambda change: change[0], reverse=True)

    snp_set = set(curr['Symbol'])
    for _, added, removed in changes:
        # Rolling back an addition: the ticker was not in the index before.
        snp_set.discard(added)
        # Rolling back a removal; a missing value means nothing was removed.
        if not pd.isna(removed):
            snp_set.add(removed)

    return list(snp_set)

In [3]:
from portfolio import Portfolio

In [4]:
# Backtest window; both strings are passed to port.backtest() further down.
start_date = '2015-01-01'
end_date = '2019-12-01'
# Index membership on the same calendar date as start_date, written in the
# '%B %d, %Y' format -- keep the two dates in sync when changing the window.
universe = build_snp('January 1, 2015', '%B %d, %Y')

# Forwarded to Portfolio(hist_depth=..., train_depth=...).
# NOTE(review): units are not visible here (presumably months) -- confirm in portfolio.py.
hist_depth = 12
train_depth = 4

In [5]:
%%time
# Build the portfolio over the reconstructed universe, using the depth
# settings from the configuration cell and the Close / Volume features.
port = Portfolio(
    universe,
    hist_depth=hist_depth,
    train_depth=train_depth,
    features=['Close', 'Volume'],
)

CPU times: user 1.06 s, sys: 46 ms, total: 1.11 s
Wall time: 1.12 s


In [6]:
from sklearn.ensemble import RandomForestRegressor
# Model handed to port.backtest(): 50 trees, a fixed random_state so reruns
# build the same forest, and n_jobs=-1 to train on all available CPU cores.
rfr = RandomForestRegressor(n_estimators=50, random_state=407, n_jobs=-1)

In [None]:
%%time
# Run the backtest with the random forest between start_date and end_date.
# NOTE(review): the saved output stops after three dates, so this run looks
# unfinished -- rerun to completion before relying on `results`.
results = port.backtest(rfr, start_date, end_date)

2015-01-01 0.557418
2015-02-01 0.085801
2015-03-01 -0.005165


In [9]:
# Debug helper (disabled): print every attribute of `port` except `portfolio`.
# for key in port.__dict__.keys():
#     if key != 'portfolio':
#         print(key, port.__dict__[key])