In [15]:
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta
from pandas_datareader import data as pdr
yf.pdr_override() # <== that's all it takes :-)
import numpy as np
import statistics
from pprint import pprint
from math import isnan
import itertools

# Load the S&P 500 rebalance announcements and parse the three date columns.
sp500_history = pd.read_excel('./sp500_rebalance_announcements.xlsx')
for date_column in ('implementation_date', 'effective_date', 'announcement_date'):
    sp500_history[date_column] = pd.to_datetime(sp500_history[date_column])

# Analysis window: announced strictly after start_date, up to and including end_date.
start_date = datetime(2015, 12, 20)
end_date = datetime(2020, 12, 31)
mask = (sp500_history['announcement_date'] > start_date) & (sp500_history['announcement_date'] <= end_date)

# Keep only the columns used downstream, restrict to the window, order by
# announcement date then change type, and drop rows that are exact duplicates.
sp500_history = (
    sp500_history[['announcement_date', 'implementation_date', 'effective_date', 'ticker', 'name', 'type']]
    .loc[mask]
    .sort_values(['announcement_date', 'type'], ascending=[True, True])
    .drop_duplicates()
    .reset_index(drop=True)
)

sp500_history

Unnamed: 0,announcement_date,implementation_date,effective_date,ticker,name,type
0,2015-12-28,2016-01-04,NaT,TW,Willis Towers Watson,ADDED
1,2015-12-28,2016-01-04,NaT,FOSL,Fossil Group,DELETED
2,2016-01-13,2016-01-15,NaT,EXR,Extra Space Storage,ADDED
3,2016-01-13,2016-01-15,NaT,CB,Chubb,DELETED
4,2016-01-22,2016-01-29,NaT,FRT,Federal Realty Trust,ADDED
...,...,...,...,...,...,...
151,2020-02-27,NaT,2020-03-03,XEC,Cimarex Energy,DELETED
152,2020-03-31,NaT,2020-04-03,OTIS,Otis Worldwide,ADDED
153,2020-03-31,NaT,2020-04-03,CARR,Carrier Global,ADDED
154,2020-03-31,NaT,2020-04-06,RTN,Raytheon,DELETED


In [4]:
# Gather the rebalance dates from every sheet of the trading-calendar workbook.
# With sheet_name=None the workbook is read as a mapping of sheet name -> DataFrame.
calendar = pd.read_excel('./Nasdaq_Trading_Calendar.xlsx', sheet_name=None)
rebal_flag_column = 'S&P Indexes Rebalance S&P 500, S&P 400, and S&P 600'
rebal_dates = []
for sheet in calendar.values():
    # Rows carrying a 1 in the rebalance column are the rebalance days of that sheet
    flagged_rows = sheet.loc[sheet[rebal_flag_column] == 1]
    rebal_dates.extend(flagged_rows['Date'].values)
pprint(rebal_dates)

[numpy.datetime64('2020-03-20T00:00:00.000000000'),
 numpy.datetime64('2020-06-19T00:00:00.000000000'),
 numpy.datetime64('2020-09-18T00:00:00.000000000'),
 numpy.datetime64('2020-12-18T00:00:00.000000000'),
 numpy.datetime64('2019-03-15T00:00:00.000000000'),
 numpy.datetime64('2019-06-21T00:00:00.000000000'),
 numpy.datetime64('2019-09-20T00:00:00.000000000'),
 numpy.datetime64('2019-12-20T00:00:00.000000000'),
 numpy.datetime64('2018-03-16T00:00:00.000000000'),
 numpy.datetime64('2018-06-15T00:00:00.000000000'),
 numpy.datetime64('2018-09-21T00:00:00.000000000'),
 numpy.datetime64('2018-12-21T00:00:00.000000000'),
 numpy.datetime64('2017-03-17T00:00:00.000000000'),
 numpy.datetime64('2017-06-16T00:00:00.000000000'),
 numpy.datetime64('2017-09-15T00:00:00.000000000'),
 numpy.datetime64('2017-12-15T00:00:00.000000000'),
 numpy.datetime64('2016-03-18T00:00:00.000000000'),
 numpy.datetime64('2016-06-17T00:00:00.000000000'),
 numpy.datetime64('2016-09-16T00:00:00.000000000'),
 numpy.datet

In [61]:
# Attributes that together identify one strategy:
#   strategy_type          either 'announcement' or 'implementation'
#   rebal_type             'regular' or 'ad_hoc'
#   add_delete             'add' or 'delete'
#   entry_date, exit_date  offsets in price rows before (-) or after (+) the reference date
#                          (the announcement or implementation date, depending on strategy_type)
#   entry_time, exit_time  'Open' or 'Close'
strategy_attributes = ['strategy_type', 'rebal_type', 'add_delete', 'entry_date', 'entry_time', 'exit_date', 'exit_time']

# Result table layout: the three event dates, then the strategy attributes, then the accumulators.
output_columns = (['announcement_date', 'implementation_date', 'effective_date']
                  + strategy_attributes
                  + ['total', 'up', 'count'])

df_output = pd.DataFrame(columns=output_columns)

In [39]:
def update_output_table(df_output, ann_date, impl_date, eff_date, strategy_type, rebal_type, add_delete, 
                        entry_date, entry_time, exit_date, exit_time, price_change):
    '''
    Accumulate one trade result into the output table.

    A row is identified by the three event dates plus the seven strategy
    attributes. When a row with the same key already exists, its 'total',
    'up' and 'count' cells are incremented in place; otherwise a new row with
    count 1 is added at the end of the table.

    Parameters
    ----------
    df_output : DataFrame holding the key columns and 'total', 'up', 'count'
    ann_date, impl_date, eff_date : event dates forming the first part of the key
    strategy_type, rebal_type, add_delete, entry_date, entry_time, exit_date,
    exit_time : strategy attributes forming the rest of the key
    price_change : return of the trade; values > 0 count as an "up" trade

    Returns
    -------
    The updated table. Always use the return value: adding a row produces a
    new DataFrame rather than modifying df_output.
    '''
    up = 1 if price_change > 0 else 0

    key = {'announcement_date': ann_date, 'implementation_date': impl_date, 'effective_date': eff_date,
           'strategy_type': strategy_type, 'rebal_type': rebal_type, 'add_delete': add_delete,
           'entry_date': entry_date, 'entry_time': entry_time,
           'exit_date': exit_date, 'exit_time': exit_time}

    # Boolean mask of the rows whose key columns all equal the requested key
    conditions = None
    for column, value in key.items():
        column_matches = df_output[column] == value
        conditions = column_matches if conditions is None else conditions & column_matches

    # new row
    if not conditions.any():
        new_row = dict(key)
        new_row.update({'total': price_change, 'up': up, 'count': 1})
        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0
        df_output = pd.concat([df_output, pd.DataFrame([new_row])], ignore_index=True)

    # updating existing row
    else:
        df_output.loc[conditions, 'total'] += price_change
        df_output.loc[conditions, 'up'] += up
        df_output.loc[conditions, 'count'] += 1

    return df_output

In [41]:
def find_price_at_index(prices, date_index, index_offset, time_of_day):
    '''
    Look up the price in column `time_of_day` at row `date_index + index_offset`.

    Returns -1 when that row lies outside the table or the stored price is NaN.
    '''
    row_position = date_index + index_offset
    if not 0 <= row_position < len(prices):
        return -1
    value = prices.iloc[row_position][time_of_day]
    return -1 if isnan(value) else value

In [42]:
def find_price_change(prices, date_index, entry_date, entry_time, exit_date, exit_time):
    '''
    Fractional price change from the entry leg to the exit leg
    (exit price / entry price - 1).

    Both legs are row offsets from `date_index`, priced in the given column.
    Returns NaN as soon as either leg has no valid price.
    '''
    leg_prices = []
    # Entry is resolved first; the exit lookup is skipped when the entry is invalid
    for offset, time_of_day in ((entry_date, entry_time), (exit_date, exit_time)):
        leg_price = find_price_at_index(prices, date_index, offset, time_of_day)
        if leg_price < 0 or isnan(leg_price):
            return float('NaN')
        leg_prices.append(leg_price)
    entry_price, exit_price = leg_prices
    return exit_price / entry_price - 1 

In [60]:
def analyze_strategies(times_of_day, strategy_type, prices, df_output, ann_date, impl_date, eff_date, 
                       rebal_type, add_delete, exit_time='Close', max_offset=4):
    '''
    Evaluate every entry/exit combination around one index change and
    accumulate the results into df_output.

    The reference row in `prices` is the first row on or after `ann_date` for
    the 'announcement' strategy, or the row exactly at `impl_date` for the
    'implementation' strategy. Two families of trades are recorded:

    * 'implementation' only: enter 1..max_offset rows before the reference
      row (at each time in `times_of_day`) and exit on the reference row.
    * both strategies: enter on the reference row and exit 1..max_offset rows
      later. Entering at 'Open' is skipped when the reference row is the
      announcement date itself.

    Parameters
    ----------
    times_of_day : iterable of price column names used for the entry leg
    strategy_type : 'announcement' or 'implementation'
    prices : DataFrame of prices indexed by date, with the columns named in
        `times_of_day` and `exit_time`
    df_output : results table, updated through update_output_table
    ann_date, impl_date, eff_date, rebal_type, add_delete : passed through as
        part of the row key
    exit_time : price column used for the exit leg. It used to be read from an
        undefined (global) name; it is now an explicit parameter.
    max_offset : largest number of rows before/after the reference row to try

    Returns
    -------
    The updated results table. It is returned unchanged, after printing a
    message, when the strategy type is unknown or the reference date cannot
    be located in `prices`.
    '''
    if strategy_type not in ('announcement', 'implementation'):
        print('Wrong strategy type!')
        print(strategy_type)
        return df_output

    # Find date index in the prices
    try:
        if strategy_type == 'announcement':
            # First row on or after the announcement date. get_indexer gives -1
            # when there is none (get_loc no longer accepts method=).
            date_index = int(prices.index.get_indexer([ann_date], method='backfill')[0])
        else:
            date_index = prices.index.get_loc(impl_date)
    except (KeyError, ValueError):
        date_index = -1
    if date_index < 0:
        print("Error: No price for date was found among yahoo prices")
        print('strategy_type:' + strategy_type)
        print('announcement_date:' + str(ann_date))
        print('implementation_date:' + str(impl_date))
        print(prices)
        return df_output

    def _record(table, entry_date, entry_time, exit_date):
        # Price one entry/exit pair and add it to the table; pairs without valid prices are skipped
        price_change = find_price_change(prices, date_index, entry_date, entry_time, exit_date, exit_time)
        if isnan(price_change):
            return table
        return update_output_table(table, ann_date, impl_date, eff_date, strategy_type, rebal_type,
                                   add_delete, entry_date, entry_time, exit_date, exit_time, price_change)

    # Enter before the implementation date, and exit on the implementation date
    # Not applicable for announcement date analysis
    if strategy_type == 'implementation':
        # Never reach back further than the first available row
        for entry_date in range(max(-max_offset, -date_index), 0):
            for entry_time in times_of_day:
                df_output = _record(df_output, entry_date, entry_time, 0)

    # Enter on the announcement/implementation date, and exit afterwards
    # If announcement date falls on a trading day (i.e. has prices),
    # it's not possible to enter at market opening
    reference_is_announcement_day = prices.index[date_index] == ann_date
    # Longest holding period first, limited to the rows that exist after the reference row
    for exit_date in range(min(max_offset, len(prices) - 1 - date_index), 0, -1):
        for entry_time in times_of_day:
            if reference_is_announcement_day and entry_time == 'Open':
                continue
            df_output = _record(df_output, 0, entry_time, exit_date)
    return df_output

In [62]:
days_bef_aft = 20 # calendar days before and after the event date to pull from Yahoo finance

times_of_day = ['Open', 'Close']

has_impl_date = False


def _download_window(ticker, center_date):
    '''
    Download prices for `ticker` from days_bef_aft calendar days before
    `center_date` to days_bef_aft calendar days after it.
    '''
    # End date is exclusive, so need to increase by 1
    return pdr.get_data_yahoo(ticker, start=center_date - timedelta(days=days_bef_aft),
                              end=center_date + timedelta(days=days_bef_aft + 1))


def _nearest_trading_day(prices, target_date, method):
    '''
    Return the index label of `prices` at or next to `target_date`.

    method='backfill' picks the first label on or after the target,
    method='pad' the last label on or before it. Returns None when no such
    label exists.
    '''
    position = prices.index.get_indexer([target_date], method=method)[0]
    if position < 0:
        return None
    return prices.index[position]


# Main loop: one pass per announced index change
for idx, row in sp500_history.iterrows():
    # Exactly one of implementation_date / effective_date is expected per row;
    # the other one is derived from the price calendar below.
    has_impl_date = not pd.isna(row['implementation_date'])
    has_eff_date = not pd.isna(row['effective_date'])
    if has_impl_date == has_eff_date:
        print("Error: Expected exactly one of implementation date or effective date.")
        print(row)
        continue
    if has_impl_date:
        impl_date = row['implementation_date']
    else:
        eff_date = row['effective_date']

    ann_date = row['announcement_date']

    # A row may list several tickers separated by ';'
    for ticker in row['ticker'].split(';'):

        impl_prices = _download_window(ticker, impl_date if has_impl_date else eff_date)
        if len(impl_prices) == 0:
            continue

        # Find the implementation date or effective date whichever is not available in the data
        if has_impl_date:
            # Effective date = first priced day after the implementation date
            found_date = _nearest_trading_day(impl_prices, impl_date + timedelta(days=1), 'backfill')
            if found_date is None:
                print("Error: No date after implementation date was found among yahoo impl_prices")
                print(impl_date)
                continue
            eff_date = found_date
        else:
            # Implementation date = last priced day before the effective date
            found_date = _nearest_trading_day(impl_prices, eff_date - timedelta(days=1), 'pad')
            if found_date is None:
                print("Error: No date before effective date was found among yahoo impl_prices")
                print(eff_date)
                continue
            impl_date = found_date

        # NOTE(review): rebal_dates holds numpy.datetime64 values while impl_date is a
        # pandas timestamp - confirm this membership test matches as intended.
        if impl_date in rebal_dates:
            rebal_type = 'regular'
        else:
            rebal_type = 'ad_hoc'

        if row['type'] == 'ADDED':
            add_delete = 'add'
        else:
            add_delete = 'delete'

        # Sanity check
        print('impl_date: ' + str(impl_date))
        print('eff_date: ' + str(eff_date))
        if impl_date == eff_date:
            print('Wrong!')

        # Analyze implementation date strategies:
        df_output = analyze_strategies(times_of_day, 'implementation', impl_prices, df_output, ann_date,
                                       impl_date, eff_date, rebal_type, add_delete)

        # Analyze announcement date strategies (needs its own price window around the announcement):
        ann_prices = _download_window(ticker, ann_date)
        if len(ann_prices) == 0:
            continue
        df_output = analyze_strategies(times_of_day, 'announcement', ann_prices, df_output, ann_date,
                                       impl_date, eff_date, rebal_type, add_delete)
        

[*********************100%***********************]  1 of 1 completed

1 Failed download:
- TW: Data doesn't exist for startDate = 1450108800, endDate = 1453651200
[*********************100%***********************]  1 of 1 completed
impl_date: 2016-01-04 00:00:00
eff_date: 2016-01-05 00:00:00
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
impl_date: 2016-01-15 00:00:00
eff_date: 2016-01-19 00:00:00
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
impl_date: 2016-01-15 00:00:00
eff_date: 2016-01-19 00:00:00
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
impl_date: 2016-01-29 00:00:00
eff_date: 2016-02-01 00:00:00
[*********************100%***********************]  1 of 1 completed
[*********************100%***************

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
impl_date: 2016-06-30 00:00:00
eff_date: 2016-07-01 00:00:00
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
impl_date: 2016-09-02 00:00:00
eff_date: 2016-09-06 00:00:00
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
impl_date: 2016-09-02 00:00:00
eff_date: 2016-09-06 00:00:00
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
impl_date: 2016-09-30 00:00:00
eff_date: 2016-10-03 00:00:00
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
impl_date: 2016-09-30 00:00:00
eff_date: 2016-10-03 00:00:00
[****

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
impl_date: 2018-05-30 00:00:00
eff_date: 2018-05-31 00:00:00
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- WYN: No data found for this date range, symbol may be delisted
[*********************100%***********************]  1 of 1 completed
impl_date: 2018-06-06 00:00:00
eff_date: 2018-06-07 00:00:00
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
impl_date: 2018-06-06 00:00:00
eff_date: 2018-06-07 00:00:00
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
impl_date: 2018-06-15 00:00:00
eff_date: 2018-06-18 00:00:00
[*********************100%***********************]

impl_date: 2019-03-19 00:00:00
eff_date: 2019-03-20 00:00:00
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
impl_date: 2019-03-19 00:00:00
eff_date: 2019-03-20 00:00:00
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
impl_date: 2019-04-01 00:00:00
eff_date: 2019-04-02 00:00:00
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
impl_date: 2019-04-02 00:00:00
eff_date: 2019-04-03 00:00:00
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
impl_date: 2019-05-31 00:00:00
eff_date: 2019-06-03 00:00:00
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
impl_

In [63]:
# Display the accumulated results (one row per combination of event dates and strategy attributes)
df_output

Unnamed: 0,announcement_date,implementation_date,effective_date,strategy_type,rebal_type,add_delete,entry_date,entry_time,exit_date,exit_time,total,up,count
0,2015-12-28,2016-01-04,2016-01-05,implementation,ad_hoc,delete,-4,Open,0,Close,-0.081436,0,1
1,2015-12-28,2016-01-04,2016-01-05,implementation,ad_hoc,delete,-4,Close,0,Close,-0.065413,0,1
2,2015-12-28,2016-01-04,2016-01-05,implementation,ad_hoc,delete,-3,Open,0,Close,-0.077234,0,1
3,2015-12-28,2016-01-04,2016-01-05,implementation,ad_hoc,delete,-3,Close,0,Close,-0.075243,0,1
4,2015-12-28,2016-01-04,2016-01-05,implementation,ad_hoc,delete,-2,Open,0,Close,-0.074993,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2000,2020-03-31,2020-04-03,2020-04-06,implementation,ad_hoc,delete,0,Close,1,Close,0.170478,1,1
2001,2020-03-31,2020-04-03,2020-04-06,announcement,ad_hoc,delete,0,Close,4,Close,0.146640,1,1
2002,2020-03-31,2020-04-03,2020-04-06,announcement,ad_hoc,delete,0,Close,3,Close,-0.020367,0,1
2003,2020-03-31,2020-04-03,2020-04-06,announcement,ad_hoc,delete,0,Close,2,Close,-0.093686,0,1


In [64]:
def get_uniques(input_list):
    '''
    Return the distinct values of `input_list` as a list, in first-seen order.

    A set was used before, whose iteration order for strings changes from one
    Python process to the next; keeping first-seen order makes everything
    derived from this list reproducible across kernel restarts.
    Values must be hashable.
    '''
    # dict keys are unique and preserve insertion order
    return list(dict.fromkeys(input_list))

In [65]:
'''
Calculate population statistics
'''

stats_df_columns = []
stats_df_columns.extend(strategy_attributes)
stats_df_columns.extend(['total', 'up', 'count', 'mean', 'std', 'max', 'min', 'sharpe', 'uppct'])


def compute_strategy_stats(results, attributes, columns):
    '''
    Aggregate the per-event results into one row per strategy.

    results    : table with the `attributes` columns plus 'total', 'up', 'count'
    attributes : column names that together identify a strategy
    columns    : column order of the returned table

    For each strategy that occurs in `results` the row contains the summed
    'total', 'up' and 'count', plus:
      mean   = total / count
      std    = sample standard deviation of the per-row totals (0 with fewer than two rows)
      max/min = largest / smallest per-row total
      sharpe = mean / std * sqrt(252), with std replaced by 0.00001 when it is 0
      uppct  = up / count
    Strategies appear in the order they first occur in `results`.
    '''
    records = []
    # One pass over the observed combinations instead of testing every
    # Cartesian product of attribute values against the whole table.
    for strategy, relevant_rows in results.groupby(list(attributes), sort=False):
        if not isinstance(strategy, tuple):
            strategy = (strategy,)

        totals = [float(value) for value in relevant_rows['total']]
        total = relevant_rows['total'].sum()
        up = relevant_rows['up'].sum()
        count = relevant_rows['count'].sum()

        mean = total / count
        # stdev needs at least two data points, i.e. two rows. The summed count
        # is not a safe guard because a single row can carry a count above 1.
        # NOTE(review): mean is per trade while std is taken over per-row totals;
        # the two only share a basis when every row has count == 1 - confirm intent.
        std = statistics.stdev(totals) if len(totals) > 1 else 0
        sharpe = mean / (std if std != 0 else 0.00001) * np.sqrt(252)

        record = dict(zip(attributes, strategy))
        record.update({'total': total, 'up': up, 'count': count,
                       'mean': mean, 'std': std, 'max': max(totals), 'min': min(totals),
                       'sharpe': sharpe, 'uppct': up / count})
        records.append(record)

    # Build the frame once; DataFrame.append was removed in pandas 2.0
    return pd.DataFrame(records, columns=columns)


stats_df = compute_strategy_stats(df_output, strategy_attributes, stats_df_columns)


In [66]:
'''
Writing to excel
'''
# The with-block saves and closes the workbook on exit; ExcelWriter.save() was removed in pandas 2.0
with pd.ExcelWriter('sp500_announcement_analysis.xlsx', engine='xlsxwriter') as writer:
    df_output.to_excel(writer, sheet_name='data')
    stats_df.to_excel(writer, sheet_name='strategy_stats')