In [3]:
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta
from pandas_datareader import data as pdr
yf.pdr_override()
import numpy as np
import statistics
from math import isnan
import itertools

In [4]:
# Analysis window (both bounds inclusive) applied to the announcement date.
start_date = datetime(2015, 1, 1)
end_date = datetime(2020, 12, 31)

# Read the raw announcement log, parse its three date columns and keep only
# the columns the rest of the notebook uses.
sp500_history = pd.read_excel('./sp500_rebalance_announcements.xlsx')
sp500_history = sp500_history.assign(
    implementation_date=pd.to_datetime(sp500_history['implementation_date']),
    effective_date=pd.to_datetime(sp500_history['effective_date']),
    announcement_date=pd.to_datetime(sp500_history['announcement_date']),
)[['announcement_date', 'implementation_date', 'effective_date', 'ticker', 'name', 'type']]

# Rows announced on or after the start date and on or before the end date
mask = sp500_history['announcement_date'].between(start_date, end_date)

# Filter, sort by announcement date then change type, drop exact duplicate
# rows and renumber the index from zero.
sp500_history = (
    sp500_history.loc[mask]
    .sort_values(['announcement_date', 'type'], ascending=[True, True])
    .drop_duplicates()
    .reset_index(drop=True)
)

sp500_history

Unnamed: 0,announcement_date,implementation_date,effective_date,ticker,name,type
0,2015-01-21,2015-01-26,NaT,ENDP,Endo International,ADDED
1,2015-01-21,2015-01-26,NaT,COV,Covidien,DELETED
2,2015-03-09,2015-03-11,NaT,SWKS,Skyworks Solutions,ADDED
3,2015-03-09,2015-03-11,NaT,PETM,PetSmart,DELETED
4,2015-03-13,2015-03-17,NaT,HSIC,Henry Schein,ADDED
...,...,...,...,...,...,...
211,2020-05-06,NaT,2020-05-12,DPZ,Domino’s Pizza,ADDED
212,2020-05-06,NaT,2020-05-12,AGN,Allergan,DELETED
213,2020-05-06,NaT,2020-05-12,CPRI,Capri Holdings,DELETED
214,2020-05-18,NaT,2020-05-22,WST,West Pharmaceutical Services,ADDED


In [5]:
# Build `rebal_dates`: a Series of dates flagged as S&P index rebalance days.
# Every sheet of the trading-calendar workbook is scanned for rows whose
# rebalance flag equals 1; the dates stored in '2015_rebal_dates.xlsx' are
# stacked underneath.
calendar = pd.read_excel('./Nasdaq_Trading_Calendar.xlsx', sheet_name=None)
rebal_dates = []
for year in calendar:
    sheet = calendar[year]
    mask = sheet['S&P Indexes Rebalance S&P 500, S&P 400, and S&P 600'] == 1
    # One entry per flagged row, in sheet order
    rebal_dates.extend(sheet.loc[mask, 'Date'].values)
calendar_2015 = pd.read_excel('2015_rebal_dates.xlsx')
rebal_dates = pd.DataFrame(data=rebal_dates)
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent and
# also works on older pandas versions.
rebal_dates = pd.concat([rebal_dates, calendar_2015]).reset_index(drop=True)
# NOTE(review): only column 0 is kept, so the 2015 workbook contributes dates
# only if its column is also labelled 0 - confirm against the file.
rebal_dates = rebal_dates[0]

In [6]:
# Columns that identify one trading strategy:
#   strategy_type          - either 'announcement' or 'implementation'
#   rebal_type             - 'regular' or 'ad_hoc'
#   add_delete             - 'add' or 'delete'
#   entry_date / exit_date - signed number of price rows before (-) or after (+)
#                            the row of the strategy's reference date
#   entry_time / exit_time - 'Open' or 'Close'
strategy_attributes = [
    'strategy_type', 'rebal_type', 'add_delete',
    'entry_date', 'entry_time', 'exit_date', 'exit_time',
]

# Result table layout: the three event dates, then the strategy key, then the
# running aggregates.
output_columns = (
    ['announcement_date', 'implementation_date', 'effective_date']
    + strategy_attributes
    + ['total', 'up', 'count']
)

# Empty accumulator that the main loop fills in.
df_output = pd.DataFrame(columns=output_columns)

In [7]:
def update_output_table(df_output, ann_date, impl_date, eff_date, strategy_type, rebal_type, add_delete,
                        entry_date, entry_time, exit_date, exit_time, price_change):
    '''
    Record one trade result in the output table and return the table.

    A row is identified by the three event dates plus the seven strategy
    attributes. If no row with that key exists, a new one is added with
    total=price_change, up=1 or 0 and count=1. Otherwise the matching row is
    updated in place: price_change is added to 'total', 1 is added to 'up'
    when price_change > 0, and 'count' is incremented.

    Parameters:
        df_output: DataFrame holding (at least) the key columns and 'total', 'up', 'count'.
        ann_date, impl_date, eff_date: event dates forming the first part of the key.
        strategy_type, rebal_type, add_delete, entry_date, entry_time,
        exit_date, exit_time: strategy attributes forming the rest of the key.
        price_change: return of the trade to accumulate.

    Returns:
        The table containing the new or updated row. The caller must use the
        returned object: adding a row produces a new DataFrame.
    '''
    up = 1 if price_change > 0 else 0

    key = {'announcement_date': ann_date, 'implementation_date': impl_date, 'effective_date': eff_date,
           'strategy_type': strategy_type, 'rebal_type': rebal_type, 'add_delete': add_delete,
           'entry_date': entry_date, 'entry_time': entry_time,
           'exit_date': exit_date, 'exit_time': exit_time}

    # Boolean mask of rows whose key columns all equal the given values
    conditions = pd.Series(True, index=df_output.index)
    for column, value in key.items():
        conditions = conditions & (df_output[column] == value)

    # new row
    if not conditions.any():
        new_row = pd.DataFrame([dict(key, total=price_change, up=up, count=1)])
        if df_output.empty:
            # Nothing to stack onto yet: return the new row laid out in the
            # table's column order (columns unknown to the table go last).
            ordered = list(df_output.columns) + [c for c in new_row.columns if c not in df_output.columns]
            return new_row.reindex(columns=ordered)
        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # equivalent and is also available on older versions.
        return pd.concat([df_output, new_row], ignore_index=True)

    # updating existing row
    df_output.loc[conditions, 'total'] += price_change
    df_output.loc[conditions, 'up'] += up
    df_output.loc[conditions, 'count'] += 1

    return df_output

In [8]:
def find_price_at_index(prices, date_index, index_offset, time_of_day):
    '''
    Look up the `time_of_day` column of the row located `index_offset`
    positions away from row `date_index` in `prices`.

    Returns -1 when that position falls outside the table or the stored
    value is NaN; otherwise returns the stored value.
    '''
    position = date_index + index_offset
    if not 0 <= position < len(prices):
        return -1
    value = prices.iloc[position][time_of_day]
    return -1 if isnan(value) else value

In [9]:
def find_price_change(prices, date_index, entry_date, entry_time, exit_date, exit_time):
    '''
    Fractional return (exit price / entry price - 1) of a trade whose entry
    and exit rows are given as offsets from row `date_index`.

    Returns NaN as soon as either price lookup is negative (the lookup
    helper's "not available" marker) or NaN.
    '''
    quotes = []
    # Entry leg first, then exit leg; stop at the first unusable price.
    for offset, time_of_day in ((entry_date, entry_time), (exit_date, exit_time)):
        quote = find_price_at_index(prices, date_index, offset, time_of_day)
        if quote < 0 or isnan(quote):
            return float('NaN')
        quotes.append(quote)
    entry_price, exit_price = quotes
    return exit_price / entry_price - 1

In [12]:
def _find_reference_row(strategy_type, prices, ann_date, impl_date):
    '''
    Return the integer position in `prices` of the strategy's reference date.

    'announcement' uses the last row at or before `ann_date`; any other
    strategy type uses the row exactly matching `impl_date`.
    Raises KeyError when no such row exists.
    '''
    if strategy_type == 'announcement':
        # Index.get_loc(..., method='ffill') was removed in pandas 2.0.
        # get_indexer performs the same pad lookup and reports a miss as -1.
        position = prices.index.get_indexer([ann_date], method='ffill')[0]
        if position < 0:
            raise KeyError(ann_date)
        return int(position)
    return prices.index.get_loc(impl_date)


def _record_trades(times_of_day, prices, date_index, df_output, event_key, entry_date, exit_date):
    '''
    Score every entry-time/exit-time pairing for one (entry_date, exit_date)
    offset pair and accumulate the valid ones into `df_output`.

    `event_key` is (ann_date, impl_date, eff_date, strategy_type, rebal_type,
    add_delete), in the positional order update_output_table expects.
    '''
    for entry_time in times_of_day:
        for exit_time in times_of_day:
            # Don't enter and exit at the same date and time, or exit before enter
            if entry_date == exit_date and (entry_time == exit_time or (entry_time == 'Close' and exit_time == 'Open')):
                continue

            price_change = find_price_change(prices, date_index, entry_date, entry_time, exit_date, exit_time)
            if isnan(price_change):
                continue

            df_output = update_output_table(df_output, *event_key, entry_date, entry_time,
                                            exit_date, exit_time, price_change)
    return df_output


def analyze_strategies(times_of_day, strategy_type, prices, df_output, ann_date, impl_date, eff_date,
                       rebal_type, add_delete):
    '''
    Evaluate all entry/exit combinations around one event and add the
    resulting returns to `df_output`.

    Offsets are counted in rows of `prices` relative to the reference row
    (announcement row for 'announcement', implementation row for
    'implementation'). The pivot offset is 0 for 'implementation' and 1 for
    'announcement'. Two families of trades are recorded:
      1. enter up to 4 rows before the reference row through the pivot, exit at the pivot;
      2. enter at the pivot, exit up to 4 rows after it.

    Parameters:
        times_of_day: price columns to trade at (the notebook passes ['Open', 'Close']).
        strategy_type: 'announcement' or 'implementation'.
        prices: price table indexed by date.
        df_output: accumulator table, passed through update_output_table.
        ann_date, impl_date, eff_date, rebal_type, add_delete: event attributes
            stored with each recorded trade.

    Returns:
        The updated accumulator. When the strategy type is unknown or the
        reference date cannot be located, a message is printed and the
        accumulator is returned unchanged.
    '''
    if strategy_type not in ('announcement', 'implementation'):
        print('Wrong strategy type!')
        print(strategy_type)
        return df_output

    # Find date index in the prices
    try:
        date_index = _find_reference_row(strategy_type, prices, ann_date, impl_date)
    except Exception:
        # Best effort: report and skip this event. Unlike a bare `except:`,
        # KeyboardInterrupt still stops a long-running notebook loop.
        print("Error: No price for date was found among yahoo prices")
        print('strategy_type:' + strategy_type)
        print('announcement_date:' + str(ann_date))
        print('implementation_date:' + str(impl_date))
        print(prices)
        return df_output
    if date_index < 0:
        return df_output

    event_key = (ann_date, impl_date, eff_date, strategy_type, rebal_type, add_delete)
    max_days = 4  # how many rows before / after the pivot are explored
    pivot = 0 if strategy_type == 'implementation' else 1

    # For implementation strategy, enter before the implementation date, and exit on implementation date
    # For announcement strategy, enter before or on the announcement date, and exit one day after the announcement date
    # (the same-row Open -> Close trade at the pivot is recorded here).
    first_entry = max(-max_days, -date_index)  # never reach before the first price row
    for entry_date in range(first_entry, pivot + 1):
        df_output = _record_trades(times_of_day, prices, date_index, df_output, event_key,
                                   entry_date, pivot)

    # For implementation strategy, enter on the implementation date, and exit afterwards
    # For announcement strategy, enter on one day after announcement date, and exit afterwards
    # The exit offset stays strictly after the pivot: exiting on the pivot row
    # itself was already handled by the loop above, and visiting it again
    # would add the same trade to total/up/count a second time.
    last_exit = min(pivot + max_days, len(prices) - 1 - date_index)  # never reach past the last price row
    for exit_date in range(last_exit, pivot, -1):
        df_output = _record_trades(times_of_day, prices, date_index, df_output, event_key,
                                   pivot, exit_date)
    return df_output

In [13]:
days_bef_aft = 20 # the range of days before and after effective date to pull from Yahoo finance

times_of_day = ['Open', 'Close']

has_impl_date = False

# Main loop: for every announcement row, download prices around the event,
# derive whichever of implementation/effective date the row lacks, classify
# the event, and score both the implementation and announcement strategies.
# Results accumulate into df_output, so re-running this cell without
# re-creating df_output adds every trade again.
for idx, row in sp500_history.iterrows():
    # Has implementation date, but not effective date
    if pd.isna(row['effective_date']) and not pd.isna(row['implementation_date']):
        has_impl_date = True
        impl_date = row['implementation_date']

    # Has effective date but not implementation date
    elif pd.isna(row['implementation_date']) and not pd.isna(row['effective_date']):
        has_impl_date = False
        eff_date = row['effective_date']

    # Reached when both dates are missing and also when both are present
    else:
        print("Error: Has neither implementation or effective date.")
        print(row)
        continue

    tickers = row['ticker'].split(';')
    for ticker in tickers:

        # End date is exclusive, so need to increase by 1
        if has_impl_date:
            impl_prices = pdr.get_data_yahoo(ticker, start=impl_date - timedelta(days=days_bef_aft),
                                             end=impl_date + timedelta(days=days_bef_aft + 1))
        else:
            impl_prices = pdr.get_data_yahoo(ticker, start=eff_date - timedelta(days=days_bef_aft),
                                             end=eff_date + timedelta(days=days_bef_aft + 1))

        if len(impl_prices) == 0:
            continue

        # Find the implementation date or effective date whichever is not available in the data.
        # Index.get_loc(..., method=...) was removed in pandas 2.0; get_indexer
        # does the same nearest-row lookup and returns -1 when nothing matches.
        if has_impl_date:
            # Effective date = first price row after the implementation date
            try:
                eff_date_index = impl_prices.index.get_indexer([impl_date + timedelta(days=1)],
                                                               method='backfill')[0]
            except Exception:
                eff_date_index = -1
            if eff_date_index < 0:
                print("Error: No date after implementation date was found among yahoo impl_prices")
                print(impl_date)
                continue
            eff_date = impl_prices.iloc[eff_date_index].name

        else:
            # Implementation date = last price row before the effective date
            try:
                impl_date_index = impl_prices.index.get_indexer([eff_date - timedelta(days=1)],
                                                                method='pad')[0]
            except Exception:
                impl_date_index = -1
            if impl_date_index < 0:
                print("Error: No date before effective date was found among yahoo impl_prices")
                print(eff_date)
                continue
            impl_date = impl_prices.iloc[impl_date_index].name

        # An event implemented on a listed rebalance date counts as 'regular'
        if (impl_date == rebal_dates).any():
            rebal_type = 'regular'
        else:
            rebal_type = 'ad_hoc'

        if row['type'] == 'ADDED':
            add_delete = 'add'
        else:
            add_delete = 'delete'

        # Sanity check
        print('impl_date: ' + str(impl_date))
        print('eff_date: ' + str(eff_date))
        if impl_date == eff_date:
            print('Wrong!')

        # Analyze implementation date strategies:
        strategy_type = 'implementation'
        df_output = analyze_strategies(times_of_day, strategy_type, impl_prices, df_output, row['announcement_date'],
                           impl_date, eff_date, rebal_type, add_delete)

        # Analyze announcement date strategies:
        strategy_type = 'announcement'
        ann_prices = pdr.get_data_yahoo(ticker, start=row['announcement_date'] - timedelta(days=days_bef_aft),
                                        end=row['announcement_date'] + timedelta(days=days_bef_aft + 1))
        if len(ann_prices) == 0:
            continue
        df_output = analyze_strategies(times_of_day, strategy_type, ann_prices, df_output, row['announcement_date'],
                           impl_date, eff_date, rebal_type, add_delete)
        

[*********************100%***********************]  1 of 1 completed
impl_date: 2015-01-26 00:00:00
eff_date: 2015-01-27 00:00:00
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
impl_date: 2015-01-26 00:00:00
eff_date: 2015-01-27 00:00:00
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
impl_date: 2015-03-11 00:00:00
eff_date: 2015-03-12 00:00:00
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- PETM: Data doesn't exist for startDate = 1424275200, endDate = 1427817600
[*********************100%***********************]  1 of 1 completed
impl_date: 2015-03-17 00:00:00
eff_date: 2015-03-18 00:00:00
[*********************100%***********************]  1 of 1 completed
[*********************100%*************

[*********************100%***********************]  1 of 1 completed

1 Failed download:
- TW: Data doesn't exist for startDate = 1450108800, endDate = 1453651200
[*********************100%***********************]  1 of 1 completed
impl_date: 2016-01-04 00:00:00
eff_date: 2016-01-05 00:00:00
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
impl_date: 2016-01-15 00:00:00
eff_date: 2016-01-19 00:00:00
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
impl_date: 2016-01-15 00:00:00
eff_date: 2016-01-19 00:00:00
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
impl_date: 2016-01-29 00:00:00
eff_date: 2016-02-01 00:00:00
[*********************100%***********************]  1 of 1 completed
[*********************100%***************

[*********************100%***********************]  1 of 1 completed
Error: No price for date was found among yahoo prices
strategy_type:announcement
announcement_date:2016-06-23 00:00:00
implementation_date:2016-07-01 00:00:00
                 Open       High        Low      Close  Adj Close   Volume
Date                                                                      
2016-07-05  49.400002  54.340000  48.395000  48.599998  47.747356  6230700
2016-07-06  47.070000  48.599998  46.290001  47.750000  46.912273  6706500
2016-07-07  47.619999  48.000000  47.345001  47.740002  46.902447  5486800
2016-07-08  47.959999  50.610001  47.529999  50.200001  49.319286  4243500
2016-07-11  49.209999  51.230000  48.980000  51.119999  50.223148  6042300
2016-07-12  51.000000  51.060001  50.259998  50.570000  49.682796  5552900
2016-07-13  50.570000  51.099998  49.830002  50.619999  49.731918  4864700
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- TE: No

[*********************100%***********************]  1 of 1 completed
impl_date: 2017-07-25 00:00:00
eff_date: 2017-07-26 00:00:00
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
impl_date: 2017-07-25 00:00:00
eff_date: 2017-07-26 00:00:00
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
impl_date: 2017-07-25 00:00:00
eff_date: 2017-07-26 00:00:00
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
impl_date: 2017-07-25 00:00:00
eff_date: 2017-07-26 00:00:00
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
impl_date: 2017-07-25 00:00:00
eff_date: 2017-07-26 00:00:00
[*********************100%***********************]  1 of 1 completed
[****

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
impl_date: 2018-09-28 00:00:00
eff_date: 2018-10-01 00:00:00
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
impl_date: 2018-09-28 00:00:00
eff_date: 2018-10-01 00:00:00
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
impl_date: 2018-10-10 00:00:00
eff_date: 2018-10-11 00:00:00
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
impl_date: 2018-10-10 00:00:00
eff_date: 2018-10-11 00:00:00
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
impl_date: 2018-11-05 00:00:00
eff_date: 2018-11-06 00:00:00
[****

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
impl_date: 2019-10-02 00:00:00
eff_date: 2019-10-03 00:00:00
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
impl_date: 2019-11-20 00:00:00
eff_date: 2019-11-21 00:00:00
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- CELG: No data found, symbol may be delisted
[*********************100%***********************]  1 of 1 completed
impl_date: 2019-12-04 00:00:00
eff_date: 2019-12-05 00:00:00
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- VIAB: No data found, symbol may be delisted
[*********************100%***********************]  1 of 1 compl

In [14]:
# Display the accumulated per-event trade results built by the main loop.
df_output

Unnamed: 0,announcement_date,implementation_date,effective_date,strategy_type,rebal_type,add_delete,entry_date,entry_time,exit_date,exit_time,total,up,count
0,2015-01-21,2015-01-26,2015-01-27,implementation,ad_hoc,add,-4,Open,0,Open,-0.022750,0,1
1,2015-01-21,2015-01-26,2015-01-27,implementation,ad_hoc,add,-4,Open,0,Close,-0.016037,0,1
2,2015-01-21,2015-01-26,2015-01-27,implementation,ad_hoc,add,-4,Close,0,Open,-0.029985,0,1
3,2015-01-21,2015-01-26,2015-01-27,implementation,ad_hoc,add,-4,Close,0,Close,-0.023322,0,1
4,2015-01-21,2015-01-26,2015-01-27,implementation,ad_hoc,add,-3,Open,0,Open,-0.025415,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9371,2020-05-18,2020-05-21,2020-05-22,announcement,ad_hoc,delete,1,Close,3,Close,-0.010610,0,1
9372,2020-05-18,2020-05-21,2020-05-22,announcement,ad_hoc,delete,1,Open,2,Open,-0.033500,0,1
9373,2020-05-18,2020-05-21,2020-05-22,announcement,ad_hoc,delete,1,Open,2,Close,-0.061500,0,1
9374,2020-05-18,2020-05-21,2020-05-22,announcement,ad_hoc,delete,1,Close,2,Open,0.025464,1,1


In [15]:
def get_uniques(input_list):
    '''
    Return the distinct values of `input_list` as a list, in order of first
    appearance.

    Going through a set would give an arbitrary order that can differ from
    run to run for strings; dict keys keep insertion order, so the result is
    reproducible. Values must be hashable.
    '''
    return list(dict.fromkeys(input_list))

In [16]:
# Calculate population statistics: one stats_df row per strategy (combination
# of strategy attribute values) that has at least one row in df_output.

stats_df_columns = []
stats_df_columns.extend(strategy_attributes)
stats_df_columns.extend(['total', 'up', 'count', 'mean', 'std', 'max', 'min', 'sharpe', 'uppct'])

# get the unique values for each strategy attribute
strategy_attribute_unique_values = map(lambda strategy_attribute: get_uniques(df_output[strategy_attribute]), strategy_attributes)

# find strategies consisting of different combinations of unique values for each strategy attribute
strategies = list(itertools.product(*strategy_attribute_unique_values))

# Collect one dict per strategy and build the frame once at the end:
# DataFrame.append was removed in pandas 2.0 and growing a frame row by row
# copies it on every iteration.
stats_records = []
for strategy in strategies:
    # find the relevant rows for each strategy
    relevant_rows = df_output.loc[(df_output[strategy_attributes]==strategy).all(axis=1), :]
    if len(relevant_rows) == 0:
        continue

    # Column-wise sums of 'total', 'up' and 'count' (everything from 'total' onwards)
    relevant_rows_sum = relevant_rows.loc[:, 'total':].sum(axis=0)
    mean = relevant_rows_sum['total'] / relevant_rows_sum['count']
    # statistics.stdev needs at least two data points, i.e. two rows. A single
    # row can carry count > 1, so the summed count is not a safe guard.
    if len(relevant_rows) > 1:
        std = statistics.stdev(relevant_rows['total'])
    else:
        std = 0
    max_val = max(relevant_rows['total'])
    min_val = min(relevant_rows['total'])
    if std == 0:
        # Avoid dividing by zero: substitute a tiny deviation
        sharpe = mean / 0.00001 * np.sqrt(252)
    else:
        sharpe = mean / std * np.sqrt(252)
    uppct = relevant_rows_sum['up'] / relevant_rows_sum['count']
    strategy_dict = dict(zip(strategy_attributes, strategy))
    data = {'total': relevant_rows_sum['total'],
            'up': relevant_rows_sum['up'], 'count': relevant_rows_sum['count'],
            'mean': mean, 'std': std, 'max': max_val, 'min': min_val, 'sharpe': sharpe, 'uppct': uppct}
    data.update(strategy_dict)
    stats_records.append(data)

# columns= fixes the column order (strategy attributes first, then statistics)
# and yields an empty, correctly labelled frame when no strategy had data.
stats_df = pd.DataFrame(stats_records, columns=stats_df_columns)


In [17]:
# Writing to excel: raw per-event results on sheet 'data', per-strategy
# statistics on sheet 'strategy_stats'.
# ExcelWriter.save() was removed in pandas 2.0; the context manager saves and
# closes the workbook on exit, also when an export raises.
with pd.ExcelWriter('sp500_announcement_analysis.xlsx', engine='xlsxwriter') as writer:
    df_output.to_excel(writer, sheet_name='data', index=False)
    stats_df.to_excel(writer, sheet_name='strategy_stats', index=False)