In [None]:
#import libraries
from io import StringIO

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyreadstat
import requests
from sklearn.linear_model import LinearRegression

In [None]:
#load the CRSP fund file; its 'index_fund_flag' column is used below to pick out index funds
crsp_data = pd.read_excel("Data/CRSP_fund.xlsx")
crsp_data

In [None]:
#list the column names so the subset in the next cell can be checked against the file
print(crsp_data.columns)

In [None]:
#keep only the identifier/flag columns and give them shorter names (mapped by name)
crsp_rename = {
    'Date': 'Date',
    'Fund Identifier': 'fundid(CRSP)',
    'CUSIP (8-digit)': 'fund_CUSIP',
    'Fund Name': 'fundname(CRSP)',
    'index_fund_flag': 'indexflag',
}
crsp_cols = list(crsp_rename.values())
crsp_data2 = crsp_data[list(crsp_rename)].rename(columns=crsp_rename)

In [None]:
#Only select funds whose index flag is 'D'
#NOTE(review): this comment used to say "B and D", but only 'D' is selected here;
#add 'B' to the list below if those funds are meant to be included as well
#.copy() gives an independent frame, because a later cell assigns a new 'Date' column into it
index_flag_funds = crsp_data2[crsp_data2['indexflag'].isin(['D'])].copy()
index_flag_funds

In [None]:
#load funds holdings data; pyreadstat returns the data frame together with a metadata object
lseg_data, lseg_meta = pyreadstat.read_dta("Data/LSEG_fund.dta")
lseg_data

In [None]:
#inspect the holdings column names before subsetting
lseg_data.columns

In [None]:
#keep rows with a non-missing 'shrout2', then subset and rename columns for ease
lseg_data2 = lseg_data[lseg_data['shrout2'].notna()]
#.copy() gives an independent frame: the columns are renamed here and 'Date' is reassigned later
lseg_data3 = lseg_data2[['fdate','fundno', 'fundname','cusip','stkname', 'ticker','shrout2']].copy()
#NOTE(review): 'shrout2' is renamed to 'shares_outstanding' here but is later summed across funds
#as the number of shares held passively — confirm against the LSEG data dictionary that this
#column is the fund's position size and not the stock's shares outstanding
lseg_cols = ['Date', 'fundid(LSEG)', 'fundname(LSEG)', 'stock_CUSIP', 'stock_name', 'ticker', 'shares_outstanding']
lseg_data3.columns = lseg_cols
lseg_data3

In [None]:
#convert date columns to datetime format
#assign() builds new frames rather than writing into frames that were created as slices,
#which avoids pandas' SettingWithCopyWarning / ambiguous chained assignment
index_flag_funds = index_flag_funds.assign(Date=pd.to_datetime(index_flag_funds['Date']))
lseg_data3 = lseg_data3.assign(Date=pd.to_datetime(lseg_data3['Date']))

In [None]:
#keep holdings dated on or after 1 January 2021
from_2021 = lseg_data3['Date'] >= "2021-01-01"
lseg_data3 = lseg_data3.loc[from_2021]
lseg_data3

In [None]:
#load the CRSP fund id -> WFICN link table and keep just the two key columns
CRSP_WFICN = pd.read_excel("Data/CRSP_WFICN.xlsx")
crsp_link_names = {
    'Fund Identifier': 'fundid(CRSP)',
    'Wharton Financial Instution Center Number': 'wficn',
}
CRSP_WFICN2 = CRSP_WFICN[list(crsp_link_names)].rename(columns=crsp_link_names)
CRSP_WFICN2

In [None]:
#attach the WFICN to every index fund row; funds without a link are dropped (inner join)
index_flag_funds2 = index_flag_funds.merge(CRSP_WFICN2, on="fundid(CRSP)", how="inner")
index_flag_funds2

In [None]:
#load the LSEG fund number -> WFICN link table and keep just the two key columns
FUNDNO_WFICN = pd.read_stata("Data/FUNDO_WFICN.dta")
FUNDNO_WFICN2 = FUNDNO_WFICN[['fundno', 'wficn']].rename(columns={'fundno': 'fundid(LSEG)'})
FUNDNO_WFICN2

In [None]:
#one row per (wficn, LSEG fund id) pair, then drop pairs with a missing value on either side
FUNDNO_WFICN3 = (
    FUNDNO_WFICN2
    .drop_duplicates(subset=['wficn', 'fundid(LSEG)'], keep='first')
    .reset_index(drop=True)
    .dropna()
)
FUNDNO_WFICN3

In [None]:
#attach the WFICN to every holdings row
#note: dropna() removes rows with a missing value in ANY column, not only a missing wficn
lseg_data4 = lseg_data3.merge(FUNDNO_WFICN3, on='fundid(LSEG)', how="left").dropna()
lseg_data4

In [None]:
#make funds dataset: holdings of the funds flagged as index funds
#keep one row per (Date, wficn) on the fund side. The join keys do not include the CRSP fund id,
#so several CRSP rows sharing a wficn on the same date would repeat every holding row and
#inflate the per-stock totals summed in the next step
index_fund_keys = index_flag_funds2.drop_duplicates(subset=['Date', 'wficn'])
merged_data = pd.merge(index_fund_keys, lseg_data4, on=['Date', 'wficn'], how='inner')
merged_data

In [None]:
#total per stock and date across all matched index funds
#NOTE(review): the summed column is LSEG 'shrout2' renamed to 'shares_outstanding' — confirm it
#holds the fund's position size, otherwise this is not "shares held"
passive_data = merged_data.groupby(['Date', 'ticker'], as_index=False)['shares_outstanding'].sum()

In [None]:
#name the summed column for what it represents and keep an independent copy
passive_data = passive_data.set_axis(['Date', 'ticker', 'shares_held_passive'], axis=1)
passive_data_21 = passive_data.copy()
passive_data_21

In [None]:
#write to csv so that the big datasets don't need to be loaded again
#note: the row index is written too (to_csv default), so the file has an extra unnamed first column
passive_data_21.to_csv("Data/passive_21.csv")

In [None]:
#Read data, new starting point (the notebook can be resumed from here once the csv exists)
#'Date' comes back as text and is converted to datetime further down
passive_data_21 = pd.read_csv("Data/passive_21.csv")

In [None]:
#drop na and keep only the three analysis columns
passive_data2 = passive_data_21.dropna()
#.copy() gives an independent frame: later cells assign cleaned 'ticker'/'Date' columns into it
passive_data3 = passive_data2[['Date', 'ticker', 'shares_held_passive']].copy()
passive_data3

In [None]:
#load earnings announcement dates and times
earnings_dates = pd.read_excel("Data/LSEG_earnings.xlsx")
earnings_dates

In [None]:
#rename and subset columns for ease
#.copy() gives an independent frame: the following cells add and overwrite columns on it
earnings_dates2 = earnings_dates[['oftic', 'Period End Date, SAS Format' ,'Announce Date, SAS Format', 'Announce time, SAS Format']].copy()
earnings_dates2.columns = ['ticker', 'quarter' ,'earnings_date', 'earnings_time']
earnings_dates2

In [None]:
#parse the announcement date, and reduce the announcement time to a time-of-day value
earnings_dates2 = earnings_dates2.assign(
    earnings_date=pd.to_datetime(earnings_dates2['earnings_date']),
    earnings_time=pd.to_datetime(earnings_dates2['earnings_time'], format='%H:%M:%S').dt.time,
)
earnings_dates2

In [None]:
#flag announcements made after the cutoff time; these are treated as next-day events below
#NOTE(review): the cutoff is 16:30 while the intent is "after market close" — confirm the time
after_close_cutoff = pd.Timestamp('16:30:00').time()
earnings_dates2['nextday'] = earnings_dates2['earnings_time'] > after_close_cutoff


In [None]:
#change earnings date to next day if after mkt close
#only the flagged rows are moved; the previous `if ...any()` test shifted EVERY announcement by
#one day as soon as a single row was flagged
#NOTE(review): this adds one calendar day, so an after-close Friday announcement lands on a
#Saturday; re-running this cell also shifts the flagged rows again
after_close = earnings_dates2['nextday']
earnings_dates2.loc[after_close, 'earnings_date'] = (
    earnings_dates2.loc[after_close, 'earnings_date'] + pd.Timedelta(days=1)
)

earnings_dates2

In [None]:
#keep ticker + earnings date, and duplicate the date into a 'Date' column used as merge key
no_longer_needed = ['earnings_time', 'nextday', 'quarter']
earnings_dates3 = earnings_dates2.drop(columns=no_longer_needed)
earnings_dates3 = earnings_dates3.assign(Date=earnings_dates3['earnings_date'])
earnings_dates3

In [None]:
#load daily stock data
stock_data = pd.read_stata("Data/daily_stock.dta")
stock_data

In [None]:
#convert the calendar date column to datetime format (no time subsetting happens in this cell)
stock_data['DlyCalDt'] = pd.to_datetime(stock_data['DlyCalDt'])

In [None]:
#give the columns short names (assigned by position, so the file must have exactly these 13
#columns in this order) and convert shares outstanding from thousands to units
stock_colnames = ["permno", "cusip1", "cusip2", "ticker", "permco", "company", "Date2", "Date", "price", "return", "vol", "shares_outstnading(1000s)", "sp_ret"]
stock_data = stock_data.set_axis(stock_colnames, axis=1)
stock_data["shares_outstanding"] = stock_data["shares_outstnading(1000s)"].mul(1000)

In [None]:
#drop the thousands column now that the unit column exists, then remove incomplete rows
stock_data2 = stock_data.drop(columns=["shares_outstnading(1000s)"])
stock_data3 = stock_data2.dropna()
stock_data3

In [None]:
#show negative-price rows that belong to a duplicated (ticker, Date) pair
is_duplicate_day = stock_data3.duplicated(subset=['ticker','Date'], keep=False)
is_negative = stock_data3['price'] < 0
negative_prices = stock_data3[is_duplicate_day & is_negative]
negative_prices

In [None]:
#keep only rows with a non-negative price (all negative prices go, not just the duplicated ones)
stock_data3 = stock_data3.loc[stock_data3['price'].ge(0)]

In [None]:
#one row per (ticker, Date): sort so the largest shares_outstanding comes first, then keep it
#(on inspection those rows looked more reasonable)
stock_data3 = (
    stock_data3
    .sort_values(['ticker','Date','shares_outstanding'], ascending=[True, True, False])
    .drop_duplicates(subset=['ticker','Date'], keep='first')
)

In [None]:
#make the merge keys comparable: upper-case string tickers, datetime dates, sorted by key
passive_data3 = (
    passive_data3
    .assign(ticker=passive_data3['ticker'].astype(str).str.upper(),
            Date=pd.to_datetime(passive_data3['Date']))
    .sort_values(['ticker', 'Date'])
    .reset_index(drop=True)
)
stock_data3 = (
    stock_data3
    .assign(ticker=stock_data3['ticker'].astype(str).str.upper(),
            Date=pd.to_datetime(stock_data3['Date']))
    .sort_values(['ticker', 'Date'])
    .reset_index(drop=True)
)

In [None]:
#attach passive holdings to the daily stock rows; days without a holdings record stay NaN
merge1 = stock_data3.merge(passive_data3, how='left', on=['ticker', 'Date'])
merge1

In [None]:
#backfill passive holdings for each stock within each calendar quarter
#each missing value takes the next available holdings figure later in the same quarter;
#groups with no holdings record at all stay NaN
#NOTE(review): holdings only attach on days present in the stock data, so a report dated on a
#non-trading day never enters the quarter — confirm that the report dates are trading days
merge2 = merge1.copy()
merge2['QuarterEnd'] = merge2['Date'].dt.to_period('Q').dt.end_time
#GroupBy.bfill gives the same result as transform(lambda x: x.bfill()) without a Python call per group
merge2['shares_held_passive'] = merge2.groupby(['ticker', 'QuarterEnd'])['shares_held_passive'].bfill()
merge2

In [None]:
#remove incomplete rows and the helper column, then compute the passive ownership fraction
merge3 = merge2.dropna().drop(columns=['QuarterEnd'])
merge3['share_passive'] = merge3['shares_held_passive'].div(merge3['shares_outstanding'])
merge3

In [None]:
#same key clean-up as for the stock data: upper-case tickers, datetime dates, sorted by key
earnings_dates3 = (
    earnings_dates3
    .assign(ticker=earnings_dates3['ticker'].astype(str).str.upper(),
            Date=pd.to_datetime(earnings_dates3['Date']))
    .sort_values(['ticker', 'Date'])
    .reset_index(drop=True)
)

In [None]:
#mark announcement days: 'earnings_date' is filled only on rows whose Date is an earnings date
#NOTE(review): repeated (ticker, Date) rows in earnings_dates3 would duplicate stock rows here
merge4 = merge3.merge(earnings_dates3, how='left', on=['Date', 'ticker'])
merge4

In [None]:
#tickers for which not a single earnings date matched (count() counts non-missing values)
matched_per_ticker = merge4.groupby('ticker')['earnings_date'].count()
no_earn_tickers = matched_per_ticker[matched_per_ticker == 0].index.tolist()
len(no_earn_tickers)

In [None]:
#keep only tickers that have at least one matched earnings date
has_earnings = ~merge4['ticker'].isin(no_earn_tickers)
merge4 = merge4.loc[has_earnings].copy()
merge4

In [None]:
#next earnings date: within each ticker, pull the following announcement date back onto earlier rows
merge4['earnings_next'] = merge4.groupby('ticker')['earnings_date'].bfill()
merge4

In [None]:
#previous earnings date: within each ticker, carry the latest announcement date forward
merge4['earnings_prev'] = merge4.groupby('ticker')['earnings_date'].ffill()
merge4

In [None]:
#calendar-day distance to the surrounding announcements (NaN where there is none on that side)
since_prev = merge4['Date'].sub(merge4['earnings_prev'])
until_next = merge4['earnings_next'].sub(merge4['Date'])
merge4['days_since_prev'] = since_prev.dt.days
merge4['days_until_next'] = until_next.dt.days
merge4

In [None]:
#function to choose nearest earnings date
def choose_nearest(row):
    """Return the earnings date closest to the row's own date.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide 'earnings_prev', 'earnings_next', 'days_since_prev'
        and 'days_until_next'.

    Returns
    -------
    The previous or the next earnings date, whichever is fewer days away
    (a tie goes to the previous one). If only one of the two exists it is
    returned; if neither exists the result is pd.NaT.
    """
    prev_date = row['earnings_prev']
    next_date = row['earnings_next']
    has_prev = pd.notna(prev_date)
    has_next = pd.notna(next_date)

    if not has_prev and not has_next:
        # NaT rather than float NaN so the result column stays datetime-typed
        return pd.NaT
    if not has_next:
        return prev_date
    if not has_prev:
        return next_date
    # Both exist: pick the closer date in absolute time
    if abs(row['days_since_prev']) <= abs(row['days_until_next']):
        return prev_date
    return next_date

In [None]:
#pick the nearest earnings date for every row
#vectorised form of the row-wise rule (closer of previous/next, ties to previous, fall back to
#whichever side exists); a Python-level apply(axis=1) over the daily panel is very slow
prev_is_closer = merge4['days_since_prev'].abs() <= merge4['days_until_next'].abs()
use_prev = merge4['earnings_next'].isna() | (merge4['earnings_prev'].notna() & prev_is_closer)
merge4['earnings_date_nearest'] = merge4['earnings_prev'].where(use_prev, merge4['earnings_next'])
merge4

In [None]:
#signed calendar-day offset from the nearest announcement (negative = before, positive = after)
offset_from_earnings = merge4['Date'].sub(merge4['earnings_date_nearest'])
merge4['days_from_earn'] = offset_from_earnings.dt.days
merge4

In [None]:
#inspect the available columns before choosing the analysis subset
merge4.columns

In [None]:
#subset data to the analysis columns
#.copy() gives an independent frame: the following cells add many derived columns to it
merge5 = merge4[['Date', 'ticker', 'price', 'vol', 'return', 'share_passive', 'shares_outstanding', 'days_from_earn', 'sp_ret', 'earnings_date_nearest']].copy()
merge5

In [None]:
#previous observation's return per ticker, and rolling std of price over 3 observations
#NOTE(review): the "vol" measure is the std of price levels, not of returns — confirm intended
merge5['return_lag1'] = merge5.groupby('ticker')['return'].shift(1)
merge5['sd_3day'] = merge5.groupby('ticker')['price'].transform(lambda p: p.rolling(window=3).std())
merge5['3day_vol'] = merge5['sd_3day']
merge5

In [None]:
#rolling std of price over 5 observations per ticker
merge5['sd_5day'] = merge5.groupby('ticker')['price'].transform(lambda p: p.rolling(window=5).std())
merge5['5day_vol'] = merge5['sd_5day']
merge5

In [None]:
#rolling std of price over 7 observations per ticker
merge5['sd_7day'] = merge5.groupby('ticker')['price'].transform(lambda p: p.rolling(window=7).std())
merge5['7day_vol'] = merge5['sd_7day']
merge5

In [None]:
#AR(1) benchmark: one regression of return on its lag, fitted on all rows together;
#the abnormal return is the actual return minus the in-sample fitted value
merge5 = merge5.dropna()
X, y = merge5[['return_lag1']], merge5['return']
ar1 = LinearRegression()
ar1.fit(X, y)
exp_returns_ar1 = ar1.predict(X)
merge5['exp_return_ar1'] = exp_returns_ar1
merge5['abnormal_return_ar1'] = merge5['return'].sub(merge5['exp_return_ar1'])
merge5

In [None]:
#remove helper columns that are no longer needed
ar1_helper_cols = ['return_lag1', 'sd_3day', 'sd_5day', 'sd_7day', 'exp_return_ar1']
merge5 = merge5.drop(columns=ar1_helper_cols)
merge5

In [None]:
#value-weighted return of all stocks in the sample, computed per date
merge5['mkt_cap'] = merge5['price'].mul(merge5['shares_outstanding'])
total_mkt_cap = merge5.groupby('Date')['mkt_cap'].transform('sum')
has_market_cap = total_mkt_cap > 0
#weight is set to 0 on dates whose total market cap is not positive
merge5['mkt_weight'] = np.where(has_market_cap, merge5['mkt_cap'] / total_mkt_cap, 0)
weighted_return = merge5['return'] * merge5['mkt_weight']
merge5['mkt_return'] = weighted_return.groupby(merge5['Date']).transform('sum')
merge5

In [None]:
#market-model benchmark: one regression of return on the sample market return over all rows;
#the abnormal return is the actual return minus the in-sample fitted value
merge5 = merge5.dropna()
X, y = merge5[['mkt_return']], merge5['return']
market_model = LinearRegression()
market_model.fit(X, y)
exp_returns_mkt = market_model.predict(X)
merge5['exp_return_mkt'] = exp_returns_mkt
merge5['abnormal_return_mkt'] = merge5['return'].sub(merge5['exp_return_mkt'])
mkt_helper_cols = ['mkt_cap', 'mkt_weight', 'mkt_return', 'exp_return_mkt']
merge5 = merge5.drop(columns=mkt_helper_cols)
merge5

In [None]:
#same benchmark with the 'sp_ret' column as the market return
X, y = merge5[['sp_ret']], merge5['return']
sp_model = LinearRegression()
sp_model.fit(X, y)
exp_returns_sp = sp_model.predict(X)
merge5['exp_return_sp'] = exp_returns_sp
merge5['abnormal_return_sp'] = merge5['return'].sub(merge5['exp_return_sp'])
merge5 = merge5.drop(columns=['exp_return_sp'])
merge5

In [None]:
#download the Wikipedia S&P 500 page and parse its tables
link = ("https://en.wikipedia.org/wiki/List_of_S%26P_500_companies#S&P_500_component_stocks")
headers = {"User-Agent":"Chrome/120.0.0.0 wikidatasource/1.0 (email@email.com) for thesis"}
response = requests.get(link, headers=headers, timeout=30)  # timeout: a stalled connection must not hang the notebook
response.raise_for_status()  # fail loudly instead of parsing an error page
#wrap the HTML in StringIO: passing literal HTML text to read_html is deprecated in recent pandas
in_sp = pd.read_html(StringIO(response.text))
#tables are picked by position: the first is used below as the constituent list, the second as
#the list of historical changes
#NOTE(review): this depends on the page layout — check sp1/ch_sp
sp1 = in_sp[0]
ch_sp = in_sp[1]
sp1

In [None]:
#constituent tickers from the scraped table, stamped with the end date of the sample
#rename()/assign() return a new frame, so nothing is written into a slice of sp1
sp2 = sp1[['Symbol']].rename(columns={'Symbol': 'ticker'}).assign(Date="2025-03-31")
sp2['Date'] = pd.to_datetime(sp2['Date'])
sp3 = sp2.dropna()
sp3

In [None]:
#inspect the second scraped table (used below as the list of index additions and removals)
ch_sp

In [None]:
#extract (Date, added ticker) pairs from the changes table
#transposing and resetting the index turns the column labels into leading columns;
#iloc[0:2, 2:] keeps the first two transposed rows and drops the first two columns, and the
#final .T restores one row per change
#NOTE(review): this presumably relies on ch_sp having a two-level column header whose first two
#columns are the date and the added ticker — confirm against the table displayed above
sp_adds = ch_sp.T.reset_index().iloc[0:2, 2:].T
sp_adds.columns = ['Date', 'ticker']
sp_adds['Date'] = pd.to_datetime(sp_adds['Date'])
sp_adds

In [None]:
#additions from 2021 onwards, without rows that lack a date or an added ticker
adds_from_2021 = sp_adds['Date'] >= "2021-01-01"
sp_adds2 = sp_adds.loc[adds_from_2021]
sp_adds3 = sp_adds2.dropna()
sp_adds3

In [None]:
#extract (Date, removed ticker) pairs from the changes table
#same transpose trick as for the additions, but taking transposed rows 0 and 3
#NOTE(review): presumably row 3 is the removed ticker column of a two-level header — confirm
#against the table displayed above
sp_removed = ch_sp.T.reset_index().iloc[[0,3], 2:].T
sp_removed.columns = ['Date', 'ticker']
sp_removed['Date'] = pd.to_datetime(sp_removed['Date'])
sp_removed

In [None]:
#removals from 2021 onwards, without rows that lack a date or a removed ticker
removed_from_2021 = sp_removed['Date'] >= "2021-01-01"
sp_removed2 = sp_removed.loc[removed_from_2021]
sp_removed3 = sp_removed2.dropna()
sp_removed3

In [None]:
#every date on which the index changed, i.e. dates with an addition OR a removal
#(using addition dates only would leave out events that consist of a removal alone)
unique_dates = pd.concat([sp_adds3['Date'], sp_removed3['Date']]).unique()
len(unique_dates)

In [None]:
#rebuild daily index membership by walking backwards from the scraped constituent list
full_dates = pd.date_range(start="2021-01-01", end="2025-03-31", freq="D")

#an event is any date with an addition OR a removal; using addition dates only would skip
#removal-only events, so those tickers would never be restored when rolling back
event_dates = sorted(set(sp_adds3['Date']) | set(sp_removed3['Date']), reverse=True)
current_members = set(sp3['ticker'])
sp_dict = {}

for d in event_dates:
    #membership in force from d onwards (after that day's changes)
    sp_dict[d] = pd.DataFrame({'Date': d, 'ticker': list(current_members)})
    adds = sp_adds3.loc[sp_adds3['Date'] == d, 'ticker']
    removes = sp_removed3.loc[sp_removed3['Date'] == d, 'ticker']
    #undo the event to obtain the membership that applied before d
    current_members.difference_update(adds)
    current_members.update(removes)

#what is left is the membership before the earliest event; store it as the snapshot for the
#first panel day, otherwise the days before the first event would have no members at all
panel_start = full_dates[0]
if not event_dates or event_dates[-1] > panel_start:
    sp_dict[panel_start] = pd.DataFrame({'Date': panel_start, 'ticker': list(current_members)})

sp_event_panel = pd.concat(sp_dict.values(), ignore_index=True)

sp_event_panel['Date'] = pd.to_datetime(sp_event_panel['Date'])
sp_event_panel = sp_event_panel.sort_values('Date')

#snapshot lookup (event date -> tickers), built once instead of filtering the frame per event
members_by_event = {d: set(tickers) for d, tickers in sp_event_panel.groupby('Date')['ticker']}
event_sorted = sorted(members_by_event)

daily_records = []
current_set = set()
next_event = 0

for d in full_dates:
    #advance to the latest snapshot dated on or before d
    while next_event < len(event_sorted) and d >= event_sorted[next_event]:
        current_set = members_by_event[event_sorted[next_event]]
        next_event += 1
    daily_records.extend({'Date': d, 'ticker': t} for t in current_set)

sp_daily_panel = pd.DataFrame(daily_records)
sp_daily_panel

In [None]:
#carry a copy of the ticker through the left join: it is non-missing only for (Date, ticker)
#pairs that appear in the membership panel
sp_daily_panel['ticker_sp'] = sp_daily_panel['ticker']
merge6 = merge5.merge(sp_daily_panel, how='left', on=['Date', 'ticker'])

In [None]:
#in_sp = 1 where the row matched the membership panel, else 0
#one vectorised comparison by column name replaces a Python loop over every row that addressed
#the columns by hard-coded positions (16 and 17); a missing ticker_sp compares unequal, giving 0
merge6['in_sp'] = (merge6['ticker_sp'] == merge6['ticker']).astype(int)
merge6

In [None]:
#the helper column has served its purpose once in_sp exists
merge6 = merge6.drop(columns=['ticker_sp'])

In [None]:
#check if any stock has a passive share of 100% or more
#share_passive is a fraction (shares held / shares outstanding), so 100% is 1, not 100
greaterthan1 = merge6[merge6['share_passive'] >= 1]
problem_tickers = greaterthan1['ticker'].unique()
totaltickers = merge6['ticker'].unique()
print(f"total:{len(totaltickers)}, problems: {len(problem_tickers)}")

In [None]:
#keep only observations whose passive share is below 0.95
#(share_passive is a fraction of shares outstanding; the cutoff is still open, see inline note)
master_data1 = merge6[merge6['share_passive'] < 0.95] #maybe 1.0 or 0.9?
master_data1

In [None]:
#event window: 31 calendar days before to 15 calendar days after the nearest earnings date
in_event_window = master_data1['days_from_earn'].between(-31, 15)
master_data2 = master_data1[in_event_window]

In [None]:
#order rows by stock, event and position inside the event window
event_order = ['ticker', 'earnings_date_nearest', 'days_from_earn']
master_data2 = master_data2.sort_values(by=event_order).copy()
master_data2

In [None]:
#price-jump window: 21 calendar days before to 5 calendar days after the nearest earnings date
#.copy() gives an independent frame, because the next cell adds columns to it
master_data3 = master_data1[(master_data1['days_from_earn'] >= -21) & (master_data1['days_from_earn'] <= 5)].copy()

In [None]:
#price-jump ratio per event and benchmark model:
#  PJ = |sum of abnormal returns over days -1..5| / |sum of abnormal returns over the window held
#       in master_data3 when this cell starts|
#the ratio is inf/NaN when the denominator is zero
#NOTE: the cell narrows master_data3 to days -1..5, so it is not safe to run twice in a row
event_keys = ['ticker', 'earnings_date_nearest']
ar_models = ['ar1', 'mkt', 'sp']

for model in ar_models:
    master_data3[f'total_cum_{model}'] = master_data3.groupby(event_keys)[f'abnormal_return_{model}'].transform('sum')

master_data3 = master_data3[master_data3['days_from_earn'].between(-1, 5)]

for model in ar_models:
    master_data3[f'cum_pre_post_{model}'] = master_data3.groupby(event_keys)[f'abnormal_return_{model}'].transform('sum')

for model in ar_models:
    master_data3[f'PJ_{model}'] = master_data3[f'cum_pre_post_{model}'].abs() / master_data3[f'total_cum_{model}'].abs()

pj_helper_cols = [f'{prefix}_{model}' for model in ar_models for prefix in ('total_cum', 'cum_pre_post')]
master_data3 = master_data3.drop(columns=pj_helper_cols)
master_data3

In [None]:
#one row per event: keep the observation one calendar day before the announcement
#NOTE(review): events with no row at day -1 drop out entirely
master_data3 = master_data3.loc[master_data3['days_from_earn'].eq(-1)]
master_data3

In [None]:
#load company fundamentals (used below as control variables)
fundamentals_data = pd.read_stata("Data/fundamentals.dta")
fundamentals_data

In [None]:
#inspect the column names; the next cell renames them by position
fundamentals_data.columns

In [None]:
#rename by position (the file must have exactly these 25 columns in this order), parse the
#date, keep the analysis columns and cut the sample off at 31 March 2025
fundamentals_cols = ['costat', 'currency', 'datafmt', 'indfmt', 'consolidation', 'ticker', 'Date', 'globalid', 'name', 'cusip', 'sector', 'ca', 'ta', 'capr1q', 'capr2q', 'cash_sinvest', 'comp_inc', 'long_debt', 'long_debt_tot', 'eps', 'long_invest', 'cl', 'tl', 'net_inc', 'rev']
fundamentals_data.columns = fundamentals_cols
fundamentals_data2 = fundamentals_data.assign(Date=pd.to_datetime(fundamentals_data['Date']))
fundamentals_keep = ['Date', 'ticker', 'sector', 'ca', 'ta', 'cash_sinvest', 'comp_inc', 'long_debt_tot', 'eps', 'long_invest', 'cl', 'tl', 'net_inc', 'rev']
fundamentals_data3 = fundamentals_data2[fundamentals_keep]
up_to_sample_end = fundamentals_data3['Date'] <= '2025-03-31'
fundamentals_data4 = fundamentals_data3.loc[up_to_sample_end]
fundamentals_data4

In [None]:
#final set of control variables; rows missing any of them are removed
control_cols = ['Date', 'ticker', 'sector', 'ta', 'cash_sinvest', 'long_debt_tot', 'rev', 'net_inc', 'eps']
fundamentals_data5 = fundamentals_data4[control_cols].dropna()

In [None]:
#attach fundamentals on their own date; every other day stays NaN until the backfill below
master_data4 = master_data1.merge(fundamentals_data5, on=['Date', 'ticker'], how='left')
master_data4

In [None]:
#backfill the fundamentals within each ticker and calendar quarter
#each missing value takes the next available value later in the same quarter; groups with no
#value at all stay NaN
master_data4['QuarterEnd'] = master_data4['Date'].dt.to_period('Q').dt.end_time
fundamental_cols = ['sector', 'ta', 'cash_sinvest', 'long_debt_tot', 'rev', 'net_inc', 'eps']
#one grouped bfill over all columns replaces seven copy-pasted transform(lambda ...) calls and
#gives the same result without a Python call per group
master_data4[fundamental_cols] = master_data4.groupby(['ticker', 'QuarterEnd'])[fundamental_cols].bfill()
master_data4

In [None]:
#drop the helper column, then keep only rows where every column (controls included) is present
master_data5 = master_data4.drop(columns=['QuarterEnd']).dropna()
master_data5

In [None]:
#event window (-31..15 calendar days) on the sample with controls
master_data6 = master_data5[master_data5['days_from_earn'].between(-31, 15)]

In [None]:
#order rows by stock, event and position inside the event window
event_order = ['ticker', 'earnings_date_nearest', 'days_from_earn']
master_data6 = master_data6.sort_values(by=event_order).copy()
master_data6

In [None]:
#inspect the columns of the event-study sample with controls
master_data6.columns

In [None]:
#price-jump window (-21..5 calendar days) on the sample with controls
#.copy() gives an independent frame, because the next cell adds columns to it
master_data7 = master_data5[(master_data5['days_from_earn'] >= -21) & (master_data5['days_from_earn'] <= 5)].copy()

In [None]:
#price-jump ratio per event and benchmark model, on the sample with controls:
#  PJ = |sum of abnormal returns over days -1..5| / |sum of abnormal returns over the window held
#       in master_data7 when this cell starts|
#the ratio is inf/NaN when the denominator is zero
#NOTE: the cell narrows master_data7 to days -1..5, so it is not safe to run twice in a row
event_keys = ['ticker', 'earnings_date_nearest']
ar_models = ['ar1', 'mkt', 'sp']

for model in ar_models:
    master_data7[f'total_cum_{model}'] = master_data7.groupby(event_keys)[f'abnormal_return_{model}'].transform('sum')

master_data7 = master_data7[master_data7['days_from_earn'].between(-1, 5)]

for model in ar_models:
    master_data7[f'cum_pre_post_{model}'] = master_data7.groupby(event_keys)[f'abnormal_return_{model}'].transform('sum')

for model in ar_models:
    master_data7[f'PJ_{model}'] = master_data7[f'cum_pre_post_{model}'].abs() / master_data7[f'total_cum_{model}'].abs()

pj_helper_cols = [f'{prefix}_{model}' for model in ar_models for prefix in ('total_cum', 'cum_pre_post')]
master_data7 = master_data7.drop(columns=pj_helper_cols)
master_data7

In [None]:
#one row per event: keep the observation one calendar day before the announcement
#NOTE(review): events with no row at day -1 drop out entirely
master_data7 = master_data7.loc[master_data7['days_from_earn'].eq(-1)]
master_data7

In [None]:
#event-study sample (days -31..15 around the nearest earnings date); the row index is written as well
master_data2.to_csv("Data/master_data_es.csv")

In [None]:
#price-jump sample (one row per event, taken at day -1); the row index is written as well
master_data3.to_csv("Data/master_data_pj.csv")

In [None]:
#full daily sample after the passive-share filter; the row index is written as well
master_data1.to_csv("Data/master_data_iv.csv")

In [None]:
#event-study sample with fundamentals attached; the row index is written as well
master_data6.to_csv("Data/master_data_es_controls.csv")

In [None]:
#price-jump sample with fundamentals attached (one row per event, at day -1); the row index is written as well
master_data7.to_csv("Data/master_data_pj_controls.csv")

In [None]:
#full daily sample with fundamentals attached; the row index is written as well
master_data5.to_csv("Data/master_data_iv_controls.csv")