In [1]:
#####################################
# Post Earnings Announcement Drift  #
# Qingyi (Freda) Song Drechsler     #
# Date: June 2019                   #
# Update: Jan 2021                  #
#####################################

# Update Note: this version uses CRSP data to extract S&P500 Index constituents,
# as comp.idxcst_hist data is no longer available on WRDS
# Also updated various .loc usage that has been deprecated by Python.
# Source: https://www.fredasongdrechsler.com/data-crunching/pead

import os
import pandas as pd
import numpy as np
import wrds
import matplotlib.pyplot as plt
from dateutil.relativedelta import *
import datetime

from Constants import Constants as const

# set sample date range
begdate = '01/01/2007'
enddate = '12/31/2016'

# set CRSP date range a bit wider to guarantee collecting all information
crsp_begdate = '01/01/2006'
crsp_enddate = '12/31/2017'

In [2]:
# Connect to WRDS
conn = wrds.Connection(wrds_username='aheitz')

Loading library list...
Done


In [3]:
# Load linkage files
crsp_comp_link = pd.read_pickle(os.path.join(const.TEMP_PATH, 'crsp_comp.pkl')).rename(columns={'permco': 'permno'})
ibes_crsp_link = pd.read_pickle(os.path.join(const.TEMP_PATH, 'ibes_crsp_link.pkl'))

# Placeholder dates
bdate = '2006-01-01'
edate = '2016-06-30'


In [4]:
iclink = pd.read_pickle(os.path.join(const.TEMP_PATH, '20241011_iclink.pkl'))
iclink.head()

Unnamed: 0,ticker,permno,cname,comnam,name_ratio,score
0,0000,14471,TALMER BANCORP,TALMER BANCORP INC,100.0,0
1,0001,14392,EP ENGR CORP,E P ENERGY CORP,74.0,0
2,0004,14418,AMERICAN CAPITAL,AMERICAN CAPITAL SR FLOATING LTD,100.0,0
3,000R,14378,CARECOM,CARE COM INC,74.0,0
4,000V,14423,EIGER,EIGER BIOPHARMACEUTICALS INC,100.0,0


In [9]:
crsp_comp_link.head()

Unnamed: 0,permno,gvkey
0,1658,4685
1,7476,1518
2,15680,65368
3,5140,2859
4,43716,151576


In [10]:
ibes_link2 = ibes_crsp_link.merge(iclink[['ticker', 'permno']], on=['ticker'], suffixes=('', '_ic'), how='outer')
ibes_link2.loc[:, 'permno'] = ibes_link2['permno'].fillna(ibes_link2['permno_ic'])
ibes_link2.dropna(subset=['permno'], inplace=True)
ibes_link2.drop_duplicates(subset=['ticker'], keep='first', inplace=True)

gvkeys = ibes_link2[['ticker', 'permno']].merge(crsp_comp_link, on=['permno'], how='left')

In [54]:
# Step 2: Extract estimates from IBES using WRDS
ibes_temp = conn.raw_sql(f"""
    SELECT a.*, b.anndats AS repdats, b.value AS act
    FROM ibes.detu_epsus a
    LEFT JOIN ibes.actu_epsus b
    ON a.ticker = b.ticker AND a.fpedats = b.pends AND b.pdicity = 'QTR'
    WHERE '{bdate}' <= a.fpedats AND a.fpedats <= '{edate}'
    AND fpi IN ('6', '7') AND 0 <= (b.anndats - a.anndats) AND (b.anndats - a.anndats) <= 90
""")

In [16]:
ibes_detu_epsus = conn.raw_sql(f"""
    SELECT *
    FROM ibes.detu_epsus
    WHERE '{bdate}' <= fpedats AND fpedats <= '{edate}'
    AND fpi IN ('6', '7')
""")

In [17]:
# Step 2: Extract estimates from IBES using WRDS
ibes_actu_epsus = conn.raw_sql(f"""
    SELECT anndats AS repdats, value AS act, ticker
    FROM ibes.actu_epsus
    WHERE '{bdate}' <= pends AND pends <= '{edate}' AND pdicity = 'QTR'
""")

In [18]:
ibes_detu_epsus['anndats'] = pd.to_datetime(ibes_detu_epsus['anndats'])
ibes_actu_epsus['repdats'] = pd.to_datetime(ibes_actu_epsus['repdats'])

In [164]:
def merge_ibes_actu_epsus_to_ibes_detu_epsus(row):
    ticker = row['ticker']
    anndats = row['anndats']
    temp_df = ibes_actu_epsus.loc[(ibes_actu_epsus['ticker'] == ticker) & (ibes_actu_epsus['repdats'] >= anndats) & ((ibes_actu_epsus['repdats'] - anndats).dt.days <= 90)].copy()
    if temp_df.empty:
        return np.nan
    else:
        return temp_df.iloc[0]['act']

In [165]:
ibes_detu_epsus.loc[:, 'act'] = ibes_detu_epsus.apply(merge_ibes_actu_epsus_to_ibes_detu_epsus, axis=1)

KeyboardInterrupt: 

In [19]:
# Step 1: Create a temporary column in `ibes_actu_epsus` for efficient filtering
ibes_actu_epsus['repdats_minus_90'] = ibes_actu_epsus['repdats'] - pd.Timedelta(days=90)

# Step 2: Merge the two dataframes on the 'ticker' column
merged_df = pd.merge(
    ibes_detu_epsus,
    ibes_actu_epsus,
    on='ticker',
    suffixes=('_detu', '_actu')
)

# Step 3: Filter based on the conditions
filtered_df = merged_df[
    (merged_df['repdats'] >= merged_df['anndats']) &
    (merged_df['anndats'] >= merged_df['repdats_minus_90'])
]

# Step 4: Sort and drop duplicates to get the closest 'repdats' value
filtered_df = filtered_df.sort_values(by=['ticker', 'anndats', 'repdats'])
filtered_df = filtered_df.drop_duplicates(subset=['ticker', 'anndats'], keep='first')

# Step 5: Merge the filtered result back to `ibes_detu_epsus` to add the 'act' values
result = pd.merge(
    ibes_detu_epsus,
    filtered_df[['ticker', 'anndats', 'act']],
    how='left',
    left_on=['ticker', 'anndats'],
    right_on=['ticker', 'anndats']
)

# Assign the result back to the original dataframe
ibes_detu_epsus['act'] = result['act']


In [22]:
del filtered_df

NameError: name 'filtered_df' is not defined

In [29]:
import gc
gc.collect()

216

In [28]:
del merged_df

In [20]:
ibes_detu_epsus.shape

(2928566, 22)

In [23]:
ibes_temp = ibes_detu_epsus

# Create `fyearq` and `fqtr` for `ibes_temp` for more flexible date alignment
ibes_temp['fyearq'] = pd.to_datetime(ibes_temp['fpedats']).dt.year
ibes_temp['fqtr'] = pd.to_datetime(ibes_temp['fpedats']).dt.quarter

In [24]:
# Step 3: Normalize Ticker and Merge IBES Data
ibes_temp['ticker'] = ibes_temp['ticker'].str.replace(r'\..*$', '', regex=True)  # Remove any suffix from ticker
ibes_temp = pd.merge(ibes_temp, gvkeys, on='ticker', how='left')  # Changed from inner to left join to retain more data

In [25]:
ibes_temp.head()

Unnamed: 0,ticker,cusip,oftic,cname,actdats,estimator,analys,currfl,pdf,fpi,...,revdats,revtims,anndats,anntims,report_curr,act,fyearq,fqtr,permno,gvkey
0,0,87482X10,TLMR,TALMER BANCORP,2014-03-10,1267.0,71182.0,,D,6,...,2014-03-10,08:54:03,2014-03-10,00:20:00,USD,0.12,2014,1,14471.0,62491
1,0,87482X10,TLMR,TALMER BANCORP,2014-03-11,149.0,119962.0,,D,6,...,2014-03-11,15:17:12,2014-03-09,17:05:00,USD,0.12,2014,1,14471.0,62491
2,0,87482X10,TLMR,TALMER BANCORP,2014-03-11,228.0,80474.0,,D,6,...,2014-03-11,15:49:22,2014-03-10,06:48:00,USD,0.12,2014,1,14471.0,62491
3,0,87482X10,TLMR,TALMER BANCORP,2014-03-11,873.0,79092.0,,D,6,...,2014-03-11,17:10:50,2014-03-10,00:25:00,USD,0.12,2014,1,14471.0,62491
4,0,87482X10,TLMR,TALMER BANCORP,2014-03-11,952.0,50789.0,,D,6,...,2014-03-11,15:22:08,2014-03-10,07:45:00,USD,0.12,2014,1,14471.0,62491


In [26]:
# Step 4: Count number of estimates and select primary/diluted basis
ibes = ibes_temp[['ticker', 'fpedats', 'pdf']].copy()
ibes['p_count'] = np.where(ibes['pdf'] == 'P', 1, 0)
ibes['d_count'] = np.where(ibes['pdf'] == 'D', 1, 0)
ibes = ibes.groupby(['ticker', 'fpedats']).agg(
    {'p_count': 'sum', 'd_count': 'sum'}
).reset_index()

ibes['basis'] = np.where(ibes['p_count'] > ibes['d_count'], 'P', 'D')
ibes = ibes.merge(ibes_temp, on=['ticker', 'fpedats'], how='left')
ibes.drop_duplicates(['ticker', 'fpedats', 'estimator', 'analys'], keep='last', inplace=True)


In [27]:
# Step 5: Compute median forecast based on estimates in the 90 days prior to the EAD
medest = ibes.groupby(['gvkey', 'fqtr', 'fyearq']).agg(
    median_est=('value', 'median'),
    num_est=('value', 'count'),
    basis=('basis', 'first')  # Include basis for further calculations
).reset_index()

In [30]:
# Step 6: Extract Compustat data and merge it with IBES consensus using WRDS
comp = conn.raw_sql(f"""
    SELECT gvkey, fyearq, fqtr, conm, datadate, rdq, epsfxq, epspxq, prccq, ajexq, spiq, cshoq, 
           fyr, datafqtr
    FROM comp.fundq
    WHERE ((saleq IS NOT NULL OR atq > 0) AND consol = 'C' AND popsrc = 'D' 
           AND indfmt = 'INDL' AND datafmt = 'STD' AND datafqtr IS NOT NULL)
""")

In [31]:
comp_backup = comp.copy()

In [32]:
comp = comp_backup.loc[comp_backup['fyearq'].apply(lambda x: 2005 < x < 2016)].copy()

In [33]:
comp = comp.merge(medest, on=['gvkey', 'fqtr', 'fyearq'], how='left')

# Merge 'act' variable from ibes_temp to comp
comp = comp.merge(ibes_temp[['gvkey', 'fqtr', 'fyearq', 'act']].drop_duplicates().dropna(subset=['gvkey']), on=['gvkey', 'fqtr', 'fyearq'], how='left')

In [34]:
# Step 7: Process Compustat Data on a seasonal year-quarter basis
comp.sort_values(by=['gvkey', 'fqtr', 'fyearq'], inplace=True)
comp['lag_eps'] = comp.groupby('gvkey')['epspxq'].shift(1)

In [130]:
comp.drop_duplicates(['gvkey', 'fyearq', 'fqtr'], keep='last').shape

(1690329, 18)

In [37]:
# Step 8: Calculate SUE variables based on the basis
comp['actual1'] = np.where(comp['basis'] == 'P', comp['epspxq'] / comp['ajexq'], comp['epsfxq'] / comp['ajexq'])
comp['expected1'] = np.where(comp['basis'] == 'P', comp['lag_eps'] / comp['ajexq'], comp['lag_eps'] / comp['ajexq'])
comp['sue1'] = (comp['actual1'] - comp['expected1']) / (comp['prccq'] / comp['ajexq'])

comp['actual2'] = np.where(comp['basis'] == 'P', (comp['epspxq'] - 0.65 * comp['spiq'] / comp['cshoq']) / comp['ajexq'],
                           (comp['epsfxq'] - 0.65 * comp['spiq'] / comp['cshoq']) / comp['ajexq'])
comp['expected2'] = np.where(comp['basis'] == 'P', (comp['lag_eps'] - 0.65 * comp['spiq'] / comp['cshoq']) / comp['ajexq'],
                             (comp['lag_eps'] - 0.65 * comp['spiq'] / comp['cshoq']) / comp['ajexq'])
comp['sue2'] = (comp['actual2'] - comp['expected2']) / (comp['prccq'] / comp['ajexq'])

comp['sue3'] = (comp['act'] - comp['median_est']) / comp['prccq']


In [38]:
# Final output
sue_final = comp[['gvkey', 'fyearq', 'fqtr', 'datadate', 'rdq', 'sue1', 'sue2', 'sue3']]
sue_final.drop_duplicates(subset=['gvkey', 'fyearq', 'fqtr'], keep='last', inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sue_final.drop_duplicates(subset=['gvkey', 'fyearq', 'fqtr'], keep='last', inplace=True)


In [39]:
sue_final.to_pickle(os.path.join(const.TEMP_PATH, '20241011_sue_123.pkl'))

In [40]:
sue_final.describe()

Unnamed: 0,fyearq,fqtr,sue1,sue2,sue3
count,359849.0,359849.0,307941.0,302383.0,4541.0
mean,2010.459593,2.494833,-12.53755,-2.00251,4.10121
std,2.892548,1.119652,5210.38,1874.770279,136.362189
min,2006.0,1.0,-1986700.0,-511809.4375,-1033.333333
25%,2008.0,1.0,-0.01187097,-0.011858,-0.013986
50%,2010.0,2.0,0.0,0.0,0.007391
75%,2013.0,4.0,0.01030928,0.010299,0.058169
max,2015.0,4.0,769352.0,508710.0,7600.0


In [155]:
# Close the connection to WRDS
conn.close()