In [1]:
import os

import wrds
import pandas as pd
from pandas import DataFrame
import numpy as np

from Constants import Constants as const

In [2]:
# Connect to WRDS
conn = wrds.Connection(wrds_username='aheitz')

Loading library list...
Done


In [3]:
# Define start and end dates
start_date = '2007-01-01'
end_date = '2016-12-31'

Analyst Coverage

Method from He, J. (Jack), & Tian, X. (2013). The dark side of analyst coverage: The case of innovation. Journal of Financial Economics, 109(3), 856–878. https://doi.org/10.1016/j.jfineco.2013.04.001 

We obtain analyst information from the I/B/E/S database. For each fiscal year of a firm, we take the average of the 12 monthly numbers of earnings forecasts given by the summary file and treat that as a raw measure of analyst coverage (Coverage). This measure relies on the fact that most analysts following a firm issue at least one earnings forecast for that firm during the year before its fiscal year ending date and that a majority of them issue at most one earnings forecast in each month.

In [15]:

# Query to obtain the monthly number of earnings forecasts from the IBES summary file
ibes_summary = conn.raw_sql(f"""
    SELECT ticker, statpers, numest
    FROM ibes.statsum_epsus
    WHERE fpi = '1' AND measure='EPS' AND statpers >= '{start_date}' AND statpers <= '{end_date}'
""")

# Convert the date column to datetime format
ibes_summary['statpers'] = pd.to_datetime(ibes_summary['statpers'])

# Extract year and month from 'statpers'
ibes_summary['year'] = ibes_summary['statpers'].dt.year
ibes_summary['month'] = ibes_summary['statpers'].dt.month

# Calculate the average number of analysts for each firm for each fiscal year
ibes_summary['fiscal_year'] = ibes_summary['statpers'].dt.to_period('Y')

# Group by ticker and fiscal year, then calculate the average number of analysts
analyst_coverage = ibes_summary.groupby(['ticker', 'fiscal_year'])['numest'].mean().reset_index()

# Rename columns
analyst_coverage.rename(columns={'numest': 'coverage'}, inplace=True)

In [23]:
# Link the IBES ticker to CRSP using the IBES CRSP Link database
ibes_crsp_link = conn.raw_sql(f"""
    SELECT distinct ticker, permno
    FROM wrdsapps_link_crsp_ibes.ibcrsphist
    WHERE sdate <= '{end_date}' AND (edate IS NULL OR edate >= '{start_date}')
""")

# Merge IBES and CRSP data to link ticker with PERMNO
analyst_coverage2 = pd.merge(analyst_coverage, ibes_crsp_link, on='ticker', how='left')

In [29]:
ibes_summary['fqtr'] = ibes_summary['statpers'].dt.to_period('Q')
analyst_coverage_qtr = ibes_summary.groupby(['ticker', 'fqtr'])['numest'].mean().reset_index()
analyst_coverage_qtr.rename(columns={'numest': 'coverage'}, inplace=True)

# Merge IBES and CRSP data to link ticker with PERMNO
analyst_coverage_qtr2 = pd.merge(analyst_coverage_qtr, ibes_crsp_link, on='ticker', how='left')

In [34]:
# Link CRSP to Compustat using the CRSP-Compustat Link table
crsp_comp_link = conn.raw_sql(f"""
    SELECT distinct permco, gvkey
    FROM wrdsapps_link_crsp_comp_bdx.bdxcrspcomplink
""")

# Merge CRSP data with Compustat data to add GVKEY to the dataset
analyst_coverage2 = pd.merge(analyst_coverage2, crsp_comp_link, left_on='permno', right_on='permco', how='left')

# Drop 'permco' column as it is no longer needed
analyst_coverage2.drop(columns=['permco'], inplace=True)
analyst_coverage2.to_csv(os.path.join(const.TEMP_PATH, '2007_2016_annual_analyst_coverage.csv'), index=False)

# Merge CRSP data with Compustat data to add GVKEY to the dataset
analyst_coverage_qtr2 = pd.merge(analyst_coverage_qtr2, crsp_comp_link, left_on='permno', right_on='permco', how='left')

# Drop 'permco' column as it is no longer needed
analyst_coverage_qtr2.drop(columns=['permco'], inplace=True)
analyst_coverage_qtr2.to_csv(os.path.join(const.TEMP_PATH, '2007_2016_quarterly_analyst_coverage.csv'), index=False)


In [36]:
analyst_coverage2

Unnamed: 0,ticker,fiscal_year,coverage,permno,gvkey_x,gvkey_y
0,0000,2014,4.352113,14471.0,062491,062491
1,0000,2015,5.000000,14471.0,062491,062491
2,0000,2016,3.480000,14471.0,062491,062491
3,0001,2014,13.526316,14392.0,,
4,0001,2015,14.763158,14392.0,,
...,...,...,...,...,...,...
54481,ZYNX,2011,1.000000,,,
54482,ZYNX,2012,1.000000,,,
54483,ZYNX,2013,1.000000,,,
54484,ZYNX,2014,1.000000,,,


DISPERSION and FCSTERROR data

In [5]:
# Query WRDS IBES to obtain the necessary data
# We need 'stdev', 'meanest', 'actual' from the IBES dataset
ibes_data = conn.raw_sql(f"""
    SELECT ticker, fpi, statpers, numest, fpedats, meanest AS EPS_MEAN, 
           stdev AS EPS_SD, actual AS EPS_ACTUAL
    FROM ibes.statsum_epsus
    WHERE fpi = '6' AND statpers BETWEEN '{start_date}' AND '{end_date}' AND measure = 'EPS'
""")
# ibes_data.to_pickle(os.path.join(const.TEMP_PATH, '2007_2016_ibes_annual_data.pkl'))
ibes_data.to_pickle(os.path.join(const.TEMP_PATH, '2007_2016_ibes_quarter_data.pkl'))

In [6]:
# Query WRDS CRSP to obtain the price data for each company at the end of the previous quarter
crsp_data = conn.raw_sql(f"""
    SELECT permno, date, prc AS Price_lag
    FROM crsp.msf
    WHERE date BETWEEN '{start_date}' AND '{end_date}'
""")
crsp_data.to_pickle(os.path.join(const.TEMP_PATH, '2007_2016_crsp_monthly_data.pkl'))

In [7]:
# Load the data into pandas DataFrames
ibes_df = pd.DataFrame(ibes_data)
crsp_df = pd.DataFrame(crsp_data).rename(columns={'price_lag': 'prc'})
link_df = pd.DataFrame(ibes_crsp_link)

NameError: name 'ibes_crsp_link' is not defined

In [78]:
link_df.to_pickle(os.path.join(const.TEMP_PATH, 'ibes_crsp_link.pkl'))
crsp_comp_link.to_pickle(os.path.join(const.TEMP_PATH, 'crsp_comp.pkl'))

In [8]:
link_df = pd.read_pickle(os.path.join(const.TEMP_PATH, 'ibes_crsp_link.pkl'))

In [9]:
# Convert the IBES data to quarterly by creating a quarter value and aggregating it
ibes_df['statpers'] = pd.to_datetime(ibes_df['statpers'])
ibes_df['quarter'] = ibes_df['statpers'].dt.to_period('Q')
ibes_quarterly = ibes_df.groupby(['ticker', 'quarter']).agg({
    'eps_mean': 'mean',
    'eps_sd': 'mean',
    'eps_actual': 'mean',
    'numest': 'last',
    'fpedats': 'last'
}).reset_index()

In [10]:
# Convert the CRSP data to quarterly by creating a quarter value
crsp_df['date'] = pd.to_datetime(crsp_df['date'])
crsp_df['quarter'] = crsp_df['date'].dt.to_period('Q')
crsp_quarterly = crsp_df.groupby(['permno', 'quarter']).last().reset_index()

In [11]:
# Use the price at the end of the previous quarter as Price_lag
crsp_quarterly['Price_lag'] = crsp_quarterly.groupby('permno')['prc'].shift(1)

In [12]:
# Merge IBES and CRSP Link data to get permno for each IBES ticker
ibes_linked_df = pd.merge(ibes_quarterly, link_df, on='ticker', how='inner')

# Merge IBES-linked data with CRSP quarterly data on permno and quarter
merged_df = pd.merge(ibes_linked_df, crsp_quarterly, on=['permno', 'quarter'], how='inner')

# Create DISPERSION
# DISPERSION is calculated as the ratio of the standard deviation of EPS forecasts (eps_sd) 
# to the stock price at the end of the previous quarter (Price_lag)
merged_df['DISPERSION'] = merged_df['eps_sd'] / merged_df['Price_lag']

# Create FCSTERROR
# FCSTERROR is calculated as the absolute value of the difference between the mean analyst EPS forecast (eps_mean)
# and the actual EPS (eps_actual), scaled by the stock price at the end of the previous quarter (Price_lag)
merged_df['FCSTERROR'] = abs(merged_df['eps_mean'] - merged_df['eps_actual']) / merged_df['Price_lag']


In [13]:
merged_df.to_csv(os.path.join(const.TEMP_PATH, '2007_2016_quarterly_dispersion_fcsterror.csv'), index=False)

In [10]:
ibes_df.head()

Unnamed: 0,ticker,fpi,statpers,numest,fpedats,eps_mean,eps_sd,eps_actual,year
0,0,1,2014-04-17,4.0,2014-12-31,0.52,0.03,1.21,2014
1,0,1,2014-05-15,4.0,2014-12-31,0.56,0.04,1.21,2014
2,0,1,2014-06-19,4.0,2014-12-31,0.56,0.04,1.21,2014
3,0,1,2014-07-17,3.0,2014-12-31,0.56,0.05,1.21,2014
4,0,1,2014-08-14,5.0,2014-12-31,1.18,0.06,1.21,2014


In [14]:
# Convert the IBES data to annual by creating a annual value and aggregating it
ibes_df['statpers'] = pd.to_datetime(ibes_df['statpers'])
ibes_df['year'] = ibes_df['statpers'].dt.year
ibes_annual = ibes_df.groupby(['ticker', 'year']).agg({
    'eps_mean': 'mean',
    'eps_sd': 'mean',
    'eps_actual': 'mean',
    'numest': 'mean',
}).reset_index()

ibes_annual2 = ibes_df.groupby(['ticker', 'year']).agg({
    'eps_mean': 'last',
    'eps_sd': 'last',
    'eps_actual': 'last',
    'numest': 'last',
}).reset_index()

ibes_annual = ibes_annual.merge(ibes_annual2, on=['ticker', 'year'], how='left', suffixes=('', '_last'))

# Convert the CRSP data to quarterly by creating a quarter value
crsp_df['date'] = pd.to_datetime(crsp_df['date'])
crsp_df['year'] = crsp_df['date'].dt.year
crsp_annual = crsp_df.groupby(['permno', 'year']).last().reset_index()

# Use the price at the end of the previous quarter as Price_lag
crsp_annual['Price_lag'] = crsp_annual.groupby('permno')['prc'].shift(1)

# Merge IBES and CRSP Link data to get permno for each IBES ticker
ibes_linked_df = pd.merge(ibes_annual, link_df, on='ticker', how='inner')

# Merge IBES-linked data with CRSP quarterly data on permno and quarter
merged_df_ann = pd.merge(ibes_linked_df, crsp_annual, on=['permno', 'year'], how='inner')
merged_df_ann = merged_df_ann[merged_df_ann['Price_lag'] > 0]

# Create DISPERSION
# DISPERSION is calculated as the ratio of the standard deviation of EPS forecasts (eps_sd) 
# to the stock price at the end of the previous quarter (Price_lag)
merged_df_ann['DISPERSION'] = merged_df_ann['eps_sd'] / merged_df_ann['Price_lag']
merged_df_ann['DISPERSION_last'] = merged_df_ann['eps_sd_last'] / merged_df_ann['Price_lag']

# Create FCSTERROR
# FCSTERROR is calculated as the absolute value of the difference between the mean analyst EPS forecast (eps_mean)
# and the actual EPS (eps_actual), scaled by the stock price at the end of the previous quarter (Price_lag)
merged_df_ann['FCSTERROR'] = abs(merged_df_ann['eps_mean'] - merged_df_ann['eps_actual']) / merged_df_ann['Price_lag']
merged_df_ann['FCSTERROR_last'] = abs(merged_df_ann['eps_mean_last'] - merged_df_ann['eps_actual_last']) / merged_df_ann['Price_lag']

merged_df_ann.to_csv(os.path.join(const.TEMP_PATH, '2007_2016_annual_dispersion_fcsterror.csv'), index=False)

Query WRDS database information.

In [31]:
# List all libraries
libraries = conn.list_libraries()
print(libraries)

['aha_sample', 'ahasamp', 'audit', 'audit_audit_comp', 'audit_common', 'audit_corp_legal', 'audit_oia', 'auditsmp', 'auditsmp_all', 'bank', 'bank_all', 'bank_premium_samp', 'banksamp', 'block', 'block_all', 'boardex', 'boardex_na', 'boardex_trial', 'boardsmp', 'bvd_amadeus_trial', 'bvd_bvdbankf_trial', 'bvd_orbis_trial', 'bvdsamp', 'calcbench_trial', 'calcbnch', 'cboe', 'cboe_all', 'cboe_sample', 'cboesamp', 'ciq', 'ciq_common', 'ciqsamp', 'ciqsamp_capstrct', 'ciqsamp_common', 'ciqsamp_keydev', 'ciqsamp_pplintel', 'ciqsamp_ratings', 'ciqsamp_transactions', 'ciqsamp_transcripts', 'cisdmsmp', 'columnar', 'comp', 'comp_bank', 'comp_bank_daily', 'comp_execucomp', 'comp_global', 'comp_global_daily', 'comp_na_annual_all', 'comp_na_daily_all', 'comp_na_monthly_all', 'comp_segments_hist', 'comp_segments_hist_daily', 'compa', 'compb', 'compg', 'compm', 'compsamp', 'compsamp_all', 'compsamp_snapshot', 'compseg', 'contrib', 'contrib_as_filed_financials', 'contrib_ceo_turnover', 'contrib_char_retu

NoSuchTableError: wrdsapps.iclink

In [32]:
# Assuming you found a relevant library, e.g., 'wrdsapps'
# List all tables in that library
tables = conn.list_tables('wrdsapps_link_crsp_comp_bdx')
print(tables)

['bdxcrspcomplink']


In [33]:


# Assuming you found a table, e.g., 'iclink'
# Describe the table to see its columns
columns = conn.describe_table('wrdsapps_link_crsp_comp_bdx', 'bdxcrspcomplink')
print(columns)

Approximately 12045 rows in wrdsapps_link_crsp_comp_bdx.bdxcrspcomplink.
        name  nullable              type            comment
0     permco      True           INTEGER        CRSP PERMCO
1      gvkey      True        VARCHAR(6)    Compustat GVKEY
2  companyid      True  DOUBLE PRECISION  Boardex CompanyID
3      score      True  DOUBLE PRECISION              Score
4  preferred      True  DOUBLE PRECISION          Preferred
5  duplicate      True  DOUBLE PRECISION          Duplicate
