# Accuracy Analysis

In [1]:
import eikon as ek
import pandas as pd
import numpy as np
import datetime
import os

# Read the Eikon app key from the environment instead of embedding it in the
# notebook: a key saved in the notebook is exposed to everyone who can read the
# file or its version history. Any key that was previously committed here
# should be treated as leaked and regenerated.
_app_key = os.environ.get("EIKON_APP_KEY")
if not _app_key:
    raise RuntimeError(
        "EIKON_APP_KEY is not set; export your Eikon app key before running this notebook."
    )
ek.set_app_key(_app_key)

Reference: https://community.developers.refinitiv.com/questions/73493/get-eps-historical-data-for-stocks.html

### Part 1: Accuracy  

**Variables**

**TR.EPSActValue** - The company's actual value normalized to reflect the I/B/E/S default currency and corporate actions (e.g. stock splits). Earnings Per Share is defined as the EPS that the contributing analyst considers to be that with which to value a security. This figure may include or exclude certain items depending on the contributing analyst's specific model.  

**TR.EPSMean** - The statistical average of all broker estimates determined to be on the majority accounting basis. Earnings Per Share is defined as the EPS that the contributing analyst considers to be that with which to value a security. This figure may include or exclude certain items depending on the contributing analyst's specific model.  

--> this is an analyst forecast variable

**TR.EPSActSurprise** - The difference between the actual and the last mean of the period, expressed as a percentage. Earnings Per Share is defined as the EPS that the contributing analyst considers to be that with which to value a security. This figure may include or exclude certain items depending on the contributing analyst's specific model.  

--> forecast error between actual EPS and TR.EPSMean  


In [13]:
# Earlier, wider field selection kept for reference:
#accuracy_variables = ['TR.RevenueActValue.date', 'TR.RevenueActValue', "TR.F.EV", 'TR.EPSActValue', "TR.EPSMean", "TR.EpsSmartEst", "TR.EPSActSurprise", "TR.EpsPreSurprisePct"]

# Fields requested from Eikon for the accuracy analysis: the date attached to
# the actual EPS value, the actual EPS, the mean estimate and the surprise.
accuracy_variables = [
    "TR.EPSactValue.date",
    "TR.EPSActValue",
    "TR.EPSMean",
    "TR.EPSActSurprise",
]

In [23]:
import warnings

# Request the fields listed in `accuracy_variables` for the '0#.SPX' chain,
# using the SDate/EDate/Period/Frq parameters below.
df_accuracy, e = ek.get_data(
    '0#.SPX',
    accuracy_variables,
    parameters={'SDate': '0', 'EDate': '-5', 'Period': 'FQ0', 'Frq': 'FQ'},
)

# get_data hands back errors as its second return value instead of raising.
# Surface them so missing instruments/fields do not go unnoticed; the data
# that did arrive is still processed below.
if e:
    warnings.warn(
        "ek.get_data reported errors; inspect `e` before relying on df_accuracy: "
        + str(e)[:500]
    )

# Parse the "Date" column into pandas datetimes so it can be grouped and plotted.
df_accuracy["Date"] = pd.to_datetime(df_accuracy["Date"])

# Persist a snapshot of the download (the DataFrame index is written as the first column).
df_accuracy.to_csv("df_accuracy.csv")

In [22]:
# Show the downloaded frame; as the last expression of the cell it is rendered
# by the notebook (pandas truncates long frames to head and tail rows).
df_accuracy

Unnamed: 0,Instrument,Date,Earnings Per Share - Actual,Earnings Per Share - Mean,Earnings Per Share - Actual Surprise
0,POOL.OQ,2023-02-16 07:00:00+00:00,1.82,1.987,-8.405
1,POOL.OQ,2022-10-20 07:00:00+00:00,4.78,4.5875,4.196
2,POOL.OQ,2022-07-21 07:00:00+00:00,7.63,7.517,1.503
3,POOL.OQ,2022-04-21 07:00:00+00:00,4.23,3.14867,34.342
4,POOL.OQ,2022-02-17 07:00:00+00:00,2.63,1.875,40.267
...,...,...,...,...,...
3003,AVY.N,2022-10-26 06:45:00+00:00,2.46,2.46323,-0.131
3004,AVY.N,2022-07-27 06:45:00+00:00,2.64,2.36485,11.635
3005,AVY.N,2022-04-26 06:45:00+00:00,2.4,2.17008,10.595
3006,AVY.N,2022-02-02 06:45:00+00:00,2.13,2.12375,0.294


In [16]:
# Check the column dtypes after the "Date" column has been converted with pd.to_datetime.
df_accuracy.dtypes

Instrument                                           string
Date                                    datetime64[ns, UTC]
Earnings Per Share - Actual                         Float64
Earnings Per Share - Mean                           Float64
Earnings Per Share - Actual Surprise                Float64
dtype: object

### Exploratory Data Analysis for Analyst Forecast Accuracy

Mean for the entire time frame for each instrument (just to get an overview)

In [20]:
# Per-instrument mean of the numeric EPS columns over the whole download.
# numeric_only=True makes the exclusion of the non-numeric "Date" column
# explicit: relying on pandas to drop it silently is deprecated (it triggers a
# FutureWarning) and the default changed in pandas 2.0.
df_averages = df_accuracy.groupby("Instrument").mean(numeric_only=True)
df_averages

  df_averages = df_accuracy.groupby("Instrument").mean()


Unnamed: 0_level_0,Earnings Per Share - Actual,Earnings Per Share - Mean,Earnings Per Share - Actual Surprise
Instrument,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A.N,1.253333,1.17476,6.625667
AAL.OQ,-0.351667,-0.408517,6.156333
AAP.N,3.138333,3.095295,2.1005
AAPL.OQ,1.538333,1.486917,3.306167
ABBV.N,3.418333,3.349458,2.076
...,...,...,...
YUM.N,1.123333,1.12382,-0.0935
ZBH.N,1.775,1.694997,5.18
ZBRA.OQ,4.43,4.276117,3.820833
ZION.OQ,1.431667,1.396037,2.782167


Categorizing the forecast accuracy:

In [8]:
import pandas as pd
import matplotlib.pyplot as plt

ModuleNotFoundError: No module named 'matplotlib'

 The mean surprise (in percent) per instrument, grouped into fixed percentage-range bins

In [None]:
plt.rcParams["figure.figsize"] = [10, 4]
plt.rcParams["figure.dpi"] = 150

# Bin edges (in percent) for the mean EPS surprise. Despite the variable name
# (kept so other cells that reference it keep working) these are fixed value
# ranges, not percentiles. The outer -inf / +inf edges keep instruments whose
# mean surprise lies outside (-100, 100]: pd.cut turns out-of-range values into
# NaN and value_counts() would then silently drop them from the chart.
percentiles = [-np.inf, -100, -20, -10, 0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, np.inf]
percentile_counts = (
    pd.cut(df_averages["Earnings Per Share - Actual Surprise"], percentiles)
    .value_counts()
    .sort_index()  # order the bars by bin, not by frequency
)

# Plot one bar per surprise range
plt.bar(percentile_counts.index.astype(str), percentile_counts.values)
plt.title('Distribution of Mean EPS Surprise per Company')
plt.xlabel('Mean EPS Surprise Range (%)')
plt.ylabel('Number of Companies')
plt.xticks(rotation=45, ha='right')  # interval labels are long; avoid overlap
plt.tight_layout()
plt.show()

ModuleNotFoundError: No module named 'matplotlib'

Surprise development over time

**TODO: this plot is hard to read because it draws one line per company. A better way to show how the surprise develops over time still needs to be found (work in progress).**

In [None]:
# Group the data by company and date and calculate the mean surprise percentag
df_grouped = df_accuracy.groupby(['Instrument', 'Date'])['Earnings Per Share - Actual Surprise'].mean().reset_index()

# Plot the data for each company as a separate line
for company in df_grouped['Instrument'].unique():
    company_data = df_grouped[df_grouped['Instrument'] == company]
    plt.plot(company_data['Date'], company_data['Earnings Per Share - Actual Surprise'], label=company)

# Add axis labels and legend
plt.xlabel('Date')
plt.ylabel('EPS Surprise (%)')
#plt.legend(loc='upper left')

# Show the plot
plt.show()