# Exploratory Data Analysis - Analyst Forecast Accuracy

In [1]:
import eikon as ek
import pandas as pd
import numpy as np
import datetime
import plotly
import plotly.express as px
import plotly.graph_objs as go
ek.set_app_key("f47c330480d74c598b7e8ebc2539424e91764dd8")

https://community.developers.refinitiv.com/questions/73493/get-eps-historical-data-for-stocks.html

### Accuracy Variables  

**TR.EPSActValue** - The company's actual value normalized to reflect the I/B/E/S default currency and corporate actions (e.g. stock splits). Earnings Per Share is defined as the EPS that the contributing analyst considers to be that with which to value a security. This figure may include or exclude certain items depending on the contributing analyst's specific model.  

**TR.EPSMean** - The statistical average of all broker estimates determined to be on the majority accounting basis. Earnings Per Share is defined as the EPS that the contributing analyst considers to be that with which to value a security. This figure may include or exclude certain items depending on the contributing analyst's specific model.  

--> this is a analyst forecast variable

**TR.EPSActSurprise** - The difference between the actual and the last mean of the period, expressed as a percentage. Earnings Per Share is defined as the EPS that the contributing analyst considers to be that with which to value a security. This figure may include or exclude certain items depending on the contributing analyst's specific model.  

--> forecast error between actual EPS and TR.EPSMean  


### DataFrames

**df_accuracy** - basic dataframe containing quaterly data for all S&P 500 companies on EPS Actual, EPS Mean (the analyst forcast) and EPS Surprise (the forecast error in %)

**df_accuracy_new** - df_accuracy without extremely high or low values in the EPS Surprise column, so without outliers

**df_averages** - dataframe containing mean values of EPS Actual, EPS Mean and EPS Suprise over the entire time grouped by Instrument, takes df_accuracy_new as basis, so no outliers

**df_accuracy_yearly** - dataframe grouping quaterly datapoints into yearly data points

### 1) Summary statistics for EPS Actual, EPS Mean (forecast), and EPS Surprise

### Exploratory Data Analysis of Analyst Forcast Accuracy

In [2]:
accuracy_variables = ['TR.EPSactValue.date', 'TR.EPSActValue', "TR.EPSMean", "TR.EPSActSurprise"]
df_accuracy, e = ek.get_data('0#.SPX',accuracy_variables, parameters = {'SDate':'0','EDate':'-40','Period':'FQ0','Frq':'FQ'})
df_accuracy["Date"] = pd.to_datetime(df_accuracy["Date"])
df_accuracy = df_accuracy.dropna()
df_accuracy

Unnamed: 0,Instrument,Date,Earnings Per Share - Actual,Earnings Per Share - Mean,Earnings Per Share - Actual Surprise
0,POOL.OQ,2023-02-16 07:00:00+00:00,1.82,1.987,-8.405
1,POOL.OQ,2022-10-20 07:00:00+00:00,4.78,4.5875,4.196
2,POOL.OQ,2022-07-21 07:00:00+00:00,7.63,7.517,1.503
3,POOL.OQ,2022-04-21 07:00:00+00:00,4.23,3.14867,34.342
4,POOL.OQ,2022-02-17 07:00:00+00:00,2.63,1.875,40.267
...,...,...,...,...,...
20578,AVY.N,2014-01-31 08:30:00+00:00,0.69,0.68,1.471
20579,AVY.N,2013-10-25 08:30:00+00:00,0.69,0.63833,8.095
20580,AVY.N,2013-07-23 08:30:00+00:00,0.71,0.7025,1.068
20581,AVY.N,2013-04-24 08:30:00+00:00,0.59,0.57571,2.482


In [3]:
df_accuracy.describe()

Unnamed: 0,Earnings Per Share - Actual,Earnings Per Share - Mean,Earnings Per Share - Actual Surprise
count,19930.0,19930.0,19930.0
mean,1.382259,1.288404,190.969678
std,3.106907,2.898936,23391.75317
min,-16.43,-15.985,-8858.503
25%,0.5,0.463,0.468
50%,0.92,0.86763,4.548
75%,1.6,1.503225,12.249
max,133.441,126.76571,3297926.087


Min and max values seem very high. Next step is to check for outliers and remove them for better results:  

**Removing Outliers:**

In [4]:
#identifying outliers and replacing them with NA
summary_stats = df_accuracy["Earnings Per Share - Actual Surprise"].describe()
Q1 = summary_stats.loc['25%']
Q3 = summary_stats.loc['75%']
IQR = Q3 - Q1
threshold = 7 #1.5 is standard threshold but we still want to keep enough variation in the data so setting threshol higher here
surprise_outliers_removed = df_accuracy["Earnings Per Share - Actual Surprise"].loc[~((df_accuracy["Earnings Per Share - Actual Surprise"] < (Q1 - threshold * IQR)) | (df_accuracy["Earnings Per Share - Actual Surprise"] > (Q3 + threshold * IQR)))]
df_accuracy_new = df_accuracy.copy()
df_accuracy_new["Earnings Per Share - Actual Surprise"] = surprise_outliers_removed
df_accuracy_new

Unnamed: 0,Instrument,Date,Earnings Per Share - Actual,Earnings Per Share - Mean,Earnings Per Share - Actual Surprise
0,POOL.OQ,2023-02-16 07:00:00+00:00,1.82,1.987,-8.405
1,POOL.OQ,2022-10-20 07:00:00+00:00,4.78,4.5875,4.196
2,POOL.OQ,2022-07-21 07:00:00+00:00,7.63,7.517,1.503
3,POOL.OQ,2022-04-21 07:00:00+00:00,4.23,3.14867,34.342
4,POOL.OQ,2022-02-17 07:00:00+00:00,2.63,1.875,40.267
...,...,...,...,...,...
20578,AVY.N,2014-01-31 08:30:00+00:00,0.69,0.68,1.471
20579,AVY.N,2013-10-25 08:30:00+00:00,0.69,0.63833,8.095
20580,AVY.N,2013-07-23 08:30:00+00:00,0.71,0.7025,1.068
20581,AVY.N,2013-04-24 08:30:00+00:00,0.59,0.57571,2.482


In [5]:
na_count = df_accuracy_new["Earnings Per Share - Actual Surprise"].isna().sum()
na_count

800

--> deteceted outliers at the given threshold

In [6]:
#removing rows with NA (outliers)
df_accuracy_new = df_accuracy_new.dropna()
df_accuracy_new

Unnamed: 0,Instrument,Date,Earnings Per Share - Actual,Earnings Per Share - Mean,Earnings Per Share - Actual Surprise
0,POOL.OQ,2023-02-16 07:00:00+00:00,1.82,1.987,-8.405
1,POOL.OQ,2022-10-20 07:00:00+00:00,4.78,4.5875,4.196
2,POOL.OQ,2022-07-21 07:00:00+00:00,7.63,7.517,1.503
3,POOL.OQ,2022-04-21 07:00:00+00:00,4.23,3.14867,34.342
4,POOL.OQ,2022-02-17 07:00:00+00:00,2.63,1.875,40.267
...,...,...,...,...,...
20578,AVY.N,2014-01-31 08:30:00+00:00,0.69,0.68,1.471
20579,AVY.N,2013-10-25 08:30:00+00:00,0.69,0.63833,8.095
20580,AVY.N,2013-07-23 08:30:00+00:00,0.71,0.7025,1.068
20581,AVY.N,2013-04-24 08:30:00+00:00,0.59,0.57571,2.482


In [70]:
df_accuracy_new.describe()

Unnamed: 0,Earnings Per Share - Actual,Earnings Per Share - Mean,Earnings Per Share - Actual Surprise
count,19129.0,19129.0,19129.0
mean,1.423885,1.333085,6.929393
std,3.151161,2.948303,17.084754
min,-16.43,-15.985,-81.69
25%,0.53,0.50207,0.506
50%,0.95,0.89964,4.376
75%,1.63,1.53832,11.482
max,133.441,126.76571,94.748


### 2) Forecast error distribution

**Surprise Distribution - with outliers**

(commented it out because it's not that relevant but in case we want to look at it)

In [95]:
'''
fig = px.histogram(df_accuracy, x="Earnings Per Share - Actual Surprise", nbins=1000, title="EPS Surprise (%) Distribution (with outliers)")
fig.update_layout(yaxis=dict(tickformat=".2%"))
fig.show()
'''

'\nfig = px.histogram(df_accuracy, x="Earnings Per Share - Actual Surprise", nbins=1000, title="EPS Surprise (%) Distribution (with outliers)")\nfig.update_layout(yaxis=dict(tickformat=".2%"))\nfig.show()\n'

**Surprise Distribution - without outliers**

In [7]:
fig = px.histogram(df_accuracy_new, x="Earnings Per Share - Actual Surprise", nbins=1000, title="EPS Surprise (%) Distribution (outliers removed)")
fig.update_layout(yaxis=dict(tickformat=".2%"))
fig.show()

### 3) Average EPS Actual, EPS Mean and EPS Surprise per Instrument for our selected time period

In [8]:
df_averages = df_accuracy_new.groupby("Instrument").mean()
df_averages


The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.



Unnamed: 0_level_0,Earnings Per Share - Actual,Earnings Per Share - Mean,Earnings Per Share - Actual Surprise
Instrument,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A.N,0.77122,0.719961,7.255585
AAL.OQ,0.257,0.187633,6.460925
AAP.N,1.98878,1.966271,1.769024
AAPL.OQ,0.792457,0.736867,7.065512
ABBV.N,1.87625,1.824545,3.27955
...,...,...,...
YUM.N,0.87561,0.84134,5.429
ZBH.N,1.7895,1.727077,4.5081
ZBRA.OQ,2.389756,2.252027,5.404537
ZION.OQ,0.85825,0.774196,9.387575


Summary statistics per instrument

In [9]:
df_averages.describe()

Unnamed: 0,Earnings Per Share - Actual,Earnings Per Share - Mean,Earnings Per Share - Actual Surprise
count,502.0,502.0,502.0
mean,1.382178,1.294232,6.970454
std,2.42022,2.275579,6.357761
min,-0.291842,-0.307703,-54.155
25%,0.602805,0.577975,3.882537
50%,1.012051,0.954061,6.00572
75%,1.589085,1.492998,9.504716
max,46.97222,44.416074,48.632652


In [10]:
# calculate the mean of forecast errors for each company at each quarter
df_mean = df_accuracy_new.groupby(['Instrument', pd.Grouper(key='Date', freq='Q')])['Earnings Per Share - Actual Surprise'].mean().reset_index()

# create a line chart for each company
fig = go.Figure()

for company in df_mean['Instrument'].unique():
    # filter the data for each company
    df_company = df_mean[df_mean['Instrument'] == company]
    
    # add the line chart for the company
    fig.add_trace(go.Scatter(x=df_company['Date'], y=df_company['Earnings Per Share - Actual Surprise'],
                             mode='lines', name=company))

# set the chart title and axis labels
fig.update_layout(title='Quarterly Forecast Error by Company',
                   xaxis_title='Date', yaxis_title='Earnings Per Share - Actual Surprise')

# show the chart
fig.show()

### 4) Number of Instruments per surprise percentile range for each year

**df_accuracy_yearly** - df with outliers  
**df_accuracy_yearly_new** - df without outliers

In [11]:
# grouping data by year
df_accuracy_yearly, e = ek.get_data('0#.SPX', accuracy_variables, parameters = {'SDate':'0','EDate':'-10','Period':'FY0','Frq':'FY'})
df_accuracy_yearly["Date"] = pd.to_datetime(df_accuracy_yearly["Date"]).dt.year
df_accuracy_yearly = df_accuracy_yearly.dropna()
#df_accuracy_yearly

In [12]:
# removing outliers from yearly df
df_accuracy_yearly_new = df_accuracy_yearly.copy()
df_accuracy_yearly_new["Date"] = pd.to_datetime(df_accuracy_yearly_new["Date"]).dt.year
summary_stats_yearly = df_accuracy_yearly_new["Earnings Per Share - Actual Surprise"].describe()
Q1 = summary_stats_yearly.loc['25%']
Q3 = summary_stats_yearly.loc['75%']
IQR = Q3 - Q1
threshold2 = 7 #1.5 standard
surprise_outliers_removed2 = df_accuracy_yearly_new["Earnings Per Share - Actual Surprise"].loc[~((df_accuracy_yearly_new["Earnings Per Share - Actual Surprise"] < (Q1 - threshold2 * IQR)) | (df_accuracy_yearly_new["Earnings Per Share - Actual Surprise"] > (Q3 + threshold2 * IQR)))]
df_accuracy_yearly_new = df_accuracy.copy()
df_accuracy_yearly_new["Earnings Per Share - Actual Surprise"] = surprise_outliers_removed2
df_accuracy_yearly_new = df_accuracy_yearly_new.dropna()
df_accuracy_yearly_new["Date"] = pd.to_datetime(df_accuracy_yearly_new["Date"]).dt.year
#df_accuracy_yearly_new

**with** outliers - number of Instruments per surprise percentile group for each year

In [13]:
bins = [-10000,-100, -50, -20, -10, -5, 0, 5, 10, 20, 30, 40, 50, 60, 80, 100, 10000]
# Group the data by year and calculate the percentile counts for each year
df_percentiles = pd.DataFrame(index=range(df_accuracy_yearly["Date"].min(), df_accuracy_yearly["Date"].max()+1),
                              columns=[f"{bins[i]}-{bins[i+1]}" for i in range(len(bins)-1)])
for year in df_percentiles.index:
    df_year = df_accuracy_yearly[df_accuracy_yearly["Date"] == year]
    percentile_counts = pd.cut(df_year["Earnings Per Share - Actual Surprise"], bins=bins, labels=df_percentiles.columns).value_counts().sort_index()
    df_percentiles.loc[year] = percentile_counts.values
df_percentiles

Unnamed: 0,-10000--100,-100--50,-50--20,-20--10,-10--5,-5-0,0-5,5-10,10-20,20-30,30-40,40-50,50-60,60-80,80-100,100-10000
2012,1,0,0,2,2,27,79,3,3,0,0,0,0,0,0,0
2013,3,1,4,5,8,102,268,35,12,7,4,0,2,2,2,5
2014,1,1,4,7,6,125,278,23,10,5,2,1,0,0,0,3
2015,1,3,5,5,10,126,274,26,11,8,1,2,0,1,0,3
2016,3,2,8,2,17,99,281,32,22,4,2,0,4,1,0,4
2017,3,4,8,6,11,101,293,34,13,2,3,2,2,0,0,1
2018,2,0,4,11,8,97,309,33,12,8,1,0,1,2,0,1
2019,3,1,6,10,7,122,298,26,14,1,1,2,0,1,0,3
2020,2,2,6,4,9,98,300,35,22,6,4,3,0,1,2,3
2021,2,5,9,10,12,72,258,65,40,11,5,2,0,1,3,5


In [14]:
# Melt the DataFrame to long format
df_percentiles_plot = df_percentiles.reset_index().melt(id_vars="index", var_name="percentile", value_name="count")

# Create line plot
fig = px.line(df_percentiles_plot, x="index", y="count", color="percentile")

# Set plot title and axis labels
fig.update_layout(title="Development of EPS Surprise Percentiles",
                   xaxis_title="Year",
                   yaxis_title="Count")

# Show plot
fig.show()

**without** outliers - number of Instruments per surprise percentile group for each year

In [15]:
bins2 = [-50, -20, -10, -5, - 3, -2, -1, 0, 1, 2, 3, 5, 10, 20, 30, 40, 50]
# Group the data by year and calculate the percentile counts for each year
df_percentiles2 = pd.DataFrame(index=range(df_accuracy_yearly_new["Date"].min(), df_accuracy_yearly_new["Date"].max()+1),
                              columns=[f"{bins2[i]}-{bins2[i+1]}" for i in range(len(bins2)-1)])
for year in df_percentiles2.index:
    df_year2 = df_accuracy_yearly_new[df_accuracy_yearly_new["Date"] == year]
    percentile_counts2 = pd.cut(df_year2["Earnings Per Share - Actual Surprise"], bins=bins2, labels=df_percentiles2.columns).value_counts().sort_index()
    df_percentiles2.loc[year] = percentile_counts2.values

df_percentiles2

Unnamed: 0,-50--20,-20--10,-10--5,-5--3,-3--2,-2--1,-1-0,0-1,1-2,2-3,3-5,5-10,10-20,20-30,30-40,40-50
2012,0,0,0,0,2,0,3,5,2,1,1,0,0,0,0,0
2013,0,7,8,10,9,16,65,122,75,43,48,33,16,0,0,0
2014,0,6,6,15,10,14,63,146,74,44,49,25,15,1,0,0
2015,0,5,7,9,10,18,69,139,91,44,48,32,14,1,0,0
2016,0,7,14,18,12,14,60,135,83,44,43,29,15,1,0,0
2017,0,6,11,15,15,25,68,97,75,42,62,24,13,1,0,0
2018,0,10,15,14,12,27,60,105,82,38,41,39,17,0,0,0
2019,0,12,14,13,15,21,69,117,68,51,52,32,19,2,0,0
2020,0,6,15,15,13,24,59,124,78,32,46,50,23,1,0,0
2021,0,9,10,9,21,26,56,132,77,46,38,38,18,2,0,0


In [16]:
# Melt the DataFrame to long format
df_percentiles_plot2 = df_percentiles2.reset_index().melt(id_vars="index", var_name="percentile", value_name="count")

# Create line plot
fig = px.line(df_percentiles_plot2, x="index", y="count", color="percentile")

# Set plot title and axis labels
fig.update_layout(title="Development of EPS Surprise Percentiles",
                   xaxis_title="Year",
                   yaxis_title="Count")

# Show plot
fig.show()

Heatmap for data without outliers

In [17]:
heatmap_trace = go.Heatmap(z=df_percentiles2.values,
                           x=df_percentiles2.columns,
                           y=df_percentiles2.index,
                           colorscale='Viridis')

# create the layout
layout = go.Layout(title='Earnings Surprise Percentiles',
                   xaxis_title='Percentile Range',
                   yaxis_title='Year')

# create the figure object and plot
fig = go.Figure(data=[heatmap_trace], layout=layout)
fig.show()

--> we can see that most forecast errors are close to 0 (in the range around 0) and that there is not much change over the years.

### 5) Yearly Average Forecast Error of S&P 500 Companies

In [18]:
df_accuracy_yearly

Unnamed: 0,Instrument,Date,Earnings Per Share - Actual,Earnings Per Share - Mean,Earnings Per Share - Actual Surprise
0,POOL.OQ,2023,18.7,18.78,-0.426
1,POOL.OQ,2022,15.92,15.19,4.806
2,POOL.OQ,2021,9.13,8.45,8.047
3,POOL.OQ,2020,6.4,6.34111,0.929
4,POOL.OQ,2019,5.62,5.65,-0.531
...,...,...,...,...,...
5528,AVY.N,2017,4.02,3.967,1.336
5529,AVY.N,2016,3.44,3.369,2.108
5530,AVY.N,2015,3.11,3.01875,3.023
5531,AVY.N,2014,2.68,2.664,0.601


**with** outliers - Yearly Average Forecast Error of S&P 500 Companies

In [19]:
# Group the data by year and calculate the mean error for each year
df_yearly_mean = df_accuracy_yearly.groupby("Date")["Earnings Per Share - Actual Surprise"].mean().reset_index()

# Create a line plot using Plotly
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_yearly_mean ["Date"], y=df_yearly_mean ["Earnings Per Share - Actual Surprise"], mode="lines", name="Average S&P500 Forecast Error"))

# Set the title and axis labels
fig.update_layout(title="Yearly Average Forecast Error of S&P 500 Companies",
                   xaxis_title="Year",
                   yaxis_title="Forecast Error (%)")
fig.show()

**without** outliers - Yearly Average Forecast Error of S&P 500 Companies

In [20]:
# Group the data by year and calculate the mean error for each year
df_yearly_mean2 = df_accuracy_yearly_new.groupby("Date")["Earnings Per Share - Actual Surprise"].mean().reset_index()

# Create a line plot using Plotly
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_yearly_mean2 ["Date"], y=df_yearly_mean2 ["Earnings Per Share - Actual Surprise"], mode="lines", name="Average S&P500 Forecast Error"))

# Set the title and axis labels
fig.update_layout(title="Yearly Average Forecast Error of S&P 500 Companies",
                   xaxis_title="Year",
                   yaxis_title="Forecast Error (%)")
fig.show()

--> forecast error is on average in the positive range and around +1.35%. Over the years this has been quite constant. A peak is visible in 2020

### 6) Quaterly Average Forecast Error of S&P 500 Companies

(without outliers)

In [21]:
# Group the data by quarter and calculate the mean error for each quarter
df_quaterly_mean = df_accuracy_new.groupby(pd.Grouper(key="Date", freq="Q"))["Earnings Per Share - Actual Surprise"].mean().reset_index()

# Create a line plot using Plotly
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_quaterly_mean["Date"], y= df_quaterly_mean["Earnings Per Share - Actual Surprise"], mode='lines', name='Average Forecast Error'))

# Set the title and axis labels
fig.update_layout(title='Quarterly Average Forecast Error of S&P 500 Companies',
                   xaxis_title='Quarter',
                   yaxis_title='Forecast Error (%)')
fig.show()

--> looking at the quaterly average forecast error, it is usually at around +5% - +7% percent. 2020 - 2022 the error was much higher, probably due to the pandemic and geo-political uncertainty.

### 7)

In [110]:
'''
# create a new column that indicates the sign of the forecast error
df_accuracy_new['Sign'] = pd.cut(df_accuracy_new['Earnings Per Share - Actual Surprise'], 
                    bins=[-float("inf"), 0, float("inf")], 
                    labels=['Negative', 'Positive'])

# create a pivot table that shows the count of each sign for each company and quarter
pivot = pd.pivot_table(df_accuracy_new, index=['Instrument', pd.Grouper(key='Date', freq='Q')], 
                       columns='Sign', values='Earnings Per Share - Actual Surprise', 
                       aggfunc='count', fill_value=0)

# normalize the data to get the proportion of each sign for each company and quarter
pivot = pivot.div(pivot.sum(axis=1), axis=0)

# add a 'Zero' column if it is not already present in the pivot table
if 'Zero' not in pivot.columns:
    pivot['Zero'] = 0

# create a stacked bar chart for each company
fig = go.Figure()

for company in pivot.index.get_level_values('Instrument').unique():
    # filter the data for each company
    df_company = pivot.loc[company]
    
    # add the stacked bar chart for the company
    fig.add_trace(go.Bar(x=df_company.index, y=df_company['Positive'],
                         name='Positive', marker_color='green'))
    fig.add_trace(go.Bar(x=df_company.index, y=df_company['Negative'],
                         name='Negative', marker_color='red'))
    fig.add_trace(go.Bar(x=df_company.index, y=df_company['Zero'],
                         name='Zero', marker_color='gray'))

# set the chart title and axis labels
fig.update_layout(title='Quarterly Forecast Error by Company',
                   xaxis_title='Date', yaxis_title='Proportion of Forecast Errors')

# stack the bars and show the chart
fig.update_layout(barmode='stack')
fig.show()
'''

'\n# create a new column that indicates the sign of the forecast error\ndf_accuracy_new[\'Sign\'] = pd.cut(df_accuracy_new[\'Earnings Per Share - Actual Surprise\'], \n                    bins=[-float("inf"), 0, float("inf")], \n                    labels=[\'Negative\', \'Positive\'])\n\n# create a pivot table that shows the count of each sign for each company and quarter\npivot = pd.pivot_table(df_accuracy_new, index=[\'Instrument\', pd.Grouper(key=\'Date\', freq=\'Q\')], \n                       columns=\'Sign\', values=\'Earnings Per Share - Actual Surprise\', \n                       aggfunc=\'count\', fill_value=0)\n\n# normalize the data to get the proportion of each sign for each company and quarter\npivot = pivot.div(pivot.sum(axis=1), axis=0)\n\n# add a \'Zero\' column if it is not already present in the pivot table\nif \'Zero\' not in pivot.columns:\n    pivot[\'Zero\'] = 0\n\n# create a stacked bar chart for each company\nfig = go.Figure()\n\nfor company in pivot.index.get_l

In [111]:
'''# calculate the error direction for each company and quarter
df_accuracy_new["error_direction"] = pd.cut(df_accuracy_new["Earnings Per Share - Actual Surprise"], bins=[-float('inf'), 0, float('inf')], labels=['negative', 'positive'])
df_accuracy_new["error_direction"] = df_accuracy_new["error_direction"].fillna('zero')

# reshape the data into a pivot table
pivot_df = df_accuracy_new.pivot(index="Date", columns="Instrument", values="Earnings Per Share - Actual Surprise")
#pivot_df = pivot_df.where(pd.notnull(pivot_df), None)

# replace NA values with None
pivot_df = pivot_df.replace(pd.NA, None)

# create the heatmap trace
heatmap_trace = go.Heatmap(z=pivot_df.values,
                           x=pivot_df.columns,
                           y=pivot_df.index,
                           colorscale="RdBu_r",
                           reversescale=True)

# create the layout
layout = go.Layout(title='Consistency of Forecast Error Direction over Time for S&P 500 Companies',
                   xaxis=dict(title='Company'),
                   yaxis=dict(title='Date'))

# create the figure object and plot
fig = go.Figure(data=[heatmap_trace], layout=layout)
fig.show()'''

'# calculate the error direction for each company and quarter\ndf_accuracy_new["error_direction"] = pd.cut(df_accuracy_new["Earnings Per Share - Actual Surprise"], bins=[-float(\'inf\'), 0, float(\'inf\')], labels=[\'negative\', \'positive\'])\ndf_accuracy_new["error_direction"] = df_accuracy_new["error_direction"].fillna(\'zero\')\n\n# reshape the data into a pivot table\npivot_df = df_accuracy_new.pivot(index="Date", columns="Instrument", values="Earnings Per Share - Actual Surprise")\n#pivot_df = pivot_df.where(pd.notnull(pivot_df), None)\n\n# replace NA values with None\npivot_df = pivot_df.replace(pd.NA, None)\n\n# create the heatmap trace\nheatmap_trace = go.Heatmap(z=pivot_df.values,\n                           x=pivot_df.columns,\n                           y=pivot_df.index,\n                           colorscale="RdBu_r",\n                           reversescale=True)\n\n# create the layout\nlayout = go.Layout(title=\'Consistency of Forecast Error Direction over Time for S&P 5