In [33]:
import pandas as pd
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

import numpy as np

from linearmodels.panel import PanelOLS
import datetime as dt
import matplotlib.pyplot as plt
import plotly.graph_objs as go
import plotly.express as px
import seaborn as sns

In [1]:
# Load the regression dataset; every later cell reads from `df`.
df = pd.read_csv("Dataframes/economic_story_regression.csv")

# Candidate regressor column names.
# NOTE(review): this list still names "Earnings Per Share - Actual Surprise",
# which is dropped from `df` on the next statement -- confirm that whatever
# consumes X_var_names does not expect that column to exist.
X_var_names = [
    "Revenue - Actual",
    "Earnings Per Share - Actual Surprise",
    "Enterprise Value",
    "Earnings Per Share - Actual Surprise AbsVals",
    "Recommendation change",
    "Recommendation - Mean (1-5).1",
    "Recommendation - Mean (1-5)",
]

# Drop the signed EPS surprise; the visible cells below only read the
# "AbsVals" variant of that column.
df = df.drop(columns=["Earnings Per Share - Actual Surprise"])

# `infer_datetime_format` is deprecated since pandas 2.0 (format inference is
# the default behaviour now), so the flag is no longer passed.
df['Date'] = pd.to_datetime(df['Date'])
df

NameError: name 'pd' is not defined

In [19]:
# Narrow `df` to the instrument id, market capitalization and the
# "AbsVals" EPS surprise column.
market_cap_columns = [
    "Instrument",
    "Market Capitalization",
    "Earnings Per Share - Actual Surprise AbsVals",
]
market_cap = df.loc[:, market_cap_columns]
market_cap

Unnamed: 0,Instrument,Market Capitalization,Earnings Per Share - Actual Surprise AbsVals
0,AVY.N,3.426001e+09,11.178
1,AVY.N,4.309428e+09,2.482
2,AVY.N,4.258229e+09,1.068
3,AVY.N,4.273414e+09,8.095
4,AVY.N,4.877168e+09,1.471
...,...,...,...
19196,POOL.OQ,1.742020e+10,17.194
19197,POOL.OQ,2.268979e+10,40.267
19198,POOL.OQ,1.696712e+10,34.342
19199,POOL.OQ,1.405795e+10,1.503


In [20]:
# Summary statistics (count, mean, std, min, quartiles, max) for `market_cap`.
market_cap.describe()


Unnamed: 0,Market Capitalization,Earnings Per Share - Actual Surprise AbsVals
count,19167.0,19201.0
mean,50688300000.0,16.701591
std,120877100000.0,36.689541
min,55158910.0,0.0
25%,11184620000.0,2.67
50%,20617730000.0,6.701
75%,43570740000.0,15.199
max,2892120000000.0,503.622


In [36]:
# Bin market capitalization into five equal-frequency buckets (quintiles).
# Rows with a missing market capitalization receive a NaN bin and therefore
# fall out of the groupby below.
market_cap['Quantile'] = pd.qcut(market_cap['Market Capitalization'], q=5)

# Per bucket: mean market capitalization and mean "AbsVals" EPS surprise.
# `observed=False` makes the handling of the categorical group key explicit
# instead of relying on a default that newer pandas versions warn about.
grouped_data1 = market_cap.groupby('Quantile', observed=False).agg({
    'Market Capitalization': 'mean',
    'Earnings Per Share - Actual Surprise AbsVals': 'mean',
})

# Bare last expression -> rich table instead of wrapped plain-text print().
grouped_data1

                                    Market Capitalization  \
Quantile                                                    
(55158913.559, 9732329856.736]               6.610057e+09   
(9732329856.736, 16114044597.604]            1.277114e+10   
(16114044597.604, 26682703036.1]             2.087249e+10   
(26682703036.1, 55069141311.072]             3.775314e+10   
(55069141311.072, 2892119663160.0]           1.754136e+11   

                                    Earnings Per Share - Actual Surprise AbsVals  
Quantile                                                                          
(55158913.559, 9732329856.736]                                         21.355055  
(9732329856.736, 16114044597.604]                                      18.001306  
(16114044597.604, 26682703036.1]                                       17.397387  
(26682703036.1, 55069141311.072]                                       14.748064  
(55069141311.072, 2892119663160.0]                                     11.

In [38]:
# Copy the interval index into a plain string column so it can be the x axis.
grouped_data1['Quantile'] = grouped_data1.index.astype(str)

# Bar chart of the mean "AbsVals" EPS surprise for each market-cap bucket.
fig1 = px.bar(
    grouped_data1,
    x='Quantile',
    y='Earnings Per Share - Actual Surprise AbsVals',
    title='Mean EPS surprise by Market Capitalization Quantile',
)

# Swap the raw interval strings on the axis for short "Quantile N" labels.
fig1_tick_positions = list(range(5))
fig1_tick_labels = [f'Quantile {rank}' for rank in range(1, 6)]
fig1.update_layout(
    xaxis={
        'tickmode': 'array',
        'tickvals': fig1_tick_positions,
        'ticktext': fig1_tick_labels,
    }
)
fig1.show()

In [22]:
# Narrow `df` to the instrument id, actual revenue and the "AbsVals" EPS
# surprise column.
revenue_columns = [
    "Instrument",
    "Revenue - Actual",
    "Earnings Per Share - Actual Surprise AbsVals",
]
revenue = df.loc[:, revenue_columns]
revenue

Unnamed: 0,Instrument,Revenue - Actual,Earnings Per Share - Actual Surprise AbsVals
0,AVY.N,1.532200e+09,11.178
1,AVY.N,1.498900e+09,2.482
2,AVY.N,1.552300e+09,1.068
3,AVY.N,1.504900e+09,8.095
4,AVY.N,1.583900e+09,1.471
...,...,...,...
19196,POOL.OQ,1.411448e+09,17.194
19197,POOL.OQ,1.035557e+09,40.267
19198,POOL.OQ,1.412650e+09,34.342
19199,POOL.OQ,2.055818e+09,1.503


In [23]:
# Bin actual revenue into five equal-frequency buckets (quintiles).
revenue['Quantile'] = pd.qcut(revenue['Revenue - Actual'], q=5)

# Per bucket: mean actual revenue and mean "AbsVals" EPS surprise.
# `observed=False` makes the handling of the categorical group key explicit
# instead of relying on a default that newer pandas versions warn about.
grouped_data2 = revenue.groupby('Quantile', observed=False).agg({
    'Revenue - Actual': 'mean',
    'Earnings Per Share - Actual Surprise AbsVals': 'mean',
})

# Bare last expression -> rich table instead of wrapped plain-text print().
grouped_data2

                                Revenue - Actual  \
Quantile                                           
(-9010000000.001, 815700000.0]      4.743002e+08   
(815700000.0, 1708000000.0]         1.237760e+09   
(1708000000.0, 3128000000.0]        2.377353e+09   
(3128000000.0, 6502000000.0]        4.411223e+09   
(6502000000.0, 152871000000.0]      2.120203e+10   

                                Earnings Per Share - Actual Surprise AbsVals  
Quantile                                                                      
(-9010000000.001, 815700000.0]                                     24.770588  
(815700000.0, 1708000000.0]                                        16.443912  
(1708000000.0, 3128000000.0]                                       15.952560  
(3128000000.0, 6502000000.0]                                       11.766459  
(6502000000.0, 152871000000.0]                                     14.627472  


In [39]:
# Copy the interval index into a plain string column so it can be the x axis.
grouped_data2['Quantile'] = grouped_data2.index.astype(str)

# Bar chart of the mean "AbsVals" EPS surprise for each revenue bucket.
fig2 = px.bar(
    grouped_data2,
    x='Quantile',
    y='Earnings Per Share - Actual Surprise AbsVals',
    title='Mean EPS surprise by Revenue - Actual',
)

# Swap the raw interval strings on the axis for short "Quantile N" labels.
fig2_tick_positions = list(range(5))
fig2_tick_labels = [f'Quantile {rank}' for rank in range(1, 6)]
fig2.update_layout(
    xaxis={
        'tickmode': 'array',
        'tickvals': fig2_tick_positions,
        'ticktext': fig2_tick_labels,
    }
)
fig2.show()

In [24]:
# Narrow `df` to the instrument id, the EPS coefficient of variation and the
# "AbsVals" EPS surprise column.
eps_variation_columns = [
    "Instrument",
    "Earnings Per Share – Coefficient of Variation",
    "Earnings Per Share - Actual Surprise AbsVals",
]
eps_variation = df.loc[:, eps_variation_columns]
eps_variation

Unnamed: 0,Instrument,Earnings Per Share – Coefficient of Variation,Earnings Per Share - Actual Surprise AbsVals
0,AVY.N,3.625620,11.178
1,AVY.N,2.046169,2.482
2,AVY.N,1.551601,1.068
3,AVY.N,2.775994,8.095
4,AVY.N,3.204412,1.471
...,...,...,...
19196,POOL.OQ,2.836295,17.194
19197,POOL.OQ,3.269867,40.267
19198,POOL.OQ,6.630736,34.342
19199,POOL.OQ,3.625249,1.503


In [25]:
# Summary statistics (count, mean, std, min, quartiles, max) for `eps_variation`.
eps_variation.describe()

Unnamed: 0,Earnings Per Share – Coefficient of Variation,Earnings Per Share - Actual Surprise AbsVals
count,19201.0,19201.0
mean,5.705246,16.701591
std,32.707179,36.689541
min,-1025.0,0.0
25%,1.874267,2.67
50%,3.275962,6.701
75%,6.572211,15.199
max,1642.812172,503.622


In [29]:
# Bin the EPS coefficient of variation into eight equal-frequency buckets.
eps_variation['Quantile'] = pd.qcut(
    eps_variation['Earnings Per Share – Coefficient of Variation'], q=8
)

# Per bucket: mean coefficient of variation and mean "AbsVals" EPS surprise.
# `observed=False` makes the handling of the categorical group key explicit
# instead of relying on a default that newer pandas versions warn about.
grouped_data3 = eps_variation.groupby('Quantile', observed=False).agg({
    'Earnings Per Share – Coefficient of Variation': 'mean',
    'Earnings Per Share - Actual Surprise AbsVals': 'mean',
})

# Bare last expression -> rich table instead of wrapped plain-text print().
grouped_data3

                    Earnings Per Share – Coefficient of Variation  \
Quantile                                                            
(-1025.001, 1.251]                                     -14.362177   
(1.251, 1.874]                                           1.572522   
(1.874, 2.498]                                           2.173809   
(2.498, 3.276]                                           2.865207   
(3.276, 4.444]                                           3.809260   
(4.444, 6.572]                                           5.387865   
(6.572, 11.645]                                          8.573797   
(11.645, 1642.812]                                      35.630047   

                    Earnings Per Share - Actual Surprise AbsVals  
Quantile                                                          
(-1025.001, 1.251]                                     19.329337  
(1.251, 1.874]                                          4.971833  
(1.874, 2.498]                           

In [41]:
# Copy the interval index into a plain string column so it can be the x axis.
grouped_data3['Quantile'] = grouped_data3.index.astype(str)

# Bar chart of the mean "AbsVals" EPS surprise for each coefficient-of-variation
# bucket. The title names what is actually plotted (EPS surprise on the y axis,
# grouped by CoV quantile), following the wording used for the market-cap chart.
fig3 = px.bar(grouped_data3, x='Quantile', y='Earnings Per Share - Actual Surprise AbsVals',
             title='Mean EPS surprise by EPS Coefficient of Variation Quantile')

# Swap the raw interval strings on the axis for short "Quantile N" labels.
fig3.update_layout(
    xaxis = dict(
        tickmode = 'array',
        tickvals = [0,1,2,3,4,5,6,7],
        ticktext = ['Quantile 1', 'Quantile 2', 'Quantile 3', 'Quantile 4', 'Quantile 5','Quantile 6',
                    'Quantile 7', 'Quantile 8']
    )
)
fig3.show()