In [2]:
import pandas as pd
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

import numpy as np

from linearmodels.panel import PanelOLS
import datetime as dt
import matplotlib.pyplot as plt
import plotly.graph_objs as go
import plotly.express as px
import seaborn as sns

In [3]:
#decide which df to use
df = pd.read_csv("economic_story_regression.csv")
#df = df.loc[:, ["Instrument", "Date", "Earnings Per Share - Actual Surprise", "Revenue - Actual", "Net Income after Tax"]]
X_var_names = ["Revenue - Actual", "Earnings Per Share - Actual Surprise", "Enterprise Value", "Earnings Per Share - Actual Surprise AbsVals", "Recommendation change", "Recommendation - Mean (1-5).1", "Recommendation - Mean (1-5)"]
df = df.drop(["Earnings Per Share - Actual Surprise"], axis=1)
df['Date'] = pd.to_datetime(df['Date'], infer_datetime_format=True)
df

Unnamed: 0,Instrument,Date,Earnings Per Share - Actual Surprise AbsVals,GICS Industry Group Name,Earnings Per Share – Coefficient of Variation,Market Capitalization,Revenue - Actual,Number of Analysts,Recommendation - Mean (1-5),Recommendation change,...,Price Target - Standard Deviation,Enterprise Value To Sales (Daily Time Series Ratio),3 Month Total Return,Volume,loss firm status,CBOE Crude Oil ETF Volatility Index,90-Day AA Financial Commercial Paper Interest Rate,Inflation Risk Premium,"University of Michigan: Consumer Sentiment, Index 1966:Q1=100",Unemployment Rate
0,AVY.N,2013-01-01,11.178,Materials,3.625620,3.426001e+09,1.532200e+09,7.0,3.00000,0.00000,...,2.44949,0.799347,10.637479,207295.0,-1,22.538500,0.166000,0.359537,76.666667,7.733333
1,AVY.N,2013-04-01,2.482,Materials,2.046169,4.309428e+09,1.498900e+09,7.0,2.87500,-0.12500,...,3.92272,0.895787,24.148726,0.0,-1,23.468125,0.143750,0.360572,81.666667,7.533333
2,AVY.N,2013-07-01,1.068,Materials,1.551601,4.258229e+09,1.552300e+09,8.0,2.87500,0.00000,...,4.30946,0.906078,-0.054218,0.0,-1,24.099219,0.125625,0.463789,81.566667,7.233333
3,AVY.N,2013-10-01,8.095,Materials,2.775994,4.273414e+09,1.504900e+09,7.0,2.62500,-0.25000,...,3.45230,0.903176,2.467620,236291.0,-1,19.740938,0.125806,0.441854,76.933333,6.933333
4,AVY.N,2014-01-01,1.471,Materials,3.204412,4.877168e+09,1.583900e+09,8.0,2.33333,-0.29167,...,3.42555,0.920659,16.003852,157761.0,-1,19.563770,0.126230,0.465460,80.933333,6.666667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19196,POOL.OQ,2021-10-01,17.194,Retailing,2.836295,1.742020e+10,1.411448e+09,8.0,2.30000,0.00000,...,50.48823,3.684400,-5.130322,157740.0,-1,43.036308,0.140755,0.382645,69.900000,4.200000
19197,POOL.OQ,2022-01-01,40.267,Retailing,3.269867,2.268979e+10,1.035557e+09,8.0,2.30000,0.00000,...,41.33833,4.504382,30.489705,82349.0,-1,52.695806,0.467234,0.380946,63.133333,3.800000
19198,POOL.OQ,2022-04-01,34.342,Retailing,6.630736,1.696712e+10,1.412650e+09,9.0,2.00000,-0.30000,...,37.49206,3.422880,-25.160664,290897.0,-1,50.457581,1.394118,0.495588,57.866667,3.600000
19199,POOL.OQ,2022-07-01,1.503,Retailing,3.625249,1.405795e+10,2.055818e+09,9.0,2.20000,0.20000,...,57.57373,2.749481,-21.661570,185584.0,-1,49.861562,2.885882,0.423045,56.100000,3.566667


In [4]:
market_cap = df.loc[:, ["Instrument", "Market Capitalization", "Earnings Per Share - Actual Surprise AbsVals"]]
market_cap

Unnamed: 0,Instrument,Market Capitalization,Earnings Per Share - Actual Surprise AbsVals
0,AVY.N,3.426001e+09,11.178
1,AVY.N,4.309428e+09,2.482
2,AVY.N,4.258229e+09,1.068
3,AVY.N,4.273414e+09,8.095
4,AVY.N,4.877168e+09,1.471
...,...,...,...
19196,POOL.OQ,1.742020e+10,17.194
19197,POOL.OQ,2.268979e+10,40.267
19198,POOL.OQ,1.696712e+10,34.342
19199,POOL.OQ,1.405795e+10,1.503


In [5]:
market_cap.describe()


Unnamed: 0,Market Capitalization,Earnings Per Share - Actual Surprise AbsVals
count,19167.0,19201.0
mean,50688300000.0,16.701591
std,120877100000.0,36.689541
min,55158910.0,0.0
25%,11184620000.0,2.67
50%,20617730000.0,6.701
75%,43570740000.0,15.199
max,2892120000000.0,503.622


In [6]:
# bin the market capitalization into quantiles
market_cap['Quantile'] = pd.qcut(market_cap['Market Capitalization'], q=5)

# group by quantile and calculate the average market capitalization and EPS surprise
grouped_data1 = market_cap.groupby('Quantile').agg({'Market Capitalization': 'mean', 
                                                    'Earnings Per Share - Actual Surprise AbsVals': 'mean'})

print(grouped_data1)

                                    Market Capitalization  \
Quantile                                                    
(55158913.559, 9732329856.736]               6.610057e+09   
(9732329856.736, 16114044597.604]            1.277114e+10   
(16114044597.604, 26682703036.1]             2.087249e+10   
(26682703036.1, 55069141311.072]             3.775314e+10   
(55069141311.072, 2892119663160.0]           1.754136e+11   

                                    Earnings Per Share - Actual Surprise AbsVals  
Quantile                                                                          
(55158913.559, 9732329856.736]                                         21.355055  
(9732329856.736, 16114044597.604]                                      18.001306  
(16114044597.604, 26682703036.1]                                       17.397387  
(26682703036.1, 55069141311.072]                                       14.748064  
(55069141311.072, 2892119663160.0]                                     11.

In [7]:
grouped_data1['Quantile'] = grouped_data1.index.astype(str)

# create a bar plot of the mean EPS surprise by quantile
fig1 = px.bar(grouped_data1, x='Quantile', y='Earnings Per Share - Actual Surprise AbsVals',
             title='Mean EPS surprise by Market Capitalization Quantile')
fig1.update_layout(
    xaxis = dict(
        tickmode = 'array',
        tickvals = [0,1,2,3,4],
        ticktext = ['Quantile 1', 'Quantile 2', 'Quantile 3', 'Quantile 4', 'Quantile 5']
    )
)
fig1.show()

In [8]:
revenue = df.loc[:, ["Instrument", "Revenue - Actual", "Earnings Per Share - Actual Surprise AbsVals"]]
revenue

Unnamed: 0,Instrument,Revenue - Actual,Earnings Per Share - Actual Surprise AbsVals
0,AVY.N,1.532200e+09,11.178
1,AVY.N,1.498900e+09,2.482
2,AVY.N,1.552300e+09,1.068
3,AVY.N,1.504900e+09,8.095
4,AVY.N,1.583900e+09,1.471
...,...,...,...
19196,POOL.OQ,1.411448e+09,17.194
19197,POOL.OQ,1.035557e+09,40.267
19198,POOL.OQ,1.412650e+09,34.342
19199,POOL.OQ,2.055818e+09,1.503


In [10]:
quantiles = [float('-inf'), 0] + [x/5 for x in range(1, 6)]
revenue['Quantile'] = pd.qcut(revenue['Revenue - Actual'], q=quantiles)

# group by quantile and calculate the average market capitalization and EPS surprise
grouped_data2 = revenue.groupby('Quantile').agg({'Revenue - Actual': 'mean', 
                                                 'Earnings Per Share - Actual Surprise AbsVals': 'mean'})

print(grouped_data2)


ValueError: Quantiles must be in the range [0, 1]

In [12]:
revenue_neg = revenue[revenue['Revenue - Actual'] < 0]
revenue_pos = revenue[revenue['Revenue - Actual'] >= 0]
quantiles = [0] + [i/5 for i in range(1,6)]
labels = ['Negative'] + [f'Quantile_{i}' for i in range(1,6)]

revenue_neg['Quantile'] = labels[0]
revenue_pos['Quantile'] = pd.qcut(revenue_pos['Revenue - Actual'], q=quantiles, labels=labels[1:])

revenue = pd.concat([revenue_neg, revenue_pos], axis=0)

# group by quantile and calculate the average market capitalization and EPS surprise
grouped_data2 = revenue.groupby('Quantile').agg({'Revenue - Actual': 'mean', 
                                                 'Earnings Per Share - Actual Surprise AbsVals': 'mean'})

print(grouped_data2)


            Revenue - Actual  Earnings Per Share - Actual Surprise AbsVals
Quantile                                                                  
Negative       -3.047563e+09                                     11.087000
Quantile_1      4.773233e+08                                     24.777043
Quantile_2      1.238090e+09                                     16.441647
Quantile_3      2.377549e+09                                     15.951241
Quantile_4      4.411557e+09                                     11.766686
Quantile_5      2.120203e+10                                     14.627472




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [29]:
eps_variation = df.loc[:, ["Instrument", "Earnings Per Share – Coefficient of Variation", "Earnings Per Share - Actual Surprise AbsVals"]]
eps_variation

Unnamed: 0,Instrument,Earnings Per Share – Coefficient of Variation,Earnings Per Share - Actual Surprise AbsVals
0,AVY.N,3.625620,11.178
1,AVY.N,2.046169,2.482
2,AVY.N,1.551601,1.068
3,AVY.N,2.775994,8.095
4,AVY.N,3.204412,1.471
...,...,...,...
19196,POOL.OQ,2.836295,17.194
19197,POOL.OQ,3.269867,40.267
19198,POOL.OQ,6.630736,34.342
19199,POOL.OQ,3.625249,1.503


In [30]:
eps_variation.describe()

Unnamed: 0,Earnings Per Share – Coefficient of Variation,Earnings Per Share - Actual Surprise AbsVals
count,19201.0,19201.0
mean,5.705246,16.701591
std,32.707179,36.689541
min,-1025.0,0.0
25%,1.874267,2.67
50%,3.275962,6.701
75%,6.572211,15.199
max,1642.812172,503.622


In [65]:
# filter dataframe to only include negative values
negative_eps_variation = eps_variation[eps_variation['Earnings Per Share – Coefficient of Variation'] < 0]

# bin the remaining positive values into quantiles
positive_eps_variation = eps_variation[eps_variation['Earnings Per Share – Coefficient of Variation'] >= 0]
positive_quantiles = pd.qcut(positive_eps_variation['Earnings Per Share – Coefficient of Variation'], q=4, retbins=True)[1]

# combine the labels for the negative and positive quantiles
quantile_labels = ['Negative Range'] + [f'Quantile {i+1}' for i in range(4)] + ['Max Value']

# combine the quantile bins and labels
bins = [-np.inf] + list(np.round(positive_quantiles, 6)) + [positive_eps_variation['Earnings Per Share – Coefficient of Variation'].max()]
eps_variation_new = eps_variation.copy()
#eps_variation_new['Quantile'] = pd.cut(eps_variation_new['Earnings Per Share – Coefficient of Variation'], bins=bins, labels=quantile_labels)
eps_variation_new['Quantile'] = pd.cut(eps_variation_new['Earnings Per Share – Coefficient of Variation'], bins=bins)

# group by quantile and calculate the average market capitalization and EPS surprise
grouped_data3 = eps_variation_new.groupby('Quantile').agg({'Earnings Per Share – Coefficient of Variation': 'mean', 
                                                    'Earnings Per Share - Actual Surprise AbsVals': 'mean',
                                                    'Instrument': 'count'})

# calculate the percentage of companies in the negative range
percentage_negative_range = len(negative_eps_variation)/len(eps_variation_new)*100

# calculate the composition percentage for each quantile
total_count = grouped_data3['Instrument'].sum()
grouped_data3['Composition Percentage'] = grouped_data3['Instrument']/total_count*100

# print results
print(f"Percentage of companies in negative range: {percentage_negative_range:.2f}%")
print(grouped_data3[['Earnings Per Share – Coefficient of Variation', 'Earnings Per Share - Actual Surprise AbsVals', 'Composition Percentage']])


Percentage of companies in negative range: 3.08%
                             Earnings Per Share – Coefficient of Variation  \
Quantile                                                                     
(-inf, 0.0]                                                     -57.249475   
(0.0, 1.973222]                                                   1.358495   
(1.973222, 3.38495]                                               2.627875   
(3.38495, 6.74026]                                                4.744025   
(6.74026, 1642.812172]                                           22.241449   
(1642.812172, 1642.8121721]                                    1642.812172   

                             Earnings Per Share - Actual Surprise AbsVals  \
Quantile                                                                    
(-inf, 0.0]                                                     62.499997   
(0.0, 1.973222]                                                  4.641556   
(1.973222, 3.38495

In [None]:
#eps_variation_new['Quantile'] = pd.cut(eps_variation_new['Earnings Per Share – Coefficient of Variation'], bins=bins, labels=quantile_labels)

In [67]:
grouped_data3['Quantile'] = grouped_data3.index.astype(str)

grouped_data3 = grouped_data3.iloc[:-1]

# create a bar plot of the mean EPS surprise by quantile
fig3 = px.bar(grouped_data3, x='Quantile', y='Earnings Per Share - Actual Surprise AbsVals',
             title='Mean EPS - Coefficient of Variation')
fig3.update_layout(
    xaxis = dict(
        tickmode = 'array',
        tickvals = [0,1,2,3,4,5,6,7],
        ticktext = ['Quantile 1', 'Quantile 2', 'Quantile 3', 'Quantile 4', 'Quantile 5','Quantile 6',
                    'Quantile 7', 'Quantile 8']
    )
)
fig3.show()