# Panle Regression - Firm Characteristics

### Random Effects Panel Regression

In [1]:
import pandas as pd
import numpy as np
import datetime as dt
import sklearn
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from linearmodels.panel import PanelOLS
from linearmodels.panel import RandomEffects


Data

In [2]:
df = pd.read_csv("Dataframes/macro_2.csv")
#X_var_names = ["Inflation Premium(%)", "90-Day AA Financial Commercial Paper Interest Rate","CBOE Crude Oil ETF Volatility Index", "Consumer Sentiment", "Unemployment rate"]
df["Earnings Per Share - Actual Surprise AbsVals"] = df["Earnings Per Share - Actual Surprise"].abs()
df = df.loc[:, ["Instrument", "Date", "GICS Industry Group Name", "Earnings Per Share - Actual Surprise", "Earnings Per Share - Actual Surprise AbsVals", "WACC Inflation Adjusted Risk Free Rate, (%)", "Inflation Premium(%)", "90-Day AA Financial Commercial Paper Interest Rate", "CPFFM", "CBOE Crude Oil ETF Volatility Index", "Consumer Sentiment", "Unemployment rate"]]
df

Unnamed: 0,Instrument,Date,GICS Industry Group Name,Earnings Per Share - Actual Surprise,Earnings Per Share - Actual Surprise AbsVals,"WACC Inflation Adjusted Risk Free Rate, (%)",Inflation Premium(%),90-Day AA Financial Commercial Paper Interest Rate,CPFFM,CBOE Crude Oil ETF Volatility Index,Consumer Sentiment,Unemployment rate
0,AVY.N,2013-01-01,Materials,11.178,11.178,,0.359537,0.17,0.02,22.54,76.67,7.733333
1,AVY.N,2013-04-01,Materials,2.482,2.482,,0.360572,0.14,0.03,23.47,81.67,7.533333
2,AVY.N,2013-07-01,Materials,1.068,1.068,,0.463789,0.13,0.04,24.10,81.57,7.233333
3,AVY.N,2013-10-01,Materials,8.095,8.095,,0.441854,0.13,0.04,19.74,76.93,6.933333
4,AVY.N,2014-01-01,Materials,1.471,1.471,,0.465460,0.13,0.05,19.56,80.93,6.666667
...,...,...,...,...,...,...,...,...,...,...,...,...
20115,POOL.OQ,2021-10-01,Retailing,17.194,17.194,1.527139,0.382645,0.14,0.06,43.04,69.90,4.200000
20116,POOL.OQ,2022-01-01,Retailing,40.267,40.267,1.515266,0.380946,0.47,0.32,52.70,63.13,3.800000
20117,POOL.OQ,2022-04-01,Retailing,34.342,34.342,2.325202,0.495588,1.39,0.61,50.46,57.87,3.600000
20118,POOL.OQ,2022-07-01,Retailing,1.503,1.503,3.092855,0.423045,2.89,0.64,49.86,56.10,3.566667


In [3]:
df["Date"] = pd.to_datetime(df["Date"])

Remove outliers

In [4]:
#removing outliers
summary_stats = df["Earnings Per Share - Actual Surprise"].describe()
Q1 = summary_stats.loc['25%']
Q3 = summary_stats.loc['75%']
IQR = Q3 - Q1
threshold = 7
surprise_outliers_removed = df["Earnings Per Share - Actual Surprise"].loc[~((df["Earnings Per Share - Actual Surprise"] < (Q1 - threshold * IQR)) | (df["Earnings Per Share - Actual Surprise"] > (Q3 + threshold * IQR)))]
df_clean = df.copy()
df_clean["Earnings Per Share - Actual Surprise"] = surprise_outliers_removed
df_clean = df_clean.dropna(subset=["Earnings Per Share - Actual Surprise"])
#df_clean = df_clean.dropna()
df_clean

Unnamed: 0,Instrument,Date,GICS Industry Group Name,Earnings Per Share - Actual Surprise,Earnings Per Share - Actual Surprise AbsVals,"WACC Inflation Adjusted Risk Free Rate, (%)",Inflation Premium(%),90-Day AA Financial Commercial Paper Interest Rate,CPFFM,CBOE Crude Oil ETF Volatility Index,Consumer Sentiment,Unemployment rate
0,AVY.N,2013-01-01,Materials,11.178,11.178,,0.359537,0.17,0.02,22.54,76.67,7.733333
1,AVY.N,2013-04-01,Materials,2.482,2.482,,0.360572,0.14,0.03,23.47,81.67,7.533333
2,AVY.N,2013-07-01,Materials,1.068,1.068,,0.463789,0.13,0.04,24.10,81.57,7.233333
3,AVY.N,2013-10-01,Materials,8.095,8.095,,0.441854,0.13,0.04,19.74,76.93,6.933333
4,AVY.N,2014-01-01,Materials,1.471,1.471,,0.465460,0.13,0.05,19.56,80.93,6.666667
...,...,...,...,...,...,...,...,...,...,...,...,...
20115,POOL.OQ,2021-10-01,Retailing,17.194,17.194,1.527139,0.382645,0.14,0.06,43.04,69.90,4.200000
20116,POOL.OQ,2022-01-01,Retailing,40.267,40.267,1.515266,0.380946,0.47,0.32,52.70,63.13,3.800000
20117,POOL.OQ,2022-04-01,Retailing,34.342,34.342,2.325202,0.495588,1.39,0.61,50.46,57.87,3.600000
20118,POOL.OQ,2022-07-01,Retailing,1.503,1.503,3.092855,0.423045,2.89,0.64,49.86,56.10,3.566667


In [None]:
#removing oly 99th percentile outliers
'''
# Calculate the 99th percentile
pct_99 = np.percentile(df['Earnings Per Share - Actual Surprise'], 99)

# Remove the data points above the 99th percentile
df_clean = df[df['Earnings Per Share - Actual Surprise'] <= pct_99].copy()

df_clean
'''

Standardising

get wierd results when standardising

In [5]:
rescale = df_clean
#rescale["Earnings Per Share - Actual Surprise"] = MinMaxScaler().fit_transform(np.array(rescale["Earnings Per Share - Actual Surprise"]).reshape(-1,1))
rescale["WACC Inflation Adjusted Risk Free Rate, (%)"] = MinMaxScaler().fit_transform(np.array(rescale["WACC Inflation Adjusted Risk Free Rate, (%)"]).reshape(-1,1))
rescale["Inflation Premium(%)"] = MinMaxScaler().fit_transform(np.array(rescale["Inflation Premium(%)"]).reshape(-1,1))
rescale["90-Day AA Financial Commercial Paper Interest Rate"] = MinMaxScaler().fit_transform(np.array(rescale["90-Day AA Financial Commercial Paper Interest Rate"]).reshape(-1,1))
rescale["CPFFM"] = MinMaxScaler().fit_transform(np.array(rescale["CPFFM"]).reshape(-1,1))
rescale["CBOE Crude Oil ETF Volatility Index"] = MinMaxScaler().fit_transform(np.array(rescale["CBOE Crude Oil ETF Volatility Index"]).reshape(-1,1))
rescale["Consumer Sentiment"] = MinMaxScaler().fit_transform(np.array(rescale["Consumer Sentiment"]).reshape(-1,1))
rescale["Unemployment rate"] = MinMaxScaler().fit_transform(np.array(rescale["Unemployment rate"]).reshape(-1,1))
df_clean = rescale
df_clean

Unnamed: 0,Instrument,Date,GICS Industry Group Name,Earnings Per Share - Actual Surprise,Earnings Per Share - Actual Surprise AbsVals,"WACC Inflation Adjusted Risk Free Rate, (%)",Inflation Premium(%),90-Day AA Financial Commercial Paper Interest Rate,CPFFM,CBOE Crude Oil ETF Volatility Index,Consumer Sentiment,Unemployment rate
0,AVY.N,2013-01-01,Materials,11.178,11.178,,0.552998,0.016949,0.150685,0.058466,0.480271,0.443262
1,AVY.N,2013-04-01,Materials,2.482,2.482,,0.555632,0.009685,0.164384,0.069128,0.597011,0.421986
2,AVY.N,2013-07-01,Materials,1.068,1.068,,0.818318,0.007264,0.178082,0.076350,0.594677,0.390071
3,AVY.N,2013-10-01,Materials,8.095,8.095,,0.762495,0.007264,0.178082,0.026367,0.486341,0.358156
4,AVY.N,2014-01-01,Materials,1.471,1.471,,0.822572,0.007264,0.191781,0.024304,0.579734,0.329787
...,...,...,...,...,...,...,...,...,...,...,...,...
20115,POOL.OQ,2021-10-01,Retailing,17.194,17.194,0.456602,0.611807,0.009685,0.205479,0.293477,0.322204,0.067376
20116,POOL.OQ,2022-01-01,Retailing,40.267,40.267,0.453705,0.607483,0.089588,0.561644,0.404219,0.164137,0.024823
20117,POOL.OQ,2022-04-01,Retailing,34.342,34.342,0.651306,0.899246,0.312349,0.958904,0.378539,0.041326,0.003546
20118,POOL.OQ,2022-07-01,Retailing,1.503,1.503,0.838591,0.714626,0.675545,1.000000,0.371661,0.000000,0.000000


In [6]:
df_panel = df_clean.copy()

#### Fixed Effects

In [7]:
df_clean['Instrument'] = df_clean['Instrument'].astype('category')
df_clean['Date'] = pd.to_datetime(df_clean['Date'])

# set the index to be the time variable and the cross-sectional variable
df_clean.set_index(['Instrument', 'Date'], inplace=True)

Actual Surprise Values

In [11]:
y = df_clean.loc[:, "Earnings Per Share - Actual Surprise"]
X = df_clean.loc[:, "Inflation Premium(%)":]

# perform the fixed effects panel regression
fixed_effects_model = PanelOLS(y, X, entity_effects=True, time_effects=True)

# fit the model and print the summary statistics
fixed_effects_results = fixed_effects_model.fit()
fixed_effects_results.summary

Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)


0,1,2,3
Dep. Variable:,Earnings Per Share - Actual Surprise,R-squared:,2.715e-07
Estimator:,PanelOLS,R-squared (Between):,-5.534e+22
No. Observations:,18608,R-squared (Within):,-2.221e+21
Date:,"Thu, Mar 02 2023",R-squared (Overall):,-1.662e+22
Time:,18:41:26,Log-likelihood,-7.792e+04
Cov. Estimator:,Unadjusted,,
,,F-statistic:,0.0008
Entities:,502,P-value,1.0000
Avg Obs:,37.068,Distribution:,"F(6,18061)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
Inflation Premium(%),-4.073e+12,6.058e+13,-0.0672,0.9464,-1.228e+14,1.147e+14
90-Day AA Financial Commercial Paper Interest Rate,-2.05e+11,3.919e+13,-0.0052,0.9958,-7.702e+13,7.661e+13
CPFFM,9.788e+11,4.832e+13,0.0203,0.9838,-9.373e+13,9.569e+13
CBOE Crude Oil ETF Volatility Index,4.028e+11,5.992e+13,0.0067,0.9946,-1.171e+14,1.179e+14
Consumer Sentiment,2.071e+10,5.12e+13,0.0004,0.9997,-1.003e+14,1.004e+14
Unemployment rate,-6.159e+11,6.291e+13,-0.0098,0.9922,-1.239e+14,1.227e+14


Absolute Surprise Values

In [12]:
y = df_clean.loc[:, "Earnings Per Share - Actual Surprise AbsVals"]
X = df_clean.loc[:, "Inflation Premium(%)":]

# perform the fixed effects panel regression
fixed_effects_model = PanelOLS(y, X, entity_effects=True, time_effects=True)

# fit the model and print the summary statistics
fixed_effects_results = fixed_effects_model.fit()
fixed_effects_results.summary

Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)


0,1,2,3
Dep. Variable:,Earnings Per Share - Actual Surprise AbsVals,R-squared:,3.064e-07
Estimator:,PanelOLS,R-squared (Between):,-1.449e+22
No. Observations:,18608,R-squared (Within):,-3.538e+21
Date:,"Thu, Mar 02 2023",R-squared (Overall):,-1.062e+22
Time:,18:42:44,Log-likelihood,-7.294e+04
Cov. Estimator:,Unadjusted,,
,,F-statistic:,0.0009
Entities:,502,P-value,1.0000
Avg Obs:,37.068,Distribution:,"F(6,18061)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
Inflation Premium(%),-9.108e+11,4.635e+13,-0.0196,0.9843,-9.177e+13,8.994e+13
90-Day AA Financial Commercial Paper Interest Rate,9.741e+11,2.999e+13,0.0325,0.9741,-5.78e+13,5.975e+13
CPFFM,1.995e+11,3.697e+13,0.0054,0.9957,-7.227e+13,7.267e+13
CBOE Crude Oil ETF Volatility Index,1.953e+12,4.585e+13,0.0426,0.9660,-8.792e+13,9.183e+13
Consumer Sentiment,1.637e+12,3.917e+13,0.0418,0.9667,-7.515e+13,7.842e+13
Unemployment rate,2.545e+12,4.814e+13,0.0529,0.9578,-9.181e+13,9.69e+13


By Industry: example tech industry

Actual Surprise Values

In [13]:
group = df_panel[df_panel['GICS Industry Group Name'] == 'Technology Hardware & Equipment']

group['Instrument'] = group['Instrument'].astype('category')
group['Date'] = pd.to_datetime(group['Date'])
group.set_index(['Instrument', 'Date'], inplace=True)

y = group.loc[:, "Earnings Per Share - Actual Surprise"]
X = group.loc[:, "Inflation Premium(%)":]

model = PanelOLS(y, X, entity_effects=True, time_effects=True).fit()
model

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  group['Instrument'] = group['Instrument'].astype('category')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  group['Date'] = pd.to_datetime(group['Date'])
Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)


0,1,2,3
Dep. Variable:,Earnings Per Share - Actual Surprise,R-squared:,0.0000
Estimator:,PanelOLS,R-squared (Between):,0.8434
No. Observations:,729,R-squared (Within):,-0.3101
Date:,"Thu, Mar 02 2023",R-squared (Overall):,0.1385
Time:,18:43:49,Log-likelihood,-2702.7
Cov. Estimator:,Unadjusted,,
,,F-statistic:,0.0000
Entities:,19,P-value,1.0000
Avg Obs:,38.368,Distribution:,"F(6,665)"
Min Obs:,28.000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
Inflation Premium(%),1.4294,1.628e+16,8.779e-17,1.0000,-3.197e+16,3.197e+16
90-Day AA Financial Commercial Paper Interest Rate,-28.208,8.197e+16,-3.441e-16,1.0000,-1.609e+17,1.609e+17
CPFFM,24.254,6.996e+16,3.467e-16,1.0000,-1.374e+17,1.374e+17
CBOE Crude Oil ETF Volatility Index,16.278,3.742e+16,4.35e-16,1.0000,-7.347e+16,7.347e+16
Consumer Sentiment,4.2385,1.232e+16,3.441e-16,1.0000,-2.418e+16,2.418e+16
Unemployment rate,-7.3377,2.132e+16,-3.441e-16,1.0000,-4.187e+16,4.187e+16


Absolute Surprise Values

In [14]:
group = df_panel[df_panel['GICS Industry Group Name'] == 'Technology Hardware & Equipment']

group['Instrument'] = group['Instrument'].astype('category')
group['Date'] = pd.to_datetime(group['Date'])
group.set_index(['Instrument', 'Date'], inplace=True)

y = group.loc[:, "Earnings Per Share - Actual Surprise AbsVals"]
X = group.loc[:, "Inflation Premium(%)":]

model = PanelOLS(y, X, entity_effects=True, time_effects=True).fit()
model

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  group['Instrument'] = group['Instrument'].astype('category')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  group['Date'] = pd.to_datetime(group['Date'])
Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)


0,1,2,3
Dep. Variable:,Earnings Per Share - Actual Surprise AbsVals,R-squared:,0.0000
Estimator:,PanelOLS,R-squared (Between):,-0.6845
No. Observations:,729,R-squared (Within):,-2.1084
Date:,"Thu, Mar 02 2023",R-squared (Overall):,-1.3433
Time:,18:45:28,Log-likelihood,-2614.9
Cov. Estimator:,Unadjusted,,
,,F-statistic:,0.0000
Entities:,19,P-value,1.0000
Avg Obs:,38.368,Distribution:,"F(6,665)"
Min Obs:,28.000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
Inflation Premium(%),-1.6573,1.443e+16,-1.148e-16,1.0000,-2.834e+16,2.834e+16
90-Day AA Financial Commercial Paper Interest Rate,-35.260,7.266e+16,-4.853e-16,1.0000,-1.427e+17,1.427e+17
CPFFM,-23.852,6.202e+16,-3.846e-16,1.0000,-1.218e+17,1.218e+17
CBOE Crude Oil ETF Volatility Index,16.414,3.317e+16,4.948e-16,1.0000,-6.513e+16,6.513e+16
Consumer Sentiment,12.716,1.092e+16,1.165e-15,1.0000,-2.144e+16,2.144e+16
Unemployment rate,7.3377,1.89e+16,3.882e-16,1.0000,-3.711e+16,3.711e+16
