# Panle Regression - Firm Characteristics

### Random Effects Panel Regression

In [8]:
import pandas as pd
import numpy as np
import datetime as dt
import sklearn
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from linearmodels.panel import PanelOLS
from linearmodels.panel import RandomEffects


Data

In [9]:
df = pd.read_csv("Dataframes/macro_regression.csv")
df["Date"] = pd.to_datetime(df["Date"])
df

Unnamed: 0,Instrument,Date,GICS Industry Group Name,Earnings Per Share - Actual Surprise,Earnings Per Share - Actual Surprise AbsVals,CBOE Crude Oil ETF Volatility Index,90-Day AA Financial Commercial Paper Interest Rate,Inflation Risk Premium,"University of Michigan: Consumer Sentiment, Index 1966:Q1=100",Unemployment Rate
0,AVY.N,2013-01-01,Materials,11.178,11.178,22.538500,0.166000,0.359537,76.666667,7.733333
1,AVY.N,2013-01-04,Materials,2.482,2.482,23.468125,0.143750,0.360572,81.666667,7.533333
2,AVY.N,2013-01-07,Materials,1.068,1.068,24.099219,0.125625,0.463789,81.566667,7.233333
3,AVY.N,2013-01-10,Materials,8.095,8.095,19.740938,0.125806,0.441854,76.933333,6.933333
4,AVY.N,2014-01-01,Materials,1.471,1.471,19.563770,0.126230,0.465460,80.933333,6.666667
...,...,...,...,...,...,...,...,...,...,...
19196,POOL.OQ,2021-01-10,Retailing,17.194,17.194,43.036308,0.140755,0.382645,69.900000,4.200000
19197,POOL.OQ,2022-01-01,Retailing,40.267,40.267,52.695806,0.467234,0.380946,63.133333,3.800000
19198,POOL.OQ,2022-01-04,Retailing,34.342,34.342,50.457581,1.394118,0.495588,57.866667,3.600000
19199,POOL.OQ,2022-01-07,Retailing,1.503,1.503,49.861562,2.885882,0.423045,56.100000,3.566667


Standardising

In [10]:
rescale = df.copy()
#rescale["Earnings Per Share - Actual Surprise"] = MinMaxScaler().fit_transform(np.array(rescale["Earnings Per Share - Actual Surprise"]).reshape(-1,1))
rescale["CBOE Crude Oil ETF Volatility Index"] = MinMaxScaler().fit_transform(np.array(rescale["CBOE Crude Oil ETF Volatility Index"]).reshape(-1,1))
rescale["90-Day AA Financial Commercial Paper Interest Rate"] = MinMaxScaler().fit_transform(np.array(rescale["90-Day AA Financial Commercial Paper Interest Rate"]).reshape(-1,1))
rescale["Inflation Risk Premium"] = MinMaxScaler().fit_transform(np.array(rescale["Inflation Risk Premium"]).reshape(-1,1))
rescale["University of Michigan: Consumer Sentiment, Index 1966:Q1=100"] = MinMaxScaler().fit_transform(np.array(rescale["University of Michigan: Consumer Sentiment, Index 1966:Q1=100"]).reshape(-1,1))
rescale["Unemployment Rate"] = MinMaxScaler().fit_transform(np.array(rescale["Unemployment Rate"]).reshape(-1,1))
df_clean = rescale
df_clean

Unnamed: 0,Instrument,Date,GICS Industry Group Name,Earnings Per Share - Actual Surprise,Earnings Per Share - Actual Surprise AbsVals,CBOE Crude Oil ETF Volatility Index,90-Day AA Financial Commercial Paper Interest Rate,Inflation Risk Premium,"University of Michigan: Consumer Sentiment, Index 1966:Q1=100",Unemployment Rate
0,AVY.N,2013-01-01,Materials,11.178,11.178,0.058434,0.015981,0.552998,0.480156,0.443262
1,AVY.N,2013-01-04,Materials,2.482,2.482,0.069092,0.010593,0.555632,0.596887,0.421986
2,AVY.N,2013-01-07,Materials,1.068,1.068,0.076327,0.006205,0.818318,0.594553,0.390071
3,AVY.N,2013-01-10,Materials,8.095,8.095,0.026361,0.006249,0.762495,0.486381,0.358156
4,AVY.N,2014-01-01,Materials,1.471,1.471,0.024330,0.006351,0.822572,0.579767,0.329787
...,...,...,...,...,...,...,...,...,...,...
19196,POOL.OQ,2021-01-10,Retailing,17.194,17.194,0.293433,0.009868,0.611807,0.322179,0.067376
19197,POOL.OQ,2022-01-01,Retailing,40.267,40.267,0.404175,0.088919,0.607483,0.164202,0.024823
19198,POOL.OQ,2022-01-04,Retailing,34.342,34.342,0.378514,0.313346,0.899246,0.041245,0.003546
19199,POOL.OQ,2022-01-07,Retailing,1.503,1.503,0.371681,0.674548,0.714626,0.000000,0.000000


In [11]:
df_panel = df_clean.copy()

#### Fixed Effects

In a fixed effects panel regression, the individual-specific effects are modeled as fixed variables that do not vary across time. This means that the coefficients of the independent variables are estimated based on the within-entity variation in the data, which eliminates the effect of time-invariant unobserved heterogeneity.

Fixed effects models are useful when there are time-invariant unobserved variables that may affect the dependent variable, but are not included in the model. By modeling the individual-specific effects as fixed variables, fixed effects models can control for this unobserved heterogeneity and estimate the coefficients of the independent variables based on the within-entity variation, which provides more efficient estimates of the coefficients.

One limitation of fixed effects models is that they do not allow for testing the effect of time-invariant variables on the dependent variable. In addition, fixed effects models may suffer from the incidental parameter problem, which may lead to biased estimates of the coefficients of the independent variables in the presence of a large number of fixed effects.

In [12]:
df_clean['Instrument'] = df_clean['Instrument'].astype('category')
df_clean['Date'] = pd.to_datetime(df_clean['Date'])

# set the index to be the time variable and the cross-sectional variable
df_clean.set_index(['Instrument', 'Date'], inplace=True)

Regression with absolute surprise values

In [13]:
y = df_clean.loc[:, "Earnings Per Share - Actual Surprise AbsVals"]
X = df_clean.loc[:, "CBOE Crude Oil ETF Volatility Index":]

# perform the fixed effects panel regression
fixed_effects_model = PanelOLS(y, X, entity_effects=True, time_effects=True, drop_absorbed=True)

# fit the model and print the summary statistics
fixed_effects_results = fixed_effects_model.fit()
fixed_effects_results.summary

0,1,2,3
Dep. Variable:,Earnings Per Share - Actual Surprise AbsVals,R-squared:,1.209e-07
Estimator:,PanelOLS,R-squared (Between):,-8.624e+21
No. Observations:,19201,R-squared (Within):,-7.935e+20
Date:,"Sun, Mar 05 2023",R-squared (Overall):,-3.945e+21
Time:,20:56:03,Log-likelihood,-9.359e+04
Cov. Estimator:,Unadjusted,,
,,F-statistic:,0.0005
Entities:,502,P-value,1.0000
Avg Obs:,38.249,Distribution:,"F(5,18655)"
Min Obs:,4.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
CBOE Crude Oil ETF Volatility Index,-3.193e+12,1.343e+14,-0.0238,0.9810,-2.664e+14,2.6e+14
90-Day AA Financial Commercial Paper Interest Rate,-2.086e+12,7.654e+13,-0.0272,0.9783,-1.521e+14,1.479e+14
Inflation Risk Premium,-8.484e+11,9.367e+13,-0.0091,0.9928,-1.845e+14,1.828e+14
"University of Michigan: Consumer Sentiment, Index 1966:Q1=100",9.172e+10,6.318e+13,0.0015,0.9988,-1.237e+14,1.239e+14
Unemployment Rate,-3.879e+12,8.788e+13,-0.0441,0.9648,-1.761e+14,1.684e+14


By Industry: example tech industry

In [14]:
group = df_panel[df_panel['GICS Industry Group Name'] == 'Technology Hardware & Equipment']

group['Instrument'] = group['Instrument'].astype('category')
group['Date'] = pd.to_datetime(group['Date'])
group.set_index(['Instrument', 'Date'], inplace=True)

y = group.loc[:, "Earnings Per Share - Actual Surprise AbsVals"]
X = group.loc[:, "CBOE Crude Oil ETF Volatility Index":]

model = PanelOLS(y, X, entity_effects=True, time_effects=True).fit()
model

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  group['Instrument'] = group['Instrument'].astype('category')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  group['Date'] = pd.to_datetime(group['Date'])


0,1,2,3
Dep. Variable:,Earnings Per Share - Actual Surprise AbsVals,R-squared:,0.0000
Estimator:,PanelOLS,R-squared (Between):,-127.49
No. Observations:,731,R-squared (Within):,-156.68
Date:,"Sun, Mar 05 2023",R-squared (Overall):,-144.70
Time:,20:56:03,Log-likelihood,-2781.8
Cov. Estimator:,Unadjusted,,
,,F-statistic:,0.0000
Entities:,19,P-value,1.0000
Avg Obs:,38.474,Distribution:,"F(5,668)"
Min Obs:,28.000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
CBOE Crude Oil ETF Volatility Index,-96.617,3.91e+16,-2.471e-15,1.0000,-7.677e+16,7.677e+16
90-Day AA Financial Commercial Paper Interest Rate,640.54,2.051e+17,3.123e-15,1.0000,-4.028e+17,4.028e+17
Inflation Risk Premium,-23.342,1.682e+16,-1.388e-15,1.0000,-3.302e+16,3.302e+16
"University of Michigan: Consumer Sentiment, Index 1966:Q1=100",15.800,1.023e+16,1.544e-15,1.0000,-2.01e+16,2.01e+16
Unemployment Rate,120.11,4.564e+16,2.632e-15,1.0000,-8.961e+16,8.961e+16


# Checking for COVID Years

In [15]:
df_clean = df_panel.copy()
df_clean

Unnamed: 0,Instrument,Date,GICS Industry Group Name,Earnings Per Share - Actual Surprise,Earnings Per Share - Actual Surprise AbsVals,CBOE Crude Oil ETF Volatility Index,90-Day AA Financial Commercial Paper Interest Rate,Inflation Risk Premium,"University of Michigan: Consumer Sentiment, Index 1966:Q1=100",Unemployment Rate
0,AVY.N,2013-01-01,Materials,11.178,11.178,0.058434,0.015981,0.552998,0.480156,0.443262
1,AVY.N,2013-01-04,Materials,2.482,2.482,0.069092,0.010593,0.555632,0.596887,0.421986
2,AVY.N,2013-01-07,Materials,1.068,1.068,0.076327,0.006205,0.818318,0.594553,0.390071
3,AVY.N,2013-01-10,Materials,8.095,8.095,0.026361,0.006249,0.762495,0.486381,0.358156
4,AVY.N,2014-01-01,Materials,1.471,1.471,0.024330,0.006351,0.822572,0.579767,0.329787
...,...,...,...,...,...,...,...,...,...,...
19196,POOL.OQ,2021-01-10,Retailing,17.194,17.194,0.293433,0.009868,0.611807,0.322179,0.067376
19197,POOL.OQ,2022-01-01,Retailing,40.267,40.267,0.404175,0.088919,0.607483,0.164202,0.024823
19198,POOL.OQ,2022-01-04,Retailing,34.342,34.342,0.378514,0.313346,0.899246,0.041245,0.003546
19199,POOL.OQ,2022-01-07,Retailing,1.503,1.503,0.371681,0.674548,0.714626,0.000000,0.000000


In [16]:
covid_start = pd.to_datetime("2020-01-01")

df_covid = df_clean[df_clean["Date"] >= covid_start]

df_nocovid = df_clean[df_clean["Date"] < covid_start]

In [17]:
df_covid['Instrument'] = df_covid['Instrument'].astype('category')
df_covid['Date'] = pd.to_datetime(df_covid['Date'])

# set the index to be the time variable and the cross-sectional variable
df_covid.set_index(['Instrument', 'Date'], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_covid['Instrument'] = df_covid['Instrument'].astype('category')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_covid['Date'] = pd.to_datetime(df_covid['Date'])


In [18]:
df_nocovid['Instrument'] = df_nocovid['Instrument'].astype('category')
df_nocovid['Date'] = pd.to_datetime(df_nocovid['Date'])

# set the index to be the time variable and the cross-sectional variable
df_nocovid.set_index(['Instrument', 'Date'], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_nocovid['Instrument'] = df_nocovid['Instrument'].astype('category')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_nocovid['Date'] = pd.to_datetime(df_nocovid['Date'])


# COVID

In [19]:
y = df_covid.loc[:, "Earnings Per Share - Actual Surprise AbsVals"]
X = df_covid.loc[:, "CBOE Crude Oil ETF Volatility Index":]

# perform the fixed effects panel regression
fixed_effects_model = PanelOLS(y, X, entity_effects=True, time_effects=True)

# fit the model and print the summary statistics
fixed_effects_results = fixed_effects_model.fit()
fixed_effects_results.summary


0,1,2,3
Dep. Variable:,Earnings Per Share - Actual Surprise AbsVals,R-squared:,3.711e-06
Estimator:,PanelOLS,R-squared (Between):,-4.663e+23
No. Observations:,5883,R-squared (Within):,-4.017e+23
Date:,"Sun, Mar 05 2023",R-squared (Overall):,-4.506e+23
Time:,20:57:03,Log-likelihood,-2.923e+04
Cov. Estimator:,Unadjusted,,
,,F-statistic:,0.0040
Entities:,501,P-value,1.0000
Avg Obs:,11.743,Distribution:,"F(5,5366)"
Min Obs:,4.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
CBOE Crude Oil ETF Volatility Index,-1.003e+14,7.118e+14,-0.1409,0.8879,-1.496e+15,1.295e+15
90-Day AA Financial Commercial Paper Interest Rate,9.317e+13,6.763e+14,0.1377,0.8904,-1.233e+15,1.419e+15
Inflation Risk Premium,6.127e+12,3.482e+14,0.0176,0.9860,-6.765e+14,6.887e+14
"University of Michigan: Consumer Sentiment, Index 1966:Q1=100",5.146e+13,4.096e+14,0.1257,0.9000,-7.515e+14,8.544e+14
Unemployment Rate,8.701e+13,6.228e+14,0.1397,0.8889,-1.134e+15,1.308e+15


In [20]:
y = df_nocovid.loc[:, "Earnings Per Share - Actual Surprise AbsVals"]
X = df_nocovid.loc[:, "CBOE Crude Oil ETF Volatility Index":]

# perform the fixed effects panel regression
fixed_effects_model = PanelOLS(y, X, entity_effects=True, time_effects=True)

# fit the model and print the summary statistics
fixed_effects_results = fixed_effects_model.fit()
fixed_effects_results.summary


0,1,2,3
Dep. Variable:,Earnings Per Share - Actual Surprise AbsVals,R-squared:,8.55e-08
Estimator:,PanelOLS,R-squared (Between):,-8.871e+21
No. Observations:,13318,R-squared (Within):,-2.793e+20
Date:,"Sun, Mar 05 2023",R-squared (Overall):,-3.995e+21
Time:,20:57:11,Log-likelihood,-6.328e+04
Cov. Estimator:,Unadjusted,,
,,F-statistic:,0.0002
Entities:,498,P-value,1.0000
Avg Obs:,26.743,Distribution:,"F(5,12788)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
CBOE Crude Oil ETF Volatility Index,1.325e+12,1.398e+14,0.0095,0.9924,-2.727e+14,2.754e+14
90-Day AA Financial Commercial Paper Interest Rate,2.864e+11,8.691e+13,0.0033,0.9974,-1.701e+14,1.706e+14
Inflation Risk Premium,2.201e+12,1.276e+14,0.0173,0.9862,-2.478e+14,2.522e+14
"University of Michigan: Consumer Sentiment, Index 1966:Q1=100",-4.203e+12,1.449e+14,-0.0290,0.9769,-2.882e+14,2.798e+14
Unemployment Rate,-3.099e+12,1.2e+14,-0.0258,0.9794,-2.383e+14,2.321e+14


### Based on these results we can conclude that macro factors are a bad indicator for prediction