# Panle Regression - Firm Characteristics

### Random Effects Panel Regression

In [13]:
import pandas as pd
import numpy as np
import datetime as dt
import sklearn
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from linearmodels.panel import PanelOLS
from linearmodels.panel import RandomEffects

Data

In [14]:
df = pd.read_csv("Dataframes/analysts.csv")
df

Unnamed: 0,Instrument,Date,GICS Industry Group Name,Earnings Per Share - Actual Surprise,Earnings Per Share - Actual Surprise AbsVals,Recommendation change,Earnings Per Share – Coefficient of Variation,Number of Analysts,Recommendation - Mean (1-5),Recommendation - Mean (1-5).1,Price Target - Mean,Price Target - Standard Deviation
0,AVY.N,2013-01-01,Materials,11.178,11.178,0.00000,3.625620,7.0,3.00000,3.000,36.00000,2.44949
1,AVY.N,2013-04-01,Materials,2.482,2.482,-0.12500,2.046169,7.0,2.87500,3.000,42.57143,3.92272
2,AVY.N,2013-07-01,Materials,1.068,1.068,0.00000,1.551601,8.0,2.87500,2.875,45.00000,4.30946
3,AVY.N,2013-10-01,Materials,8.095,8.095,-0.25000,2.775994,7.0,2.62500,2.875,47.71429,3.45230
4,AVY.N,2014-01-01,Materials,1.471,1.471,-0.29167,3.204412,8.0,2.33333,2.625,54.62500,3.42555
...,...,...,...,...,...,...,...,...,...,...,...,...
20115,POOL.OQ,2021-10-01,Retailing,17.194,17.194,0.00000,2.836295,8.0,2.30000,2.300,545.28571,50.48823
20116,POOL.OQ,2022-01-01,Retailing,40.267,40.267,0.00000,3.269867,8.0,2.30000,2.300,571.00000,41.33833
20117,POOL.OQ,2022-04-01,Retailing,34.342,34.342,-0.30000,6.630736,9.0,2.00000,2.300,519.88889,37.49206
20118,POOL.OQ,2022-07-01,Retailing,1.503,1.503,0.20000,3.625249,9.0,2.20000,2.000,435.37500,57.57373


In [15]:
df["Date"] = pd.to_datetime(df["Date"])

Remove outliers

In [16]:
#removing outliers
summary_stats = df["Earnings Per Share - Actual Surprise"].describe()
Q1 = summary_stats.loc['25%']
Q3 = summary_stats.loc['75%']
IQR = Q3 - Q1
threshold = 7
surprise_outliers_removed = df["Earnings Per Share - Actual Surprise"].loc[~((df["Earnings Per Share - Actual Surprise"] < (Q1 - threshold * IQR)) | (df["Earnings Per Share - Actual Surprise"] > (Q3 + threshold * IQR)))]
df_clean = df.copy()
df_clean["Earnings Per Share - Actual Surprise"] = surprise_outliers_removed
df_clean = df_clean.dropna(subset=["Earnings Per Share - Actual Surprise"])
#df_clean = df_clean.dropna()
df_clean

Unnamed: 0,Instrument,Date,GICS Industry Group Name,Earnings Per Share - Actual Surprise,Earnings Per Share - Actual Surprise AbsVals,Recommendation change,Earnings Per Share – Coefficient of Variation,Number of Analysts,Recommendation - Mean (1-5),Recommendation - Mean (1-5).1,Price Target - Mean,Price Target - Standard Deviation
0,AVY.N,2013-01-01,Materials,11.178,11.178,0.00000,3.625620,7.0,3.00000,3.000,36.00000,2.44949
1,AVY.N,2013-04-01,Materials,2.482,2.482,-0.12500,2.046169,7.0,2.87500,3.000,42.57143,3.92272
2,AVY.N,2013-07-01,Materials,1.068,1.068,0.00000,1.551601,8.0,2.87500,2.875,45.00000,4.30946
3,AVY.N,2013-10-01,Materials,8.095,8.095,-0.25000,2.775994,7.0,2.62500,2.875,47.71429,3.45230
4,AVY.N,2014-01-01,Materials,1.471,1.471,-0.29167,3.204412,8.0,2.33333,2.625,54.62500,3.42555
...,...,...,...,...,...,...,...,...,...,...,...,...
20115,POOL.OQ,2021-10-01,Retailing,17.194,17.194,0.00000,2.836295,8.0,2.30000,2.300,545.28571,50.48823
20116,POOL.OQ,2022-01-01,Retailing,40.267,40.267,0.00000,3.269867,8.0,2.30000,2.300,571.00000,41.33833
20117,POOL.OQ,2022-04-01,Retailing,34.342,34.342,-0.30000,6.630736,9.0,2.00000,2.300,519.88889,37.49206
20118,POOL.OQ,2022-07-01,Retailing,1.503,1.503,0.20000,3.625249,9.0,2.20000,2.000,435.37500,57.57373


In [17]:
#other way to deal with outlierrs: removing only 99th percentile outliers
'''
# Calculate the 99th percentile
pct_99 = np.percentile(df['Earnings Per Share - Actual Surprise'], 99)

# Remove the data points above the 99th percentile
df_clean = df[df['Earnings Per Share - Actual Surprise'] <= pct_99].copy()

df_clean
'''

"\n# Calculate the 99th percentile\npct_99 = np.percentile(df['Earnings Per Share - Actual Surprise'], 99)\n\n# Remove the data points above the 99th percentile\ndf_clean = df[df['Earnings Per Share - Actual Surprise'] <= pct_99].copy()\n\ndf_clean\n"

Standardising

In [18]:
rescale = df_clean
#rescale["Earnings Per Share - Actual Surprise"] = MinMaxScaler().fit_transform(np.array(rescale["Earnings Per Share - Actual Surprise"]).reshape(-1,1))
rescale["Earnings Per Share – Coefficient of Variation"] = MinMaxScaler().fit_transform(np.array(rescale["Earnings Per Share – Coefficient of Variation"]).reshape(-1,1))
rescale["Number of Analysts"] = MinMaxScaler().fit_transform(np.array(rescale["Number of Analysts"]).reshape(-1,1))
rescale["Recommendation change"] = MinMaxScaler().fit_transform(np.array(rescale["Recommendation change"]).reshape(-1,1))
rescale["Price Target - Mean"] = MinMaxScaler().fit_transform(np.array(rescale["Price Target - Mean"]).reshape(-1,1))
rescale["Price Target - Standard Deviation"] = MinMaxScaler().fit_transform(np.array(rescale["Price Target - Standard Deviation"]).reshape(-1,1))
df_clean = rescale
df_clean

Unnamed: 0,Instrument,Date,GICS Industry Group Name,Earnings Per Share - Actual Surprise,Earnings Per Share - Actual Surprise AbsVals,Recommendation change,Earnings Per Share – Coefficient of Variation,Number of Analysts,Recommendation - Mean (1-5),Recommendation - Mean (1-5).1,Price Target - Mean,Price Target - Standard Deviation
0,AVY.N,2013-01-01,Materials,11.178,11.178,0.538462,0.534474,0.125000,3.00000,3.000,0.005786,0.002079
1,AVY.N,2013-04-01,Materials,2.482,2.482,0.480770,0.533129,0.125000,2.87500,3.000,0.006891,0.003330
2,AVY.N,2013-07-01,Materials,1.068,1.068,0.538462,0.532708,0.145833,2.87500,2.875,0.007299,0.003658
3,AVY.N,2013-10-01,Materials,8.095,8.095,0.423078,0.533751,0.125000,2.62500,2.875,0.007756,0.002930
4,AVY.N,2014-01-01,Materials,1.471,1.471,0.403846,0.534116,0.145833,2.33333,2.625,0.008917,0.002908
...,...,...,...,...,...,...,...,...,...,...,...,...
20115,POOL.OQ,2021-10-01,Retailing,17.194,17.194,0.538462,0.533802,0.145833,2.30000,2.300,0.091403,0.042855
20116,POOL.OQ,2022-01-01,Retailing,40.267,40.267,0.538462,0.534171,0.145833,2.30000,2.300,0.095726,0.035088
20117,POOL.OQ,2022-04-01,Retailing,34.342,34.342,0.400001,0.537034,0.166667,2.00000,2.300,0.087134,0.031824
20118,POOL.OQ,2022-07-01,Retailing,1.503,1.503,0.630770,0.534474,0.166667,2.20000,2.000,0.072926,0.048869


In [19]:
df_panel = df_clean.copy()

#### Fixed Effects

In a fixed effects panel regression, the individual-specific effects are modeled as fixed variables that do not vary across time. This means that the coefficients of the independent variables are estimated based on the within-entity variation in the data, which eliminates the effect of time-invariant unobserved heterogeneity.

Fixed effects models are useful when there are time-invariant unobserved variables that may affect the dependent variable, but are not included in the model. By modeling the individual-specific effects as fixed variables, fixed effects models can control for this unobserved heterogeneity and estimate the coefficients of the independent variables based on the within-entity variation, which provides more efficient estimates of the coefficients.

One limitation of fixed effects models is that they do not allow for testing the effect of time-invariant variables on the dependent variable. In addition, fixed effects models may suffer from the incidental parameter problem, which may lead to biased estimates of the coefficients of the independent variables in the presence of a large number of fixed effects.

In [20]:
#reformatting indices of dataframe for panel regression
df_clean['Instrument'] = df_clean['Instrument'].astype('category')
df_clean['Date'] = pd.to_datetime(df_clean['Date'])

# set the index to be the time variable and the cross-sectional variable
df_clean.set_index(['Instrument', 'Date'], inplace=True)
df_clean

Unnamed: 0_level_0,Unnamed: 1_level_0,GICS Industry Group Name,Earnings Per Share - Actual Surprise,Earnings Per Share - Actual Surprise AbsVals,Recommendation change,Earnings Per Share – Coefficient of Variation,Number of Analysts,Recommendation - Mean (1-5),Recommendation - Mean (1-5).1,Price Target - Mean,Price Target - Standard Deviation
Instrument,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
AVY.N,2013-01-01,Materials,11.178,11.178,0.538462,0.534474,0.125000,3.00000,3.000,0.005786,0.002079
AVY.N,2013-04-01,Materials,2.482,2.482,0.480770,0.533129,0.125000,2.87500,3.000,0.006891,0.003330
AVY.N,2013-07-01,Materials,1.068,1.068,0.538462,0.532708,0.145833,2.87500,2.875,0.007299,0.003658
AVY.N,2013-10-01,Materials,8.095,8.095,0.423078,0.533751,0.125000,2.62500,2.875,0.007756,0.002930
AVY.N,2014-01-01,Materials,1.471,1.471,0.403846,0.534116,0.145833,2.33333,2.625,0.008917,0.002908
...,...,...,...,...,...,...,...,...,...,...,...
POOL.OQ,2021-10-01,Retailing,17.194,17.194,0.538462,0.533802,0.145833,2.30000,2.300,0.091403,0.042855
POOL.OQ,2022-01-01,Retailing,40.267,40.267,0.538462,0.534171,0.145833,2.30000,2.300,0.095726,0.035088
POOL.OQ,2022-04-01,Retailing,34.342,34.342,0.400001,0.537034,0.166667,2.00000,2.300,0.087134,0.031824
POOL.OQ,2022-07-01,Retailing,1.503,1.503,0.630770,0.534474,0.166667,2.20000,2.000,0.072926,0.048869


FE Panel Regression with **Actual** Surprise Values

In [21]:
y = df_clean.loc[:, "Earnings Per Share - Actual Surprise"]
X = df_clean.loc[:, "Recommendation change":]

# perform the fixed effects panel regression
fixed_effects_model = PanelOLS(y, X, entity_effects=True, time_effects=True)

# fit the model and print the summary statistics
fixed_effects_results = fixed_effects_model.fit()
fixed_effects_results.summary


Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)


0,1,2,3
Dep. Variable:,Earnings Per Share - Actual Surprise,R-squared:,0.0028
Estimator:,PanelOLS,R-squared (Between):,0.0103
No. Observations:,17708,R-squared (Within):,0.0040
Date:,"Thu, Mar 02 2023",R-squared (Overall):,0.0006
Time:,18:19:35,Log-likelihood,-7.4e+04
Cov. Estimator:,Unadjusted,,
,,F-statistic:,7.8913
Entities:,502,P-value,0.0000
Avg Obs:,35.275,Distribution:,"F(6,17162)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
Recommendation change,-39.069,9.5994,-4.0699,0.0000,-57.885,-20.253
Earnings Per Share – Coefficient of Variation,50.370,9.2179,5.4644,0.0000,32.302,68.438
Number of Analysts,-3.8229,2.3268,-1.6430,0.1004,-8.3836,0.7378
Recommendation - Mean (1-5),15.278,4.6010,3.3206,0.0009,6.2597,24.297
Recommendation - Mean (1-5).1,-14.317,4.4701,-3.2028,0.0014,-23.079,-5.5549
Price Target - Mean,4.6139,10.706,0.4310,0.6665,-16.371,25.599
Price Target - Standard Deviation,-5.5557,13.917,-0.3992,0.6898,-32.835,21.724


FE Panel Regression with **Absolute** Surprise Values

In [22]:
y = df_clean.loc[:, "Earnings Per Share - Actual Surprise AbsVals"]
X = df_clean.loc[:, "Recommendation change":]

# perform the fixed effects panel regression
fixed_effects_model = PanelOLS(y, X, entity_effects=True, time_effects=True)

# fit the model and print the summary statistics
fixed_effects_results = fixed_effects_model.fit()
fixed_effects_results.summary

Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)


0,1,2,3
Dep. Variable:,Earnings Per Share - Actual Surprise AbsVals,R-squared:,0.0142
Estimator:,PanelOLS,R-squared (Between):,0.0291
No. Observations:,17708,R-squared (Within):,0.0145
Date:,"Thu, Mar 02 2023",R-squared (Overall):,0.0095
Time:,18:19:40,Log-likelihood,-6.916e+04
Cov. Estimator:,Unadjusted,,
,,F-statistic:,41.282
Entities:,502,P-value,0.0000
Avg Obs:,35.275,Distribution:,"F(6,17162)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
Recommendation change,-66.308,7.3034,-9.0790,0.0000,-80.623,-51.992
Earnings Per Share – Coefficient of Variation,82.613,7.0132,11.780,0.0000,68.867,96.360
Number of Analysts,-8.1838,1.7703,-4.6230,0.0000,-11.654,-4.7139
Recommendation - Mean (1-5),29.538,3.5006,8.4381,0.0000,22.677,36.400
Recommendation - Mean (1-5).1,-26.923,3.4010,-7.9163,0.0000,-33.589,-20.257
Price Target - Mean,-34.411,8.1455,-4.2245,0.0000,-50.377,-18.445
Price Target - Standard Deviation,25.754,10.589,2.4322,0.0150,4.9988,46.508


By Industry: example tech industry

In [24]:
group = df_panel[df_panel['GICS Industry Group Name'] == 'Technology Hardware & Equipment']

group['Instrument'] = group['Instrument'].astype('category')
group['Date'] = pd.to_datetime(group['Date'])
group.set_index(['Instrument', 'Date'], inplace=True)

y = group.loc[:, "Earnings Per Share - Actual Surprise"]
X = group.loc[:, "Recommendation change":]

model = PanelOLS(y, X, entity_effects=True, time_effects=True).fit()
model

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  group['Instrument'] = group['Instrument'].astype('category')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  group['Date'] = pd.to_datetime(group['Date'])
Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)


0,1,2,3
Dep. Variable:,Earnings Per Share - Actual Surprise,R-squared:,0.0232
Estimator:,PanelOLS,R-squared (Between):,-0.0839
No. Observations:,603,R-squared (Within):,0.0282
Date:,"Thu, Mar 02 2023",R-squared (Overall):,0.0090
Time:,18:20:04,Log-likelihood,-2213.2
Cov. Estimator:,Unadjusted,,
,,F-statistic:,2.1252
Entities:,19,P-value,0.0490
Avg Obs:,31.737,Distribution:,"F(6,538)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
Recommendation change,-128.98,183.80,-0.7017,0.4831,-490.03,232.07
Earnings Per Share – Coefficient of Variation,114.59,187.18,0.6122,0.5407,-253.11,482.29
Number of Analysts,4.7608,5.7763,0.8242,0.4102,-6.5861,16.108
Recommendation - Mean (1-5),55.732,84.446,0.6600,0.5096,-110.15,221.62
Recommendation - Mean (1-5).1,-50.279,84.783,-0.5930,0.5534,-216.83,116.27
Price Target - Mean,89.670,79.446,1.1287,0.2595,-66.393,245.73
Price Target - Standard Deviation,-69.668,124.86,-0.5580,0.5771,-314.94,175.61
