# Random Effects Estimation of the determinants of leverage amongst SGX listed companies

## Import the necessary packages

In [238]:
import pandas as pd
import numpy as np
from scipy import stats

import statsmodels.api as sm
from statsmodels.regression.linear_model import RegressionResults
from linearmodels.panel import RandomEffects
from linearmodels.panel.results import PanelResults, RandomEffectsResults, PanelEffectsResults

from statsmodels.iolib import load_pickle, save_pickle

from typing import Union

## Loading the SGX Data

In [239]:
sgx = pd.read_csv("data/clean_sgx.csv")
sgx

Unnamed: 0,Company,NACE,Year,TA,CA,FA,TL,CL,DEBT,TAXEX,...,SIZE,PROFITABILITY,TANG,RISK,LIQUID,NDTSHIELD,TAXRATE,FCFF,GROWPOT,LEVERAGE
0,A SONIC AEROSPACE LIMITED,26,2015.0,6.107200e+07,4.941900e+07,1.079300e+07,3.331800e+07,3.160000e+07,-1.437900e+07,2.910000e+05,...,17.927564,-0.094659,0.176726,-82.603240,1.563892,0.041296,-0.029991,4.729897e+06,1.342854,-0.235443
1,A SONIC AEROSPACE LIMITED,26,2016.0,6.467500e+07,5.772600e+07,6.258000e+06,3.964200e+07,3.835100e+07,-1.338600e+07,-2.650000e+05,...,17.984885,-0.010839,0.096761,12.036318,1.505202,0.012911,0.097070,6.670989e+05,0.279433,-0.206973
2,A SONIC AEROSPACE LIMITED,26,2017.0,7.524600e+07,6.803700e+07,6.118000e+06,5.161200e+07,5.066000e+07,-1.792500e+07,-2.160000e+05,...,18.136273,-0.013888,0.081307,1.605162,1.343012,0.010791,0.150313,4.998374e+06,0.335075,-0.238219
3,A SONIC AEROSPACE LIMITED,26,2018.0,7.359400e+07,6.584300e+07,5.849000e+06,4.928100e+07,4.870300e+07,-1.906200e+07,-1.820000e+05,...,18.114074,0.013371,0.079477,-20.179642,1.351929,0.010245,-0.140108,8.313457e+05,0.378908,-0.259016
4,A SONIC AEROSPACE LIMITED,26,2019.0,6.428400e+07,5.679200e+07,7.293000e+06,3.731000e+07,3.643600e+07,-1.432700e+07,-3.740000e+05,...,17.978821,0.078838,0.113450,-193.544766,1.558678,0.029790,-0.173228,3.991890e+05,0.716118,-0.222870
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1723,YONGNAM HOLDINGS LIMITED,71,2018.0,3.503341e+08,1.196307e+08,2.306594e+08,1.703341e+08,1.032232e+08,9.217834e+07,8.678195e+06,...,19.674398,-0.059740,0.658398,-3.777753,1.158952,0.062545,-0.302336,-1.035043e+07,0.402061,0.263116
1724,YONGNAM HOLDINGS LIMITED,71,2019.0,3.429246e+08,1.037248e+08,2.351062e+08,2.013324e+08,1.504135e+08,1.227331e+08,3.608225e+06,...,19.653021,-0.033475,0.685591,-0.690731,0.689598,0.071469,-0.099419,4.195210e+07,0.410953,0.357901
1725,YONGNAM HOLDINGS LIMITED,71,2020.0,2.913047e+08,6.392860e+07,2.236608e+08,2.061644e+08,1.618667e+08,1.292187e+08,-4.172907e+06,...,19.489881,-0.064894,0.767790,-0.541603,0.394946,0.093303,0.066056,5.687663e+07,0.380703,0.443586
1726,YONGNAM HOLDINGS LIMITED,71,2021.0,2.595798e+08,6.673153e+07,1.896449e+08,1.906858e+08,1.146763e+08,1.165392e+08,4.512836e+04,...,19.374575,0.027514,0.730584,-2.513608,0.581912,0.078202,-0.002244,2.717399e+07,0.317394,0.448953


In [240]:
sgx = sgx.set_index(['Company Code', 'Year'], drop= False)

sgx

Unnamed: 0_level_0,Unnamed: 1_level_0,Company,NACE,Year,TA,CA,FA,TL,CL,DEBT,TAXEX,...,SIZE,PROFITABILITY,TANG,RISK,LIQUID,NDTSHIELD,TAXRATE,FCFF,GROWPOT,LEVERAGE
Company Code,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
3,2015.0,A SONIC AEROSPACE LIMITED,26,2015.0,6.107200e+07,4.941900e+07,1.079300e+07,3.331800e+07,3.160000e+07,-1.437900e+07,2.910000e+05,...,17.927564,-0.094659,0.176726,-82.603240,1.563892,0.041296,-0.029991,4.729897e+06,1.342854,-0.235443
3,2016.0,A SONIC AEROSPACE LIMITED,26,2016.0,6.467500e+07,5.772600e+07,6.258000e+06,3.964200e+07,3.835100e+07,-1.338600e+07,-2.650000e+05,...,17.984885,-0.010839,0.096761,12.036318,1.505202,0.012911,0.097070,6.670989e+05,0.279433,-0.206973
3,2017.0,A SONIC AEROSPACE LIMITED,26,2017.0,7.524600e+07,6.803700e+07,6.118000e+06,5.161200e+07,5.066000e+07,-1.792500e+07,-2.160000e+05,...,18.136273,-0.013888,0.081307,1.605162,1.343012,0.010791,0.150313,4.998374e+06,0.335075,-0.238219
3,2018.0,A SONIC AEROSPACE LIMITED,26,2018.0,7.359400e+07,6.584300e+07,5.849000e+06,4.928100e+07,4.870300e+07,-1.906200e+07,-1.820000e+05,...,18.114074,0.013371,0.079477,-20.179642,1.351929,0.010245,-0.140108,8.313457e+05,0.378908,-0.259016
3,2019.0,A SONIC AEROSPACE LIMITED,26,2019.0,6.428400e+07,5.679200e+07,7.293000e+06,3.731000e+07,3.643600e+07,-1.432700e+07,-3.740000e+05,...,17.978821,0.078838,0.113450,-193.544766,1.558678,0.029790,-0.173228,3.991890e+05,0.716118,-0.222870
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
573,2018.0,YONGNAM HOLDINGS LIMITED,71,2018.0,3.503341e+08,1.196307e+08,2.306594e+08,1.703341e+08,1.032232e+08,9.217834e+07,8.678195e+06,...,19.674398,-0.059740,0.658398,-3.777753,1.158952,0.062545,-0.302336,-1.035043e+07,0.402061,0.263116
573,2019.0,YONGNAM HOLDINGS LIMITED,71,2019.0,3.429246e+08,1.037248e+08,2.351062e+08,2.013324e+08,1.504135e+08,1.227331e+08,3.608225e+06,...,19.653021,-0.033475,0.685591,-0.690731,0.689598,0.071469,-0.099419,4.195210e+07,0.410953,0.357901
573,2020.0,YONGNAM HOLDINGS LIMITED,71,2020.0,2.913047e+08,6.392860e+07,2.236608e+08,2.061644e+08,1.618667e+08,1.292187e+08,-4.172907e+06,...,19.489881,-0.064894,0.767790,-0.541603,0.394946,0.093303,0.066056,5.687663e+07,0.380703,0.443586
573,2021.0,YONGNAM HOLDINGS LIMITED,71,2021.0,2.595798e+08,6.673153e+07,1.896449e+08,1.906858e+08,1.146763e+08,1.165392e+08,4.512836e+04,...,19.374575,0.027514,0.730584,-2.513608,0.581912,0.078202,-0.002244,2.717399e+07,0.317394,0.448953


## Random Effects Model

### 1-Way Random Effects Estimation

#### 1-Way Entity Random Effects Model

In [241]:
endo_var = 'LEVERAGE'
exog_vars = ['SIZE',
             'PROFITABILITY',
             'TANG',
             'LIQUID',
             'MCAP',
             'SOLV']

endo = sgx[endo_var]
exog = sm.add_constant(sgx[exog_vars])

entity_re_mod = RandomEffects(endo, exog)

entity_re_fit = entity_re_mod.fit()

print(entity_re_fit.summary)

                        RandomEffects Estimation Summary                        
Dep. Variable:               LEVERAGE   R-squared:                        0.2403
Estimator:              RandomEffects   R-squared (Between):              0.2846
No. Observations:                1728   R-squared (Within):               0.2318
Date:                Mon, Apr 01 2024   R-squared (Overall):              0.2751
Time:                        13:47:35   Log-likelihood                    1463.6
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      90.746
Entities:                         216   P-value                           0.0000
Avg Obs:                       8.0000   Distribution:                  F(6,1721)
Min Obs:                       8.0000                                           
Max Obs:                       8.0000   F-statistic (robust):             90.746
                            

#### 1-Way Time Random Effects Model

In [242]:
sgx = sgx.swaplevel('Year', 'Company Code')

endo_var = 'LEVERAGE'
exog_vars = ['SIZE',
             'PROFITABILITY',
             'TANG',
             'LIQUID',
             'MCAP',
             'SOLV']

endo = sgx[endo_var]
exog = sm.add_constant(sgx[exog_vars])

time_re_mod = RandomEffects(endo, exog)

time_re_fit = time_re_mod.fit()

print(time_re_fit.summary)

                        RandomEffects Estimation Summary                        
Dep. Variable:               LEVERAGE   R-squared:                        0.3621
Estimator:              RandomEffects   R-squared (Between):              0.4626
No. Observations:                1728   R-squared (Within):               0.3616
Date:                Mon, Apr 01 2024   R-squared (Overall):              0.3621
Time:                        13:47:35   Log-likelihood                    282.02
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      162.81
Entities:                           8   P-value                           0.0000
Avg Obs:                       216.00   Distribution:                  F(6,1721)
Min Obs:                       216.00                                           
Max Obs:                       216.00   F-statistic (robust):             162.81
                            

### 2-Way Random Effects Model

### Feasible Generalised Least Square Estimator

In [243]:
def get_weighting_matrix(time_panels: pd.Series, entity_panels: pd.Series):
    t = time_panels.nunique()
    n = entity_panels.nunique()
    
    J_n_bar = (1 / n) * np.ones(shape = (n, n))
    J_t_bar = (1 / t) * np.ones(shape = (t, t))
    I_n = np.identity(n = n)
    I_t = np.identity(n = t)

    E_n = I_n - J_n_bar
    E_t = I_t - J_t_bar

    Q_1 = np.kron(E_n, E_t)
    Q_2 = np.kron(E_n, J_t_bar)
    Q_3 = np.kron(J_n_bar, E_t)
    Q_4 = np.kron(J_n_bar, J_t_bar)
    
    return np.array([Q_1, Q_2, Q_3, Q_4])

def get_omega_i(weighting_matrix: np.array, resid: np.array):

    w_1 = (resid.T @ weighting_matrix[0] @ resid) / np.trace(weighting_matrix[0])
    w_2 = (resid.T @ weighting_matrix[1] @ resid) / np.trace(weighting_matrix[1])
    w_3 = (resid.T @ weighting_matrix[2] @ resid) / np.trace(weighting_matrix[2])
    w_4 = w_2 + w_3 - w_1

    return np.array([w_1, w_2, w_3, w_4])

def get_rcorr_matrix(omega_matrix: np.array, weighting_matrix: np.array):
    omega = omega_matrix
    weight = weighting_matrix
    return omega[0] * weight[0] + omega[1] * weight[1] + omega[2] * weight[2] + omega[3] * weight[3]

def TwoWayRandomEffects(Y: pd.Series, X: Union[pd.Series, pd.DataFrame], entity_panel: pd.Series, time_panel: pd.Series, epsilon: float= 0.0001, maxiter: int= 99):
    # Step 1: Run OLS of Y on X
    ols = sm.OLS(Y, X)
    residuals = ols.fit().resid
    # Step 2: Get OLS weighting matrix
    weight_matrix = get_weighting_matrix(time_panels= time_panel, entity_panels= entity_panel)
    omega_matrix = get_omega_i(weight_matrix, residuals)
    OMEGA = get_rcorr_matrix(omega_matrix, weight_matrix)
    
    # Step 3: Get GLS residuals using weighting matrix
    gls = sm.GLS(endog= Y, exog= X, sigma= OMEGA)
    gls_residuals = gls.fit().resid
    # Step 4: Update GLS weighting matrix
    weight_matrix = get_weighting_matrix(time_panels= time_panel, entity_panels= entity_panel)
    omega_matrix = get_omega_i(weight_matrix, gls_residuals)
    OMEGA = get_rcorr_matrix(omega_matrix, weight_matrix)
    # Step 5: Update GLS coefficient estimates
    init_gls = ols ## Initial GLS model
    iter_gls = sm.GLS(endog= Y, exog= X, sigma= OMEGA) ## Updated GLS model

    i = 1
    while np.max(abs(init_gls.fit().params - iter_gls.fit().params)) >= epsilon: ## If there is a significant difference in the model estimates, re-run the refining steps
        init_gls = iter_gls ## Set the initial GLS model to the most updated model
        # Step 3: Get GLS residuals using weighting matrix
        gls = sm.GLS(endog= Y, exog= X, sigma= OMEGA)
        gls_residuals = gls.fit().resid
        # Step 4: Update GLS weighting matrix
        weight_matrix = get_weighting_matrix(time_panels= time_panel, entity_panels= entity_panel)
        omega_matrix = get_omega_i(weight_matrix, gls_residuals)
        OMEGA = get_rcorr_matrix(omega_matrix, weight_matrix)
        # Step 5: Update GLS coefficient estimates
        iter_gls = sm.GLS(endog= Y, exog= X, sigma= OMEGA) ## Produce an updated GLS model
        i += 1
        if i == maxiter:
            print(f"Maximum of {maxiter} iterations reached before model convergence was achieved.")
            break

    print(f"{i} iterations of GLS re-specification performed")
    return gls

In [244]:
sgx = sgx.swaplevel('Company Code', 'Year')

endo_var = 'LEVERAGE'
exog_vars = ['SIZE',
             'PROFITABILITY',
             'TANG',
             'LIQUID',
             'MCAP',
             'SOLV']

endo = sgx[endo_var]
exog = sm.add_constant(sgx[exog_vars])

entity_panel = sgx['Company Code']
time_panel = sgx['Year']

tw_re_mod = TwoWayRandomEffects(endo, exog, entity_panel, time_panel)

tw_re_fit = tw_re_mod.fit()

print(tw_re_fit.summary())

4 iterations of GLS re-specification performed
                            GLS Regression Results                            
Dep. Variable:               LEVERAGE   R-squared:                       0.233
Model:                            GLS   Adj. R-squared:                  0.230
Method:                 Least Squares   F-statistic:                     87.09
Date:                Mon, 01 Apr 2024   Prob (F-statistic):           1.66e-95
Time:                        13:47:37   Log-Likelihood:                 1128.2
No. Observations:                1728   AIC:                            -2242.
Df Residuals:                    1721   BIC:                            -2204.
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------

### Test for significance of Random Effects

#### Lagrange Multiplier Test

In [245]:
def cond_LM_stat(restricted_model: PanelResults, how: str): 
    T = restricted_model.time_info.total.astype('int')
    n = restricted_model.entity_info.total.astype('int')

    J_n = np.ones(shape = (n, n))
    J_T = np.ones(shape = (T, T))

    I_n = np.identity(n = n)
    I_T = np.identity(n = T)

    u_tilda = restricted_model.resids
    
    if how.lower() == 'entity':
        LM_C_entity = ((n * T) / (2 * (T - 1))) * (1 - ((u_tilda.T @ np.kron(I_n, J_T) @ u_tilda) / (u_tilda.T @ u_tilda))) ** 2
        return LM_C_entity
    elif how.lower() == 'time':
        LM_C_time = ((n * T) / (2 * (n - 1))) * (1 - ((u_tilda.T @ np.kron(J_n, I_T) @ u_tilda) / (u_tilda.T @ u_tilda))) ** 2
        return LM_C_time
    else:
        raise ValueError(f"'how' parameter should be either 'Entity' or 'Time' and not {how}.")
    
def marg_LM_stat(restricted_model: RandomEffectsResults, how: str): 
    if how.lower() == 'time':    
        T = restricted_model.time_info.total.astype('int')
        n = restricted_model.entity_info.total.astype('int')
    elif how.lower() == 'entity':
        n = restricted_model.time_info.total.astype('int')
        T = restricted_model.entity_info.total.astype('int')
    else:
        raise ValueError(f"'how' parameter should be either 'Entity' or 'Time' and not {how}.")

    J_n = np.ones(shape = (n, n))
    J_T = np.ones(shape = (T, T))
    
    J_n_bar = (1 / n) * J_n
    J_T_bar = (1 / T) * J_T

    I_n = np.identity(n = n)
    I_T = np.identity(n = T)

    E_n = I_n - J_n_bar
    E_T = I_T - J_T_bar

    u_tilda = restricted_model.resids

    if how.lower() == 'entity':
        sigma_v_sq = (1 / T*(n - 1)) * u_tilda.T @ np.kron(E_n, I_T) @ u_tilda
        sigma_2_sq = (1 / T) * u_tilda.T @ np.kron(J_n_bar, I_T) @ u_tilda

        Q_1 = (1 / sigma_2_sq) * u_tilda.T @ np.kron(J_n_bar, J_T_bar) @ u_tilda
        Q_2 = (1 / (n - 1)*sigma_v_sq) * u_tilda.T @ np.kron(E_n, J_T_bar) @ u_tilda

        LM_M_entity = ((np.sqrt(2) * sigma_2_sq * sigma_v_sq) / np.sqrt(T * (T - 1) * (sigma_v_sq ** 2 + (n - 1) * sigma_2_sq ** 2))) *\
             ((1/sigma_2_sq) * (Q_1 - 1) + ((n-1) / sigma_v_sq) * (Q_2 - 1))
        
        return LM_M_entity
    
    else:
        sigma_v_sq = (1 / n*(T - 1)) * u_tilda.T @ np.kron(I_n, E_T) @ u_tilda
        sigma_1_sq = (1 / n) * u_tilda.T @ np.kron(I_n, J_T_bar) @ u_tilda

        R_1 = (1 / sigma_1_sq) * u_tilda.T @ np.kron(J_T_bar, J_n_bar) @ u_tilda
        R_2 = (1 / (T - 1)*sigma_v_sq) * u_tilda.T @ np.kron(J_n_bar, E_T) @ u_tilda

        LM_M_time = ((np.sqrt(2) * sigma_1_sq * sigma_v_sq) / np.sqrt(n * (n - 1) * (sigma_v_sq ** 2 + (T - 1) * sigma_1_sq ** 2))) *\
             ((1/sigma_1_sq) * (R_1 - 1) + ((T-1) / sigma_v_sq) * (R_2 - 1))
        
        return LM_M_time

def joint_LM_stat(restricted_model: PanelResults): 
    return cond_LM_stat(restricted_model, 'Entity') + cond_LM_stat(restricted_model, 'Time')

#### Joint LM-Test

In [246]:
pooled_ols_res = load_pickle('model/pooled_ols.pickle')

In [247]:
joint_stat = joint_LM_stat(pooled_ols_res)

joint_p = 1 - stats.chi2.cdf(joint_stat, 1)

print(f"Joint LM Statistic for 2-way Random Effects: {joint_stat}")
print(f"p-value of Joint LM test for 2-way Random Effects: {joint_p}")


Joint LM Statistic for 2-way Random Effects: 2931.477025018793
p-value of Joint LM test for 2-way Random Effects: 0.0


#### Conditional LM-Test for Entity Random Effect

In [248]:
cond_entity_stat = cond_LM_stat(pooled_ols_res, how= 'entity')

cond_entity_p = 1 - stats.chi2.cdf(cond_entity_stat, 1)

print(f"Conditional LM Statistic for Entity Random Effects: {cond_entity_stat}")
print(f"p-value of Conditional LM test for Entity Random ffects: {cond_entity_p}")


Conditional LM Statistic for Entity Random Effects: 2931.4558135603897
p-value of Conditional LM test for Entity Random ffects: 0.0


#### Conditional LM-Test for Time Random Effect

In [249]:
cond_time_stat = cond_LM_stat(pooled_ols_res, how= 'time')

cond_time_p = 1 - stats.chi2.cdf(cond_time_stat, 1)

print(f"Conditional LM Statistic for Time Random Effects: {cond_time_stat}")
print(f"p-value of Conditional LM test for Time Random Effects: {cond_time_p}")

Conditional LM Statistic for Time Random Effects: 0.021211458403447795
p-value of Conditional LM test for Time Random Effects: 0.8842043727981699


#### Marginal LM-Test for Entity Random Effect

In [250]:
marg_entity_stat = marg_LM_stat(time_re_fit, how= 'entity')

marg_entity_p = 1 - stats.chi2.cdf(marg_entity_stat, 1)

print(f"Marginal LM Statistic for Entity Random Effects: {marg_entity_stat}")
print(f"p-value of Marginal LM test for Entity Random ffects: {marg_entity_p}")

Marginal LM Statistic for Entity Random Effects: 0.20697645959347585
p-value of Marginal LM test for Entity Random ffects: 0.6491476164436869


In [251]:
marg_time_stat = marg_LM_stat(entity_re_fit, how= 'time')

marg_time_p = stats.chi2.cdf(marg_time_stat, 1)

print(f"Marginal LM Statistic for Time Random Effects: {marg_time_stat}")
print(f"p-value of Marginal LM test for Time Random ffects: {marg_time_p}")

Marginal LM Statistic for Time Random Effects: -0.0076970396045147734
p-value of Marginal LM test for Time Random ffects: 0.0


From the results of the LM-test, only the joint LM and conditional LM test for entity effects were significant. However, the significance of the joint LM test is powered by the significance of the conditional LM test for entity as can be seen from the insignificance of the marginal LM tests. Thus, we should strongly consider the 1-way entity random effects model over the 2-way random effects model.

#### Log-Likelihood Ratio Test

In [252]:
def lr_test(restricted_model: PanelResults|RandomEffectsResults|PanelEffectsResults, unrestricted_model: PanelResults|RandomEffectsResults|PanelEffectsResults, df: int= 1):
    try:
        res_loglik = restricted_model.loglik
    except:
        res_loglik = restricted_model.llf

    try:
        unres_loglik = unrestricted_model.loglik
    except:
        unres_loglik = unrestricted_model.llf


    lr_stat = -2 * (res_loglik - unres_loglik)
        
    lr_p = 1 - stats.chi2.cdf(lr_stat, df)

    print(f"Log-Likelihood Test Statistic: {lr_stat}")
    print(f"Log-Likelihood Test Statistic: {lr_p}")


#### Conditional Entity LR Test

In [253]:
lr_test(pooled_ols_res, entity_re_fit, 1)

Log-Likelihood Test Statistic: 2363.2277405169125
Log-Likelihood Test Statistic: 0.0


#### Conditional Time LR Test

In [254]:
lr_test(pooled_ols_res, time_re_fit, 1)

Log-Likelihood Test Statistic: -0.0
Log-Likelihood Test Statistic: 1.0


#### Joint LR Test

In [255]:
lr_test(pooled_ols_res, tw_re_fit, 2)

Log-Likelihood Test Statistic: 1692.3510468266113
Log-Likelihood Test Statistic: 0.0


#### Conditional Time LR Test

In [256]:
lr_test(entity_re_fit, tw_re_fit, 1)

Log-Likelihood Test Statistic: -670.8766936903012
Log-Likelihood Test Statistic: 1.0


#### Conditional Time LR Test

In [257]:
lr_test(time_re_fit, tw_re_fit, 1)

Log-Likelihood Test Statistic: 1692.3510468266113
Log-Likelihood Test Statistic: 0.0


### Correlated Random Effects Model

#### Correlated Entity Random Effects Model

In [258]:
endo_var = 'LEVERAGE'
exog_vars = ['SIZE',
             'PROFITABILITY',
             'TANG',
             'LIQUID',
             'MCAP',
             'SOLV']

sgx = sgx.reset_index(drop= True).set_index(['Company Code', 'Year'], drop= False)

endo = sgx[endo_var]

mean_exog_vars = ['avg' + var for var in exog_vars]
sgx[mean_exog_vars] = sgx[exog_vars].groupby(level= 'Company Code').transform('mean')
exog = sm.add_constant(sgx[exog_vars + mean_exog_vars])
sgx.drop(columns= mean_exog_vars)

entity_cre_mod = RandomEffects(endo, exog)

entity_cre_fit = entity_cre_mod.fit()

print(entity_cre_fit.summary)

                        RandomEffects Estimation Summary                        
Dep. Variable:               LEVERAGE   R-squared:                        0.2733
Estimator:              RandomEffects   R-squared (Between):              0.4508
No. Observations:                1728   R-squared (Within):               0.2401
Date:                Mon, Apr 01 2024   R-squared (Overall):              0.4129
Time:                        13:47:37   Log-likelihood                    1505.4
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      53.744
Entities:                         216   P-value                           0.0000
Avg Obs:                       8.0000   Distribution:                 F(12,1715)
Min Obs:                       8.0000                                           
Max Obs:                       8.0000   F-statistic (robust):             53.744
                            

#### Correlated Time Random Effects Model

In [259]:
endo_var = 'LEVERAGE'
exog_vars = ['SIZE',
             'PROFITABILITY',
             'TANG',
             'LIQUID',
             'MCAP',
             'SOLV']

sgx = sgx.reset_index(drop= True).set_index(['Year', 'Company Code'], drop= False)

endo = sgx[endo_var]

mean_exog_vars = ['avg' + var for var in exog_vars]
sgx[mean_exog_vars] = sgx[exog_vars].groupby(level= 'Year').transform('mean')
exog = sm.add_constant(sgx[exog_vars + mean_exog_vars])
sgx.drop(columns= mean_exog_vars)

time_cre_mod = RandomEffects(endo, exog, check_rank= False)

time_cre_fit = time_cre_mod.fit()

print(time_cre_fit.summary)

                        RandomEffects Estimation Summary                        
Dep. Variable:               LEVERAGE   R-squared:                        0.3648
Estimator:              RandomEffects   R-squared (Between):              0.9970
No. Observations:                1728   R-squared (Within):               0.3616
Date:                Mon, Apr 01 2024   R-squared (Overall):              0.3648
Time:                        13:47:37   Log-likelihood                    285.75
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      82.090
Entities:                           8   P-value                           0.0000
Avg Obs:                       216.00   Distribution:                 F(12,1715)
Min Obs:                       216.00                                           
Max Obs:                       216.00   F-statistic (robust):             82.092
                            

#### Correlated 2-Way Random Effects Model

In [260]:
endo_var = 'LEVERAGE'
exog_vars = ['SIZE',
             'PROFITABILITY',
             'TANG',
             'LIQUID',
             'MCAP',
             'SOLV']

sgx = sgx.reset_index(drop= True).set_index(['Company Code', 'Year'], drop= False)

endo = sgx[endo_var]

mean_exog_vars = ['avg' + var for var in exog_vars]
sgx[mean_exog_vars] = sgx[exog_vars].groupby(level= 'Company Code').transform('mean')
exog = sm.add_constant(sgx[exog_vars + mean_exog_vars])
sgx.drop(columns= mean_exog_vars)

entity_panel, time_panel = sgx['Company Code'], sgx['Year']

tw_cre_mod = TwoWayRandomEffects(endo, exog, entity_panel, time_panel)

tw_cre_fit = tw_cre_mod.fit()

print(tw_cre_fit.summary())

2 iterations of GLS re-specification performed
                            GLS Regression Results                            
Dep. Variable:               LEVERAGE   R-squared:                       0.269
Model:                            GLS   Adj. R-squared:                  0.264
Method:                 Least Squares   F-statistic:                     52.54
Date:                Mon, 01 Apr 2024   Prob (F-statistic):          1.41e-107
Time:                        13:47:38   Log-Likelihood:                 1164.0
No. Observations:                1728   AIC:                            -2302.
Df Residuals:                    1715   BIC:                            -2231.
Df Model:                          12                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------

## Hausman Test

In [261]:
def hausman_test(fixed_effects: PanelEffectsResults, random_effects: RandomEffectsResults|RegressionResults):    
    # (I) find overlapping coefficients:
    common_coef = list(set(fixed_effects.params.index).intersection(random_effects.params.index))

    # (II) calculate differences between FE and RE:
    b_diff = np.array(fixed_effects.params[common_coef] - random_effects.params[common_coef])
    df = len(b_diff)
    b_diff.reshape((df, 1))
    
    b_fe_cov = fixed_effects.cov
    try:
        b_re_cov = random_effects.cov
    except:
        b_re_cov = random_effects.cov_params()

    b_cov_diff = np.array(b_fe_cov.loc[common_coef, common_coef] -
                        b_re_cov.loc[common_coef, common_coef])
    b_cov_diff.reshape((df, df))

    # (III) calculate test statistic:
    hausman_stat = abs(np.transpose(b_diff) @ np.linalg.inv(b_cov_diff) @ b_diff)
    hausman_p = 1 - stats.chi2.cdf(hausman_stat, df)

    print(f"Hausman Test Statistic: {hausman_stat}")
    print(f"Hausman Test Statistic: {hausman_p}")

In [262]:
tw_fe_fit = load_pickle('model/tw_fe.pickle')
entity_fe_fit = load_pickle('model/entity_fe.pickle')
time_fe_fit = load_pickle('model/time_fe.pickle')

### 2-way FE vs. 2-way RE

In [263]:
hausman_test(tw_fe_fit, tw_re_fit)

Hausman Test Statistic: 67.01781028791942
Hausman Test Statistic: 5.9048321787713576e-12


### Entity FE vs. Entity RE

In [264]:
hausman_test(entity_fe_fit, entity_re_fit)

Hausman Test Statistic: 128.12439868380366
Hausman Test Statistic: 0.0


From the Hausman tests conducted around entity and joint effects, the p-values of all tests are significant at all reasonable levels of significance. Thus, either a entity or 2-way fixed effects model is preferred over the random effects model. 

### Time FE vs. Time RE

In [265]:
hausman_test(time_fe_fit, time_re_fit)

Hausman Test Statistic: 7.698830119454162
Hausman Test Statistic: 0.35989827537203667


From the Hausman tests conducted around time effects, the p-values of all tests are significant at all reasonable levels of significance. Thus, a time random effects model is preferred over the fixed effects model. 


### CRE vs. RE Test

#### Entity CRE vs. Entity RE

In [266]:
endo_var = 'LEVERAGE'
exog_vars = ['SIZE',
             'PROFITABILITY',
             'TANG',
             'LIQUID',
             'MCAP',
             'SOLV']

mean_exog_vars = ['avg' + var for var in exog_vars]

hypothesis = " = ".join(mean_exog_vars) + " = 0"
wald_test = entity_cre_fit.wald_test(formula= hypothesis)

print(wald_test)

Linear Equality Hypothesis Test
H0: Linear equality constraint is valid
Statistic: 78.1680
P-value: 0.0000
Distributed: chi2(6)


#### Time CRE vs. Time RE

In [267]:
endo_var = 'LEVERAGE'
exog_vars = ['SIZE',
             'PROFITABILITY',
             'TANG',
             'LIQUID',
             'MCAP',
             'SOLV']

mean_exog_vars = ['avg' + var for var in exog_vars]

hypothesis = " = ".join(mean_exog_vars) + " = 0"
wald_test = time_cre_fit.wald_test(formula= hypothesis)

print(wald_test)

Linear Equality Hypothesis Test
H0: Linear equality constraint is valid
Statistic: 7.3996
P-value: 0.2855
Distributed: chi2(6)


#### 2-Way CRE vs. 2-Way RE

In [269]:
endo_var = 'LEVERAGE'
exog_vars = ['SIZE',
             'PROFITABILITY',
             'TANG',
             'LIQUID',
             'MCAP',
             'SOLV']

mean_exog_vars = ['avg' + var for var in exog_vars]

hypothesis_matrix = " = ".join(mean_exog_vars) + ' = 0'
wald_test = tw_cre_fit.wald_test(hypothesis_matrix, use_f= False, )

print(f"Wald-Test Statistic for 2-Way Correlated Random Effects: {wald_test.df_denom}")
print(f"Wald-Test Statistic for 2-Way Correlated Random Effects: {wald_test.statistic[0][0]}")
print(f"Wald-Test Statistic for 2-Way Correlated Random Effects: {wald_test.pvalue.item()}")

Wald-Test Statistic for 2-Way Correlated Random Effects: 5
Wald-Test Statistic for 2-Way Correlated Random Effects: 76.08690678565168
Wald-Test Statistic for 2-Way Correlated Random Effects: 5.517345793710803e-15




The Wald-Test of the time effects CRE model is highly insignificant, as such if we had to compare, the time RE is preferred over the time CRE.

However, from the Wald-Tests conducted between the random effects and correlated random effects models with with entity or 2-way effects, we have to reject the linearity constraint hypothesis and conclude that the CRE model is preferred. This is expected as we have already tested and accepted the significance of the within effects in our model.

### LM Test for 2-Way CRE

In [270]:
marg_cre_entity_stat = marg_LM_stat(restricted_model= entity_cre_fit, how= 'Entity')
marg_cre_entity_p = 1 - stats.chi2.cdf(marg_cre_entity_stat, 1)

print(f"Marginal LM Statistic for Entity Correlated Random Effects: {marg_cre_entity_stat}")
print(f"p-value of Marginal LM test for Entity Correlated Random ffects: {marg_cre_entity_p}")

Marginal LM Statistic for Entity Correlated Random Effects: -0.007498517709995994
p-value of Marginal LM test for Entity Correlated Random ffects: 1.0


From the LM test conducted between the 2-way and entity correlated random effects, the p-value of 1.0 suggests that a 2-way model should not be preferred.

### Cluster Robust Entity CRE

In [273]:
ind_clust_entity_cre = entity_cre_mod.fit(cov_type= 'clustered', cluster_entity= True)
print(ind_clust_entity_cre.summary)

                        RandomEffects Estimation Summary                        
Dep. Variable:               LEVERAGE   R-squared:                        0.2733
Estimator:              RandomEffects   R-squared (Between):              0.4508
No. Observations:                1728   R-squared (Within):               0.2401
Date:                Mon, Apr 01 2024   R-squared (Overall):              0.4129
Time:                        13:56:57   Log-likelihood                    1505.4
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      53.744
Entities:                         216   P-value                           0.0000
Avg Obs:                       8.0000   Distribution:                 F(12,1715)
Min Obs:                       8.0000                                           
Max Obs:                       8.0000   F-statistic (robust):             36.989
                            

### Save Final Model

In [274]:
save_pickle(ind_clust_entity_cre, "model/ind_clust_entity_cre.pickle")

## Conclusion

- When comparing between random effects models, the entity random effects model appear to be preferred over the other models.

- When comparing between fixed effects and random effects models, the fixed effects models take precedence over the random effects models except when comparing time effects models.

&rarr; This suggests that there are fixed 