# NOTES

Explanation of Changes:
Logarithmic Transformations: Applied to variables like GDP/capita and Tech Advancement CN to reduce skewness and deal with large values.

Lagged Variables: Created lagged versions of the GDP/capita and Tech Advancement CN to capture delayed effects.

Scaling: Reduced the magnitude of variables like Import, GDP/capita, and others to make coefficients more interpretable and to address scaling issues.

Model Comparison:

Conducted the Durbin-Wu-Hausman test to check whether the Random Effects model is more appropriate than the Fixed Effects model. If the p-value is low, you would reject Random Effects in favor of Fixed Effects.
Breusch-Pagan Test: Added a heteroscedasticity check to ensure the model does not suffer from heteroscedasticity.

In [None]:
# -*- coding: utf-8 -*-
"""script-Copy1.ipynb
Automatically generated by Colab.

# IMPORTS AND SETTINGS
"""

import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from linearmodels.panel import PanelOLS, RandomEffects
from statsmodels.api import add_constant
from linearmodels.panel import compare
import numpy as np
import scipy.stats
from linearmodels.panel import FirstDifferenceOLS
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.stats.diagnostic import het_breuschpagan
import os
import warnings
warnings.filterwarnings('ignore')

project_folder = os.getcwd()

"""# DATA READING"""

# Read data
filename2 = os.path.join(project_folder, "Regression data", "panel_regression_absolute.csv")
data2 = pd.read_csv(os.path.join(project_folder, filename2))

# Set index and drop columns with all NaNs
data2 = data2.set_index(['Country', 'Year']).dropna(axis=1, how="all")
data2.head(20)

# Describe the dataset
data2.describe().T

"""# VARIABLE MANIPULATIONS"""

# Logarithmic transformations to handle large values and potential non-linearity
data2["log_GDP_per_capita"] = np.log(data2["GDP/capita"] + 1)
data2["log_Tech_Advancement_CN"] = np.log(data2["Tech Advancement CN"] + 1)

# Lagging variables by one year to account for delayed effects
data2["lag_GDP_per_capita"] = data2.groupby(level=0)["GDP/capita"].shift(1)
data2["lag_Tech_Advancement_CN"] = data2.groupby(level=0)["Tech Advancement CN"].shift(1)

# Drop missing rows after lagging
data2 = data2.dropna()

# Scaling to adjust for large magnitude differences
# Scaling down large variables for better interpretability
data2.loc[:, ["GDP/capita", "Tech Advancement CN", "Fixed Asset Investment CN (T-1)", "Avg Wage Difference"]] = data2.loc[:, ["GDP/capita", "Tech Advancement CN", "Fixed Asset Investment CN (T-1)", "Avg Wage Difference"]].div(1000)
data2["Import"] = data2["Import"].div(1000000)

# Show updated data
data2.describe().T

"""# MODELING"""

# Dependent and independent variables
data2["constant"] = 1
dependent_var = data2['Import'].to_frame()
independent_vars = data2[['log_GDP_per_capita', 'log_Tech_Advancement_CN', 'lag_GDP_per_capita', 'lag_Tech_Advancement_CN']]

# Check for multicollinearity with VIF
vif = pd.DataFrame()
vif["VIF Factor"] = [variance_inflation_factor(independent_vars.values, i) for i in range(independent_vars.shape[1])]
vif["features"] = independent_vars.columns
print("Variance Inflation Factors (VIF):")
print(vif)

"""# FIXED EFFECTS MODEL"""

# Entity Fixed Effects Model (One-way)
fe_model = PanelOLS(dependent_var, independent_vars, entity_effects=True)
fe_results = fe_model.fit()
print("Fixed Effects Model Results:")
print(fe_results)

# Two-way Fixed Effects Model (Entity and Time effects)
two_way_fe_model = PanelOLS(dependent_var, independent_vars, entity_effects=True, time_effects=True, drop_absorbed=True)
two_way_fe_results = two_way_fe_model.fit()
print("Two-Way Fixed Effects Model Results:")
print(two_way_fe_results)

"""# RANDOM EFFECTS MODEL"""

# Random Effects Model
re_model = RandomEffects(dependent_var, independent_vars)
re_results = re_model.fit()
print("\nRandom Effects Model Results:")
print(re_results)

"""# HAUSMAN TEST: FIXED VS RANDOM EFFECTS"""

# Perform Durbin-Wu-Hausman test to compare Fixed and Random Effects models
def durbin_wu_hausman_test(fe_results, re_results):
    u_fe = fe_results.resids
    u_re = re_results.resids
    dw_hausman_stat = np.sum((u_fe - u_re) ** 2) / np.sum(u_fe ** 2)
    df = fe_results.df_model - re_results.df_model
    p_value = 1 - scipy.stats.chi2.cdf(dw_hausman_stat, df)
    return dw_hausman_stat, p_value

dw_hausman_stat, p_value = durbin_wu_hausman_test(fe_results, re_results)
print("Durbin-Wu-Hausman Test Statistic:", dw_hausman_stat)
print("P-value:", p_value)

# If p-value is small (typically < 0.05), Fixed Effects is preferred over Random Effects

"""# BREUSCH-PAGAN TEST FOR HETEROSCEDASTICITY"""

# Breusch-Pagan test for heteroscedasticity
residuals = fe_results.resids
bp_test = het_breuschpagan(residuals, independent_vars)
labels = ['Lagrange multiplier statistic', 'p-value', 'f-value', 'f p-value']
print(dict(zip(labels, bp_test)))


# IMPORTS AND SETTINGS

In [None]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from linearmodels.panel import PanelOLS, RandomEffects
from statsmodels.api import add_constant
from linearmodels.panel import compare
import numpy as np
import scipy.stats
from linearmodels.panel import FirstDifferenceOLS
from statsmodels.stats.outliers_influence import variance_inflation_factor
# import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 500)
from linearmodels.panel import PanelOLS
# from sklearn.preprocessing import StandardScaler
from statsmodels.stats.diagnostic import het_breuschpagan
import os
import warnings
warnings.filterwarnings('ignore')

In [None]:
project_folder=os.getcwd()
project_folder

'C:\\Users\\erott\\Documents\\Masterarbeit'

# SCRIPT

## DATA READING

In [None]:
# data from Excel
filename2 = os.path.join(project_folder, "Regression data", "panel_regression_absolute.csv")
data2 = pd.read_csv(os.path.join(project_folder, filename2))
#'Country' as entity and 'Year' as time
data2 = data2.set_index(['Country', 'Year'])
data2=data2.dropna(axis=1, how="all")
data2.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,Import,Fit,GDP/capita,Energy Consumption,Annual Solar Capacity Addition,TechAdvancement,Tech Advancement CN,Trade Policies EU,Fixed Asset Investment CN (T-1),Avg Wage Difference,Environ. St. Difference
Country,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Austria,2005,1436678,0.672,38417.46,425.46,-6.0,104,573,0.0,63.51,2346.47,1.6111
Austria,2006,41083867,0.678,40669.33,421.01,1.4,103,979,0.0,94.15,2408.95,1.8333
Austria,2007,20635660,0.521,46915.34,411.99,1.9,117,1621,0.0,139.32,2407.1,1.5556
Austria,2008,28782325,0.559,51919.98,420.01,5.9,123,2364,0.0,211.47,2568.188377,1.8611
Austria,2009,18342478,0.53,48153.32,406.84,18.8,84,4149,0.0,337.35,2974.78,1.9722
Austria,2010,66300152,0.434,46903.76,420.16,39.9,52,6397,0.0,520.84,2755.66,1.7778
Austria,2011,54727543,0.438,51442.28,396.07,85.3,34,7634,0.0,738.25,3004.24,1.5833
Austria,2012,96135630,0.338,48564.92,414.52,163.4,27,8395,0.0,1219.38,2997.29,0.4722
Austria,2013,66585288,0.23,50731.13,411.2,288.5,20,8057,21.84,1312.54,3108.45,0.75
Austria,2014,38423535,0.0,51786.38,394.8,159.3,13,10136,47.7,1498.14,2864.4,0.3611


In [None]:
data2.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 360 entries, ('Austria', 2005) to ('Sweden', 2019)
Data columns (total 11 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Import                           360 non-null    int64  
 1   Fit                              360 non-null    float64
 2   GDP/capita                       360 non-null    float64
 3   Energy Consumption               360 non-null    float64
 4   Annual Solar Capacity Addition   360 non-null    float64
 5   TechAdvancement                  360 non-null    int64  
 6   Tech Advancement CN              360 non-null    int64  
 7   Trade Policies EU                360 non-null    float64
 8   Fixed Asset Investment CN (T-1)  360 non-null    float64
 9   Avg Wage Difference              360 non-null    float64
 10  Environ. St. Difference          360 non-null    float64
dtypes: float64(8), int64(3)
memory usage: 33.1+ KB


In [None]:
# #Standardisation of data
# scaler = StandardScaler()
# numerical_data = data.select_dtypes(include=['float64', 'int64'])
# scaled_data = scaler.fit_transform(numerical_data)
# data_scaled = pd.DataFrame(scaled_data, columns=numerical_data.columns, index=data.index)
# data_scaled

In [None]:
data2.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Import,360.0,224583500.0,825173200.0,32139.0,3082600.0,13635220.0,68235440.0,8340234000.0
Fit,360.0,0.1557222,0.2274972,0.0,0.0,0.0,0.22325,0.83
GDP/capita,360.0,29856.38,17016.86,3899.83,14699.57,23613.7,45540.15,80848.3
Energy Consumption,360.0,737.8902,943.8475,40.75,187.97,341.545,835.88,4062.38
Annual Solar Capacity Addition,360.0,322.6842,1035.107,-56.0,0.2,11.25,141.475,9539.0
TechAdvancement,360.0,57.21389,153.7337,0.0,3.0,9.0,35.25,1242.0
Tech Advancement CN,360.0,9970.867,7896.539,573.0,2364.0,8057.0,17340.0,25048.0
Trade Policies EU,360.0,16.42867,21.11868,0.0,0.0,0.0,47.7,47.7
Fixed Asset Investment CN (T-1),360.0,1053.947,763.5221,63.51,211.47,1219.38,1800.82,2284.87
Avg Wage Difference,360.0,1813.354,1252.309,-11.91,775.535,1416.56,2904.227,6615.06


In [None]:
#scaling of the data
data2.loc[:, ["GDP/capita", "Tech Advancement CN", "Fixed Asset Investment CN (T-1)", "Avg Wage Difference"]] = data2.loc[:, ["GDP/capita", "Tech Advancement CN", "Fixed Asset Investment CN (T-1)", "Avg Wage Difference"]].div(1000)
data2.loc[:, "Import"] = data2.loc[:, "Import"].div(1000000)
data2.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,Import,Fit,GDP/capita,Energy Consumption,Annual Solar Capacity Addition,TechAdvancement,Tech Advancement CN,Trade Policies EU,Fixed Asset Investment CN (T-1),Avg Wage Difference,Environ. St. Difference
Country,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Austria,2005,1.436678,0.672,38.41746,425.46,-6.0,104,0.573,0.0,0.06351,2.34647,1.6111
Austria,2006,41.083867,0.678,40.66933,421.01,1.4,103,0.979,0.0,0.09415,2.40895,1.8333
Austria,2007,20.63566,0.521,46.91534,411.99,1.9,117,1.621,0.0,0.13932,2.4071,1.5556
Austria,2008,28.782325,0.559,51.91998,420.01,5.9,123,2.364,0.0,0.21147,2.568188,1.8611
Austria,2009,18.342478,0.53,48.15332,406.84,18.8,84,4.149,0.0,0.33735,2.97478,1.9722
Austria,2010,66.300152,0.434,46.90376,420.16,39.9,52,6.397,0.0,0.52084,2.75566,1.7778
Austria,2011,54.727543,0.438,51.44228,396.07,85.3,34,7.634,0.0,0.73825,3.00424,1.5833
Austria,2012,96.13563,0.338,48.56492,414.52,163.4,27,8.395,0.0,1.21938,2.99729,0.4722
Austria,2013,66.585288,0.23,50.73113,411.2,288.5,20,8.057,21.84,1.31254,3.10845,0.75
Austria,2014,38.423535,0.0,51.78638,394.8,159.3,13,10.136,47.7,1.49814,2.8644,0.3611


In [None]:
data2.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Import,360.0,224.583454,825.173238,0.032139,3.0826,13.635224,68.235437,8340.234422
Fit,360.0,0.155722,0.227497,0.0,0.0,0.0,0.22325,0.83
GDP/capita,360.0,29.856376,17.016865,3.89983,14.699565,23.6137,45.540148,80.8483
Energy Consumption,360.0,737.890194,943.847525,40.75,187.97,341.545,835.88,4062.38
Annual Solar Capacity Addition,360.0,322.684167,1035.107133,-56.0,0.2,11.25,141.475,9539.0
TechAdvancement,360.0,57.213889,153.733667,0.0,3.0,9.0,35.25,1242.0
Tech Advancement CN,360.0,9.970867,7.896539,0.573,2.364,8.057,17.34,25.048
Trade Policies EU,360.0,16.428667,21.118675,0.0,0.0,0.0,47.7,47.7
Fixed Asset Investment CN (T-1),360.0,1.053947,0.763522,0.06351,0.21147,1.21938,1.80082,2.28487
Avg Wage Difference,360.0,1.813354,1.252309,-0.01191,0.775535,1.41656,2.904228,6.61506


In [None]:
# dependent, independent and control variables
data2["constant"]=1
dependent_var = data2['Import'].to_frame()
independent_vars = data2.iloc[:,1:]
dependent_var.shape, independent_vars.shape

((360, 1), (360, 11))

In [None]:
vif = pd.DataFrame()
vif["VIF Factor"] = [variance_inflation_factor(independent_vars.values, i) for i in range(independent_vars.shape[1])]
vif["features"] = independent_vars.columns
print("Variance Inflation Factors (VIF):")
vif

Variance Inflation Factors (VIF):


Unnamed: 0,VIF Factor,features
0,1.315725,Fit
1,4.54459,GDP/capita
2,2.853626,Energy Consumption
3,2.384757,Annual Solar Capacity Addition
4,3.69341,TechAdvancement
5,8.957944,Tech Advancement CN
6,2.102597,Trade Policies EU
7,15.198968,Fixed Asset Investment CN (T-1)
8,5.154516,Avg Wage Difference
9,4.278517,Environ. St. Difference


In [None]:
correlation_matrix = data2.corr()
print(correlation_matrix)

                                   Import       Fit  GDP/capita  \
Import                           1.000000  0.266253    0.166235   
Fit                              0.266253  1.000000    0.013715   
GDP/capita                       0.166235  0.013715    1.000000   
Energy Consumption               0.522634  0.292713    0.284049   
Annual Solar Capacity Addition   0.856715  0.177242    0.177018   
TechAdvancement                  0.794065  0.264630    0.194313   
Tech Advancement CN             -0.079013 -0.364941    0.064757   
Trade Policies EU               -0.147700 -0.300296    0.011244   
Fixed Asset Investment CN (T-1) -0.090597 -0.375513    0.060157   
Avg Wage Difference              0.230875 -0.011542    0.875215   
Environ. St. Difference          0.127749  0.349210    0.105704   
constant                              NaN       NaN         NaN   

                                 Energy Consumption  \
Import                                     0.522634   
Fit               

In [None]:
# Verify column names
print(independent_vars.columns)

# Drop the column if it exists
independent_vars = independent_vars.drop(columns=["constant","Fixed Asset Investment CN (T-1)","Avg Wage Difference"], errors='ignore')

# Verify the result
print(independent_vars.columns)

Index(['Fit', 'GDP/capita', 'Energy Consumption',
       'Annual Solar Capacity Addition', 'TechAdvancement ',
       'Tech Advancement CN', 'Trade Policies EU',
       'Fixed Asset Investment CN (T-1)', 'Avg Wage Difference',
       'Environ. St. Difference', 'constant'],
      dtype='object')
Index(['Fit', 'GDP/capita', 'Energy Consumption',
       'Annual Solar Capacity Addition', 'TechAdvancement ',
       'Tech Advancement CN', 'Trade Policies EU', 'Environ. St. Difference'],
      dtype='object')


In [None]:
vif = pd.DataFrame()
vif["VIF Factor"] = [variance_inflation_factor(independent_vars.values, i) for i in range(independent_vars.shape[1])]
vif["features"] = independent_vars.columns
print("Variance Inflation Factors (VIF):")
vif

Variance Inflation Factors (VIF):


Unnamed: 0,VIF Factor,features
0,1.805317,Fit
1,4.39471,GDP/capita
2,4.227952,Energy Consumption
3,2.518774,Annual Solar Capacity Addition
4,4.078471,TechAdvancement
5,3.899547,Tech Advancement CN
6,2.91494,Trade Policies EU
7,3.029983,Environ. St. Difference


## MODELS

### FE MODEL (ENTITY ONE WAY)

In [None]:
print(independent_vars.columns.duplicated())

[False False False False False False False False]


In [None]:
corr_matrix = independent_vars.corr()
print(corr_matrix)

                                     Fit  GDP/capita  Energy Consumption  \
Fit                             1.000000    0.013715            0.292713   
GDP/capita                      0.013715    1.000000            0.284049   
Energy Consumption              0.292713    0.284049            1.000000   
Annual Solar Capacity Addition  0.177242    0.177018            0.568172   
TechAdvancement                 0.264630    0.194313            0.758884   
Tech Advancement CN            -0.364941    0.064757           -0.021697   
Trade Policies EU              -0.300296    0.011244           -0.020441   
Environ. St. Difference         0.349210    0.105704            0.137263   

                                Annual Solar Capacity Addition  \
Fit                                                   0.177242   
GDP/capita                                            0.177018   
Energy Consumption                                    0.568172   
Annual Solar Capacity Addition                     

In [None]:
def matrix_rank_check(df):
    return np.linalg.matrix_rank(df.values)

print(f"Rank of independent variables matrix: {matrix_rank_check(independent_vars)}")
print(f"Number of columns in independent variables: {independent_vars.shape[1]}")

Rank of independent variables matrix: 8
Number of columns in independent variables: 8


In [None]:
print(corr_matrix)

                                     Fit  GDP/capita  Energy Consumption  \
Fit                             1.000000    0.013715            0.292713   
GDP/capita                      0.013715    1.000000            0.284049   
Energy Consumption              0.292713    0.284049            1.000000   
Annual Solar Capacity Addition  0.177242    0.177018            0.568172   
TechAdvancement                 0.264630    0.194313            0.758884   
Tech Advancement CN            -0.364941    0.064757           -0.021697   
Trade Policies EU              -0.300296    0.011244           -0.020441   
Environ. St. Difference         0.349210    0.105704            0.137263   

                                Annual Solar Capacity Addition  \
Fit                                                   0.177242   
GDP/capita                                            0.177018   
Energy Consumption                                    0.568172   
Annual Solar Capacity Addition                     

In [None]:
# Fixed Effects Model
fe_model = PanelOLS(dependent_var, independent_vars, entity_effects=True)
fe_results = fe_model.fit()

print("Fixed Effects Model Results:")
fe_results

Fixed Effects Model Results:


0,1,2,3
Dep. Variable:,Import,R-squared:,0.7987
Estimator:,PanelOLS,R-squared (Between):,-32.646
No. Observations:,360,R-squared (Within):,0.7987
Date:,"Fri, Jul 12 2024",R-squared (Overall):,-13.433
Time:,18:09:46,Log-likelihood,-2552.5
Cov. Estimator:,Unadjusted,,
,,F-statistic:,162.65
Entities:,24,P-value,0.0000
Avg Obs:,15.000,Distribution:,"F(8,328)"
Min Obs:,15.000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
Fit,-113.73,112.80,-1.0083,0.3141,-335.63,108.17
GDP/capita,7.5054,4.8436,1.5495,0.1222,-2.0231,17.034
Energy Consumption,2.1315,0.3457,6.1664,0.0000,1.4515,2.8115
Annual Solar Capacity Addition,0.4435,0.0260,17.055,0.0000,0.3924,0.4947
TechAdvancement,4.4812,0.2880,15.557,0.0000,3.9145,5.0478
Tech Advancement CN,2.4885,3.6655,0.6789,0.4977,-4.7224,9.6994
Trade Policies EU,2.0666,1.1748,1.7590,0.0795,-0.2446,4.3777
Environ. St. Difference,118.08,40.776,2.8957,0.0040,37.859,198.29


### FE MODEL (TWO-WAY)

In [None]:
#two-way fixed effects model
two_model = PanelOLS(dependent_var, independent_vars, entity_effects=True, time_effects=True,  drop_absorbed = True)
results = two_model.fit()
results

0,1,2,3
Dep. Variable:,Import,R-squared:,0.7818
Estimator:,PanelOLS,R-squared (Between):,-36.624
No. Observations:,360,R-squared (Within):,0.7813
Date:,"Fri, Jul 12 2024",R-squared (Overall):,-15.136
Time:,18:09:46,Log-likelihood,-2543.6
Cov. Estimator:,Unadjusted,,
,,F-statistic:,188.71
Entities:,24,P-value,0.0000
Avg Obs:,15.000,Distribution:,"F(6,316)"
Min Obs:,15.000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
Fit,-184.70,120.12,-1.5376,0.1251,-421.04,51.634
GDP/capita,6.2428,6.2856,0.9932,0.3214,-6.1241,18.610
Energy Consumption,2.3382,0.3937,5.9397,0.0000,1.5637,3.1127
Annual Solar Capacity Addition,0.4383,0.0262,16.715,0.0000,0.3867,0.4898
TechAdvancement,4.4119,0.2908,15.173,0.0000,3.8398,4.9840
Environ. St. Difference,156.43,79.971,1.9561,0.0513,-0.9120,313.77


### RANDOM EFFECTS

In [None]:
# Random Effects Model
re_model = RandomEffects(dependent_var, independent_vars)
re_results = re_model.fit()

print("\nRandom Effects Model Results:")
re_results


Random Effects Model Results:


0,1,2,3
Dep. Variable:,Import,R-squared:,0.8375
Estimator:,RandomEffects,R-squared (Between):,0.9583
No. Observations:,360,R-squared (Within):,0.7484
Date:,"Fri, Jul 12 2024",R-squared (Overall):,0.8377
Time:,18:09:46,Log-likelihood,-2613.5
Cov. Estimator:,Unadjusted,,
,,F-statistic:,226.76
Entities:,24,P-value,0.0000
Avg Obs:,15.000,Distribution:,"F(8,352)"
Min Obs:,15.000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
Fit,211.15,89.526,2.3586,0.0189,35.079,387.23
GDP/capita,0.7021,1.1208,0.6264,0.5315,-1.5023,2.9064
Energy Consumption,-0.2082,0.0315,-6.6007,0.0000,-0.2702,-0.1461
Annual Solar Capacity Addition,0.4874,0.0269,18.131,0.0000,0.4345,0.5402
TechAdvancement,2.6837,0.2262,11.863,0.0000,2.2388,3.1287
Tech Advancement CN,-3.9946,2.8496,-1.4018,0.1619,-9.5990,1.6098
Trade Policies EU,0.0388,1.1711,0.0331,0.9736,-2.2645,2.3420
Environ. St. Difference,51.875,22.593,2.2961,0.0223,7.4415,96.308


In [None]:
#Hausmann Test
comparison = compare({"Fixed": fe_results, "Random": re_results})
comparison

0,1,2
,Fixed,Random
Dep. Variable,Import,Import
Estimator,PanelOLS,RandomEffects
No. Observations,360,360
Cov. Est.,Unadjusted,Unadjusted
R-squared,0.7987,0.8375
R-Squared (Within),0.7987,0.7484
R-Squared (Between),-32.646,0.9583
R-Squared (Overall),-13.433,0.8377
F-statistic,162.65,226.76


### Durbin-Wu-Hausman test

In [None]:
import numpy as np
from statsmodels.regression.linear_model import OLS
from scipy.stats import chi2

def durbin_wu_hausman_test(ols_fixed, ols_random):
    """
    Perform the Durbin-Wu-Hausman test for endogeneity.

    Parameters:
    ols_fixed : OLS
        Fixed-effects regression results
    ols_random : OLS
        Random-effects regression results

    Returns:
    chi_squared_stat : float
        The chi-squared test statistic
    p_value : float
        The p-value of the test
    """
    # Residuals from fixed effects model
    u_fe = ols_fixed.resids

    # Residuals from random effects model
    u_re = ols_random.resids

    # Calculate the test statistic
    dw_hausman_stat = np.sum((u_fe - u_re)**2) / np.sum(u_fe**2)

    df = fe_results.df_model - re_results.df_model

    # Compute the p-value
    p_value = 1 - chi2.cdf(dw_hausman_stat, df)

    return dw_hausman_stat, p_value

# Example usage:
# Assuming you have panel data stored in a DataFrame called 'data'
# with columns 'dependent_var', 'independent_var1', 'independent_var2', etc.
# and a variable 'id' for individual identifiers and 'time' for time identifiers.


# Perform the Durbin-Wu-Hausman test
dw_hausman_stat, p_value = durbin_wu_hausman_test(fe_results, re_results)

print("Durbin-Wu-Hausman Test Statistic:", dw_hausman_stat)
print("P-value:", p_value)

Durbin-Wu-Hausman Test Statistic: 0.4026918013803389
P-value: 1.0


In [None]:
fe_results

0,1,2,3
Dep. Variable:,Import,R-squared:,0.7987
Estimator:,PanelOLS,R-squared (Between):,-32.646
No. Observations:,360,R-squared (Within):,0.7987
Date:,"Fri, Jul 12 2024",R-squared (Overall):,-13.433
Time:,18:09:46,Log-likelihood,-2552.5
Cov. Estimator:,Unadjusted,,
,,F-statistic:,162.65
Entities:,24,P-value,0.0000
Avg Obs:,15.000,Distribution:,"F(8,328)"
Min Obs:,15.000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
Fit,-113.73,112.80,-1.0083,0.3141,-335.63,108.17
GDP/capita,7.5054,4.8436,1.5495,0.1222,-2.0231,17.034
Energy Consumption,2.1315,0.3457,6.1664,0.0000,1.4515,2.8115
Annual Solar Capacity Addition,0.4435,0.0260,17.055,0.0000,0.3924,0.4947
TechAdvancement,4.4812,0.2880,15.557,0.0000,3.9145,5.0478
Tech Advancement CN,2.4885,3.6655,0.6789,0.4977,-4.7224,9.6994
Trade Policies EU,2.0666,1.1748,1.7590,0.0795,-0.2446,4.3777
Environ. St. Difference,118.08,40.776,2.8957,0.0040,37.859,198.29


In [None]:
#Wald Test: H0 is that all the regressors are not associated (no effect) with the dependent variables, H0 model has only the intercept (unconditional average)
wald_result = fe_results.wald_test(formula="Fit=0, `GDP/capita`=0, `Energy Consumption`=0, `Annual Solar Capacity Addition`=0, `Tech Advancement CN`=0, `TechAdvancement `=0, `Trade Policies EU`=0, `Environ. St. Difference`=0")
wald_result

Linear Equality Hypothesis Test
H0: Linear equality constraint is valid
Statistic: 1301.1621
P-value: 0.0000
Distributed: chi2(8)
WaldTestStatistic, id: 0x1db5869e2a0

In [None]:
# Perform the Breusch-Pagan test
residuals = fe_results.resids
independent_vars_bp_test = independent_vars.copy()
independent_vars_bp_test["constant"]=1
independent_vars_bp_test

bp_test = het_breuschpagan(resid=residuals, exog_het=independent_vars_bp_test, robust=True)
labels = ['Lagrange multiplier statistic', 'p-value', 'f-value', 'f p-value']
print(dict(zip(labels, bp_test)))

{'Lagrange multiplier statistic': 71.49189800125988, 'p-value': 2.4785064769839843e-12, 'f-value': 10.872162698637055, 'f p-value': 1.0472299529299159e-13}
