# NOTES

# IMPORTS AND SETTINGS

Changes and Improvements:
VIF Analysis: The script checks for multicollinearity, and I have suggested removing Fixed Asset Investment CN (T-1) and Avg Wage Difference due to potential multicollinearity issues.
Scaling: I added scaling for variables that have large magnitudes, improving the interpretability of coefficients.
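Model Diagnostics: The script includes a Durbin-Wu-Hausman-style comparison of the fixed- and random-effects models (endogeneity of the random effects), a Wald test (joint significance of the regressors), and a Breusch-Pagan test (heteroskedasticity of the residuals).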
Rank Check: Added a check for matrix rank to ensure that the independent variables are not linearly dependent.

In [None]:
# -*- coding: utf-8 -*-
"""Improved script based on suggestions"""

"""# IMPORTS AND SETTINGS"""
import pandas as pd
import statsmodels.api as sm
from linearmodels.panel import PanelOLS, RandomEffects
from statsmodels.api import add_constant
from linearmodels.panel import compare
import numpy as np
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.stats.diagnostic import het_breuschpagan
from scipy.stats import chi2
import os
import warnings
warnings.filterwarnings('ignore')

project_folder = os.getcwd()

"""# DATA READING"""

# Panel data stored as CSV inside the "Regression data" folder of the
# current working directory.
filename3 = os.path.join(project_folder, "Regression data", "panel_regression_all.csv")
# filename3 already starts with project_folder, so it is passed to read_csv
# directly instead of being joined with project_folder a second time.
data3 = pd.read_csv(filename3)

# 'Country' as entity and 'Year' as time
data3 = data3.set_index(['Country', 'Year'])
# Drop columns that contain no data at all.
data3 = data3.dropna(axis=1, how="all")
data3.head(20)

data3.info()

# Rescale large-magnitude variables so the coefficients are easier to read:
# four regressors are divided by 1,000 and Import by 1,000,000.
# The columns are replaced (df[col] = ...) rather than written in place through
# .loc, because some of them hold integers and writing floats into an
# integer column in place is deprecated upcasting in recent pandas.
per_thousand_cols = [
    "GDP/capita",
    "Tech Advancement CN",
    "Fixed Asset Investment CN (T-1)",
    "Avg Wage Difference",
]
data3[per_thousand_cols] = data3[per_thousand_cols] / 1000
data3["Import"] = data3["Import"] / 1000000
data3.describe().T

"""# MODELING SETUP"""

# Set dependent, independent, and control variables
data3["constant"] = 1
dependent_var = data3['Import'].to_frame()
# Select the regressors by name (everything except the dependent variable)
# instead of relying on 'Import' being the first column.
independent_vars = data3.drop(columns=["Import"])

# Drop insignificant or redundant variables based on VIF and correlation matrix
independent_vars = independent_vars.drop(columns=["constant", "Fixed Asset Investment CN (T-1)", "Avg Wage Difference"], errors='ignore')

# Calculate Variance Inflation Factor (VIF) to check multicollinearity.
# The auxiliary regressions need an intercept; without one the VIFs are
# uncentred and inflated for any variable with a non-zero mean. The intercept
# is prepended as column 0 and skipped when reporting.
vif_design = add_constant(independent_vars, prepend=True, has_constant="add")
vif = pd.DataFrame()
vif["VIF Factor"] = [
    variance_inflation_factor(vif_design.values, i)
    for i in range(1, vif_design.shape[1])
]
vif["features"] = independent_vars.columns
print("Variance Inflation Factors (VIF):")
print(vif)

"""# MODELS

## Fixed Effects Model (Entity One-Way)
"""
# Pairwise correlations between the regressors: a quick screen for strongly
# related columns before the models are estimated.
corr_matrix = independent_vars.corr(method="pearson")
print(corr_matrix)

def matrix_rank_check(df):
    """Return the numerical rank of the values held in ``df``.

    ``df`` must expose a ``.values`` array (e.g. a pandas DataFrame). A rank
    below the number of columns means some columns are linearly dependent.
    """
    design_matrix = df.values
    return np.linalg.matrix_rank(design_matrix)

# A rank equal to the column count means the raw regressors are not exactly
# linearly dependent. It does not rule out collinearity that only appears
# after the entity effects are removed (see drop_absorbed below).
print(f"Rank of independent variables matrix: {matrix_rank_check(independent_vars)}")
print(f"Number of columns in independent variables: {independent_vars.shape[1]}")

# Fit Fixed Effects Model (entity effects only).
# drop_absorbed=True lets linearmodels drop regressors that are fully absorbed
# by the entity effects (e.g. variables that do not change within a country);
# without it the fit raises AbsorbingEffectError. This matches the two-way model.
fe_model = PanelOLS(dependent_var, independent_vars, entity_effects=True, drop_absorbed=True)
fe_results = fe_model.fit()

print("Fixed Effects Model Results:")
print(fe_results)

"""## Fixed Effects Model (Two-Way)"""

# Two-way specification: entity and time effects together. Regressors that the
# effects fully absorb are dropped automatically.
two_way_options = dict(entity_effects=True, time_effects=True, drop_absorbed=True)
two_model = PanelOLS(dependent_var, independent_vars, **two_way_options)
two_results = two_model.fit()

print("Two-Way Fixed Effects Model Results:", two_results, sep="\n")

"""## Random Effects Model"""

# Random-effects estimate on the same dependent variable and regressors.
re_model = RandomEffects(dependent_var, independent_vars)
re_results = re_model.fit()

print("\nRandom Effects Model Results:", re_results, sep="\n")

# Side-by-side summary of the fixed- and random-effects fits.
# NOTE: compare() only tabulates the two results; it does not itself compute
# a Hausman statistic.
models_to_compare = {"Fixed": fe_results, "Random": re_results}
comparison = compare(models_to_compare)
print(comparison)

"""## Durbin-Wu-Hausman Test for Endogeneity"""

def durbin_wu_hausman_test(ols_fixed, ols_random):
    """Compare a fixed-effects and a random-effects fit through their residuals.

    Parameters:
    ols_fixed : fitted results object
        Fixed-effects results exposing ``resids`` and ``df_model``.
    ols_random : fitted results object
        Random-effects results exposing ``resids`` and ``df_model``.

    Returns:
    dw_hausman_stat : float
        Sum of squared differences between the two residual vectors divided
        by the fixed-effects residual sum of squares.
    p_value : float
        Upper-tail chi-squared probability of the statistic, with degrees of
        freedom ``ols_fixed.df_model - ols_random.df_model``.

    NOTE(review): this residual-ratio statistic is not the textbook Hausman
    quadratic form in the coefficient differences; confirm that the
    chi-squared reference distribution is appropriate before relying on the
    p-value.
    """
    u_fe = ols_fixed.resids  # Residuals from FE model
    u_re = ols_random.resids  # Residuals from RE model
    dw_hausman_stat = np.sum((u_fe - u_re) ** 2) / np.sum(u_fe ** 2)
    # Use the results passed in, not module-level globals, so the function
    # works for any pair of fitted models.
    df = ols_fixed.df_model - ols_random.df_model
    # sf() is the upper-tail probability (1 - cdf) without cancellation error.
    p_value = chi2.sf(dw_hausman_stat, df)
    return dw_hausman_stat, p_value

# Run the residual-based FE-vs-RE comparison and report both numbers.
dwh_outcome = durbin_wu_hausman_test(fe_results, re_results)
dw_hausman_stat, p_value = dwh_outcome
for caption, value in (
    ("Durbin-Wu-Hausman Test Statistic:", dw_hausman_stat),
    ("P-value:", p_value),
):
    print(caption, value)

"""## Wald Test"""

# Joint Wald test on the fixed-effects fit. H0: every coefficient listed in
# the restriction string equals zero. Note that the column name
# `TechAdvancement ` carries a trailing space.
wald_restrictions = "Fit=0, `GDP/capita`=0, `Energy Consumption`=0, `Annual Solar Capacity Addition`=0, `Tech Advancement CN`=0, `TechAdvancement `=0, `Trade Policies EU`=0, `Environ. St. Difference`=0"
wald_result = fe_results.wald_test(formula=wald_restrictions)
print(wald_result)

"""## Breusch-Pagan Test for Heteroskedasticity"""

# Breusch-Pagan test on the fixed-effects residuals. The explanatory matrix
# passed to the test is the regressor set plus an explicit constant column.
residuals = fe_results.resids
independent_vars_bp_test = independent_vars.assign(constant=1)

bp_test = het_breuschpagan(resid=residuals, exog_het=independent_vars_bp_test, robust=True)
labels = ['Lagrange multiplier statistic', 'p-value', 'f-value', 'f p-value']
print({label: value for label, value in zip(labels, bp_test)})


In [None]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from linearmodels.panel import PanelOLS, RandomEffects
from statsmodels.api import add_constant
from linearmodels.panel import compare
import numpy as np
import scipy.stats
from linearmodels.panel import FirstDifferenceOLS
from statsmodels.stats.outliers_influence import variance_inflation_factor
# import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 500)
from linearmodels.panel import PanelOLS
# from sklearn.preprocessing import StandardScaler
from statsmodels.stats.diagnostic import het_breuschpagan
import os
import warnings
warnings.filterwarnings('ignore')

In [None]:
project_folder=os.getcwd()
project_folder

'C:\\Users\\erott\\Documents\\Masterarbeit'

# SCRIPT

## DATA READING

In [None]:
# Panel data stored as CSV inside the "Regression data" folder.
filename3 = os.path.join(project_folder, "Regression data", "panel_regression_all.csv")
# Read the path built on the line above. filename3 already starts with
# project_folder, so no second join is needed.
data3 = pd.read_csv(filename3)
#'Country' as entity and 'Year' as time
data3 = data3.set_index(['Country', 'Year'])
# Drop columns that contain no data at all.
data3=data3.dropna(axis=1, how="all")
data3.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,Import,Fit,SD Fit,Duration,SD Duration,GDP/capita,Area,Energy dependency,Energy Consumption,Electricity Price,Annual Solar Capacity Addition,TechAdvancement,Tech Advancement CN,Trade Policies EU,Fixed Asset Investment CN (T-1),Avg Wage Difference,Environ. St. Difference
Country,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Austria,2005,1436678,0.672,0.299658,13,0.0,38417.46,82520.0,71.76,425.46,0.119,-6.0,104,573,0.0,63.51,2346.47,1.6111
Austria,2006,41083867,0.678,0.247408,13,0.0,40669.33,82520.0,72.248,421.01,0.120775,1.4,103,979,0.0,94.15,2408.95,1.8333
Austria,2007,20635660,0.521,0.060136,13,0.0,46915.34,82520.0,68.475,411.99,0.138875,1.9,117,1621,0.0,139.32,2407.1,1.5556
Austria,2008,28782325,0.559,0.066811,13,0.0,51919.98,82520.0,68.738,420.01,0.152825,5.9,123,2364,0.0,211.47,2568.188377,1.8611
Austria,2009,18342478,0.53,0.069027,13,0.0,48153.32,82520.0,65.122,406.84,0.165825,18.8,84,4149,0.0,337.35,2974.78,1.9722
Austria,2010,66300152,0.434,0.078703,13,0.0,46903.76,82520.0,62.782,420.16,0.16505,39.9,52,6397,0.0,520.84,2755.66,1.7778
Austria,2011,54727543,0.438,0.05061,13,0.0,51442.28,82520.0,69.981,396.07,0.166375,85.3,34,7634,0.0,738.25,3004.24,1.5833
Austria,2012,96135630,0.338,0.078276,13,0.0,48564.92,82520.0,63.785,414.52,0.1665,163.4,27,8395,0.0,1219.38,2997.29,0.4722
Austria,2013,66585288,0.23,0.101873,13,0.0,50731.13,82520.0,61.26,411.2,0.16915,288.5,20,8057,21.84,1312.54,3108.45,0.75
Austria,2014,38423535,0.0,0.162951,0,5.2,51786.38,82520.0,65.625,394.8,0.164525,159.3,13,10136,47.7,1498.14,2864.4,0.3611


In [None]:
data3.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 360 entries, ('Austria', 2005) to ('Sweden', 2019)
Data columns (total 17 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Import                           360 non-null    int64  
 1   Fit                              360 non-null    float64
 2   SD Fit                           360 non-null    float64
 3   Duration                         360 non-null    int64  
 4   SD Duration                      360 non-null    float64
 5   GDP/capita                       360 non-null    float64
 6   Area                             360 non-null    float64
 7   Energy dependency                360 non-null    float64
 8   Energy Consumption               360 non-null    float64
 9   Electricity Price                360 non-null    float64
 10  Annual Solar Capacity Addition   360 non-null    float64
 11  TechAdvancement                  360 non-null    int64 

In [None]:
# #Standardisation of data
# scaler = StandardScaler()
# numerical_data = data.select_dtypes(include=['float64', 'int64'])
# scaled_data = scaler.fit_transform(numerical_data)
# data_scaled = pd.DataFrame(scaled_data, columns=numerical_data.columns, index=data.index)
# data_scaled

In [None]:
data3.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Import,360.0,224583500.0,825173200.0,32139.0,3082600.0,13635220.0,68235440.0,8340234000.0
Fit,360.0,0.1557222,0.2274972,0.0,0.0,0.0,0.22325,0.83
SD Fit,360.0,0.07074416,0.08862287,0.0,0.0,0.03077162,0.113328,0.3411033
Duration,360.0,8.263889,9.297307,0.0,0.0,0.0,18.0,25.0
SD Duration,360.0,2.428685,3.778216,0.0,0.0,0.0,5.669694,12.24745
GDP/capita,360.0,29856.38,17016.86,3899.83,14699.57,23613.7,45540.15,80848.3
Area,360.0,166018.6,156239.7,20136.4,53990.0,86890.0,297774.7,547557.0
Energy dependency,360.0,51.91986,23.1019,-50.618,36.874,53.2385,70.34575,91.551
Energy Consumption,360.0,737.8902,943.8475,40.75,187.97,341.545,835.88,4062.38
Electricity Price,360.0,0.1410688,0.04426644,0.058,0.1122375,0.1334125,0.1632688,0.287175


In [None]:
# Scaling of the data: four regressors are divided by 1,000 and Import by
# 1,000,000. The columns are replaced (df[col] = ...) rather than written in
# place through .loc, because integer columns such as Import cannot hold the
# float results without an upcast, which recent pandas deprecates for
# in-place writes.
per_thousand_cols = [
    "GDP/capita",
    "Tech Advancement CN",
    "Fixed Asset Investment CN (T-1)",
    "Avg Wage Difference",
]
data3[per_thousand_cols] = data3[per_thousand_cols] / 1000
data3["Import"] = data3["Import"] / 1000000
data3.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,Import,Fit,SD Fit,Duration,SD Duration,GDP/capita,Area,Energy dependency,Energy Consumption,Electricity Price,Annual Solar Capacity Addition,TechAdvancement,Tech Advancement CN,Trade Policies EU,Fixed Asset Investment CN (T-1),Avg Wage Difference,Environ. St. Difference
Country,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Austria,2005,1.436678,0.672,0.299658,13,0.0,38.41746,82520.0,71.76,425.46,0.119,-6.0,104,0.573,0.0,0.06351,2.34647,1.6111
Austria,2006,41.083867,0.678,0.247408,13,0.0,40.66933,82520.0,72.248,421.01,0.120775,1.4,103,0.979,0.0,0.09415,2.40895,1.8333
Austria,2007,20.63566,0.521,0.060136,13,0.0,46.91534,82520.0,68.475,411.99,0.138875,1.9,117,1.621,0.0,0.13932,2.4071,1.5556
Austria,2008,28.782325,0.559,0.066811,13,0.0,51.91998,82520.0,68.738,420.01,0.152825,5.9,123,2.364,0.0,0.21147,2.568188,1.8611
Austria,2009,18.342478,0.53,0.069027,13,0.0,48.15332,82520.0,65.122,406.84,0.165825,18.8,84,4.149,0.0,0.33735,2.97478,1.9722
Austria,2010,66.300152,0.434,0.078703,13,0.0,46.90376,82520.0,62.782,420.16,0.16505,39.9,52,6.397,0.0,0.52084,2.75566,1.7778
Austria,2011,54.727543,0.438,0.05061,13,0.0,51.44228,82520.0,69.981,396.07,0.166375,85.3,34,7.634,0.0,0.73825,3.00424,1.5833
Austria,2012,96.13563,0.338,0.078276,13,0.0,48.56492,82520.0,63.785,414.52,0.1665,163.4,27,8.395,0.0,1.21938,2.99729,0.4722
Austria,2013,66.585288,0.23,0.101873,13,0.0,50.73113,82520.0,61.26,411.2,0.16915,288.5,20,8.057,21.84,1.31254,3.10845,0.75
Austria,2014,38.423535,0.0,0.162951,0,5.2,51.78638,82520.0,65.625,394.8,0.164525,159.3,13,10.136,47.7,1.49814,2.8644,0.3611


In [None]:
data3.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Import,360.0,224.583454,825.173238,0.032139,3.0826,13.635224,68.235437,8340.234422
Fit,360.0,0.155722,0.227497,0.0,0.0,0.0,0.22325,0.83
SD Fit,360.0,0.070744,0.088623,0.0,0.0,0.030772,0.113328,0.341103
Duration,360.0,8.263889,9.297307,0.0,0.0,0.0,18.0,25.0
SD Duration,360.0,2.428685,3.778216,0.0,0.0,0.0,5.669694,12.247449
GDP/capita,360.0,29.856376,17.016865,3.89983,14.699565,23.6137,45.540148,80.8483
Area,360.0,166018.638083,156239.652736,20136.4,53990.0,86890.0,297774.675,547557.0
Energy dependency,360.0,51.919858,23.101903,-50.618,36.874,53.2385,70.34575,91.551
Energy Consumption,360.0,737.890194,943.847525,40.75,187.97,341.545,835.88,4062.38
Electricity Price,360.0,0.141069,0.044266,0.058,0.112237,0.133412,0.163269,0.287175


In [None]:
# dependent, independent and control variables
data3["constant"]=1
dependent_var = data3['Import'].to_frame()
# Take every column except the dependent variable by name, so the split does
# not depend on 'Import' being the first column of the file.
independent_vars = data3.drop(columns=["Import"])
dependent_var.shape, independent_vars.shape

((360, 1), (360, 17))

In [None]:
# One VIF per column of the current regressor frame, reported next to the
# column name.
design = independent_vars.values
vif_scores = [variance_inflation_factor(design, col) for col in range(design.shape[1])]
vif = pd.DataFrame({"VIF Factor": vif_scores, "features": independent_vars.columns})
print("Variance Inflation Factors (VIF):")
vif

Variance Inflation Factors (VIF):


Unnamed: 0,VIF Factor,features
0,3.989572,Fit
1,2.496832,SD Fit
2,3.089967,Duration
3,1.721816,SD Duration
4,5.521702,GDP/capita
5,2.940381,Area
6,1.418883,Energy dependency
7,6.19997,Energy Consumption
8,2.539037,Electricity Price
9,2.457971,Annual Solar Capacity Addition


In [None]:
correlation_matrix = data3.corr()
print(correlation_matrix)

                                   Import       Fit    SD Fit  Duration  \
Import                           1.000000  0.266253  0.080623  0.270845   
Fit                              0.266253  1.000000  0.559212  0.774316   
SD Fit                           0.080623  0.559212  1.000000  0.445053   
Duration                         0.270845  0.774316  0.445053  1.000000   
SD Duration                     -0.122346  0.009618  0.482366 -0.009277   
GDP/capita                       0.166235  0.013715 -0.067455 -0.010936   
Area                             0.240717  0.099172  0.019250  0.045788   
Energy dependency                0.121655  0.194650  0.157446  0.022402   
Energy Consumption               0.522634  0.292713  0.228120  0.281099   
Electricity Price                0.230993  0.001874  0.034808  0.095556   
Annual Solar Capacity Addition   0.856715  0.177242  0.049165  0.219916   
TechAdvancement                  0.794065  0.264630  0.128420  0.277321   
Tech Advancement CN      

In [None]:
# Column names before the drop
print(independent_vars.columns)

# Remove the helper constant and the two regressors excluded from the models;
# errors='ignore' makes the call a no-op for names that are already gone.
columns_to_remove = ["constant","Fixed Asset Investment CN (T-1)","Avg Wage Difference"]
independent_vars = independent_vars.drop(columns=columns_to_remove, errors='ignore')

# Column names after the drop
print(independent_vars.columns)

Index(['Fit', 'SD Fit', 'Duration', 'SD Duration', 'GDP/capita', 'Area',
       'Energy dependency', 'Energy Consumption', 'Electricity Price',
       'Annual Solar Capacity Addition', 'TechAdvancement ',
       'Tech Advancement CN', 'Trade Policies EU',
       'Fixed Asset Investment CN (T-1)', 'Avg Wage Difference',
       'Environ. St. Difference', 'constant'],
      dtype='object')
Index(['Fit', 'SD Fit', 'Duration', 'SD Duration', 'GDP/capita', 'Area',
       'Energy dependency', 'Energy Consumption', 'Electricity Price',
       'Annual Solar Capacity Addition', 'TechAdvancement ',
       'Tech Advancement CN', 'Trade Policies EU', 'Environ. St. Difference'],
      dtype='object')


In [None]:
# VIF for the reduced regressor set. The constant column was dropped from
# independent_vars, so an intercept is added back for the VIF computation
# only. Without it the VIFs are uncentred and inflated for any variable with
# a non-zero mean. The intercept is column 0 and is skipped when reporting.
vif_design = add_constant(independent_vars, prepend=True, has_constant="add")
vif = pd.DataFrame()
vif["VIF Factor"] = [
    variance_inflation_factor(vif_design.values, i)
    for i in range(1, vif_design.shape[1])
]
vif["features"] = independent_vars.columns
print("Variance Inflation Factors (VIF):")
vif

Variance Inflation Factors (VIF):


Unnamed: 0,VIF Factor,features
0,5.566317,Fit
1,3.927589,SD Fit
2,5.176692,Duration
3,2.367926,SD Duration
4,7.814158,GDP/capita
5,5.03743,Area
6,5.399971,Energy dependency
7,7.613314,Energy Consumption
8,18.990605,Electricity Price
9,2.592378,Annual Solar Capacity Addition


## MODELS

### FE MODEL (ENTITY ONE WAY)

In [None]:
print(independent_vars.columns.duplicated())

[False False False False False False False False False False False False
 False False]


In [None]:
corr_matrix = independent_vars.corr()
print(corr_matrix)

                                     Fit    SD Fit  Duration  SD Duration  \
Fit                             1.000000  0.559212  0.774316     0.009618   
SD Fit                          0.559212  1.000000  0.445053     0.482366   
Duration                        0.774316  0.445053  1.000000    -0.009277   
SD Duration                     0.009618  0.482366 -0.009277     1.000000   
GDP/capita                      0.013715 -0.067455 -0.010936    -0.165907   
Area                            0.099172  0.019250  0.045788    -0.090150   
Energy dependency               0.194650  0.157446  0.022402     0.030546   
Energy Consumption              0.292713  0.228120  0.281099    -0.054266   
Electricity Price               0.001874  0.034808  0.095556     0.071347   
Annual Solar Capacity Addition  0.177242  0.049165  0.219916    -0.081569   
TechAdvancement                 0.264630  0.128420  0.277321    -0.081921   
Tech Advancement CN            -0.364941 -0.106835 -0.258851     0.161921   

In [None]:
def matrix_rank_check(df):
    """Return the numerical rank of the values held in ``df``.

    ``df`` must expose a ``.values`` array (e.g. a pandas DataFrame). A rank
    below the number of columns means some columns are linearly dependent.
    """
    design_matrix = df.values
    return np.linalg.matrix_rank(design_matrix)

print(f"Rank of independent variables matrix: {matrix_rank_check(independent_vars)}")
print(f"Number of columns in independent variables: {independent_vars.shape[1]}")

Rank of independent variables matrix: 14
Number of columns in independent variables: 14


In [None]:
print(corr_matrix)

                                     Fit    SD Fit  Duration  SD Duration  \
Fit                             1.000000  0.559212  0.774316     0.009618   
SD Fit                          0.559212  1.000000  0.445053     0.482366   
Duration                        0.774316  0.445053  1.000000    -0.009277   
SD Duration                     0.009618  0.482366 -0.009277     1.000000   
GDP/capita                      0.013715 -0.067455 -0.010936    -0.165907   
Area                            0.099172  0.019250  0.045788    -0.090150   
Energy dependency               0.194650  0.157446  0.022402     0.030546   
Energy Consumption              0.292713  0.228120  0.281099    -0.054266   
Annual Solar Capacity Addition  0.177242  0.049165  0.219916    -0.081569   
TechAdvancement                 0.264630  0.128420  0.277321    -0.081921   
Tech Advancement CN            -0.364941 -0.106835 -0.258851     0.161921   
Trade Policies EU              -0.300296 -0.017715 -0.140710     0.099049   

In [None]:
# Fixed Effects Model (entity effects only).
# drop_absorbed=True lets linearmodels drop regressors that the entity effects
# fully absorb (e.g. variables that do not change within a country) instead
# of raising AbsorbingEffectError; this matches the two-way model.
fe_model = PanelOLS(dependent_var, independent_vars, entity_effects=True, drop_absorbed=True)
fe_results = fe_model.fit()

print("Fixed Effects Model Results:")
fe_results

AbsorbingEffectError: 
The model cannot be estimated. The included effects have fully absorbed
one or more of the variables. This occurs when one or more of the dependent
variable is perfectly explained using the effects included in the model.

The following variables or variable combinations have been fully absorbed
or have become perfectly collinear after effects are removed:

          Fit, SD Fit, Duration, SD Duration, Area, Electricity Price, Tech Advancement CN

Set drop_absorbed=True to automatically drop absorbed variables.


### FE MODEL (TWO-WAY)

In [None]:
# Two-way fixed effects: entity and time effects together. Regressors that the
# effects fully absorb are dropped automatically.
two_way_options = dict(entity_effects=True, time_effects=True, drop_absorbed=True)
two_model = PanelOLS(dependent_var, independent_vars, **two_way_options)
results = two_model.fit()
results

0,1,2,3
Dep. Variable:,Import,R-squared:,0.7841
Estimator:,PanelOLS,R-squared (Between):,-35.715
No. Observations:,360,R-squared (Within):,0.7766
Date:,"Mon, Aug 05 2024",R-squared (Overall):,-14.751
Time:,15:35:32,Log-likelihood,-2541.7
Cov. Estimator:,Unadjusted,,
,,F-statistic:,113.34
Entities:,24,P-value,0.0000
Avg Obs:,15.000,Distribution:,"F(10,312)"
Min Obs:,15.000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
Fit,-72.399,163.79,-0.4420,0.6588,-394.67,249.87
SD Fit,-154.63,314.94,-0.4910,0.6238,-774.30,465.04
Duration,-3.4741,3.8559,-0.9010,0.3683,-11.061,4.1129
SD Duration,-2.9500,6.4532,-0.4571,0.6479,-15.647,9.7472
GDP/capita,4.8897,6.4026,0.7637,0.4456,-7.7081,17.487
Energy dependency,-2.7528,2.3037,-1.1949,0.2330,-7.2857,1.7800
Energy Consumption,2.4240,0.4043,5.9952,0.0000,1.6284,3.2195
Annual Solar Capacity Addition,0.4384,0.0265,16.528,0.0000,0.3862,0.4905
TechAdvancement,4.3908,0.2920,15.035,0.0000,3.8162,4.9654


### RANDOM EFFECTS

In [None]:
# Random-effects estimate on the same dependent variable and regressors.
re_model = RandomEffects(dependent_var, independent_vars)
re_results = re_model.fit()

print("\nRandom Effects Model Results:")
re_results


Random Effects Model Results:


0,1,2,3
Dep. Variable:,Import,R-squared:,0.8376
Estimator:,RandomEffects,R-squared (Between):,0.9567
No. Observations:,360,R-squared (Within):,0.7550
Date:,"Wed, Aug 07 2024",R-squared (Overall):,0.8408
Time:,14:49:05,Log-likelihood,-2608.8
Cov. Estimator:,Unadjusted,,
,,F-statistic:,127.44
Entities:,24,P-value,0.0000
Avg Obs:,15.000,Distribution:,"F(14,346)"
Min Obs:,15.000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
Fit,212.20,157.32,1.3489,0.1783,-97.215,521.62
SD Fit,286.05,320.53,0.8924,0.3728,-344.39,916.49
Duration,-1.1335,3.3710,-0.3362,0.7369,-7.7638,5.4968
SD Duration,-8.6423,6.2980,-1.3722,0.1709,-21.029,3.7449
GDP/capita,2.1340,1.5253,1.3991,0.1627,-0.8660,5.1339
Area,-9.148e-05,0.0002,-0.4945,0.6212,-0.0005,0.0003
Energy dependency,0.3260,0.7657,0.4257,0.6706,-1.1801,1.8321
Energy Consumption,-0.2059,0.0430,-4.7945,0.0000,-0.2904,-0.1215
Electricity Price,-957.12,548.16,-1.7461,0.0817,-2035.3,121.03


In [None]:
# Side-by-side summary of the fixed- and random-effects fits.
# NOTE: compare() only tabulates the two results; it does not itself compute
# a Hausman statistic.
models_to_compare = {"Fixed": fe_results, "Random": re_results}
comparison = compare(models_to_compare)
comparison

0,1,2
,Fixed,Random
Dep. Variable,Import,Import
Estimator,PanelOLS,RandomEffects
No. Observations,360,360
Cov. Est.,Unadjusted,Unadjusted
R-squared,0.7987,0.8375
R-Squared (Within),0.7987,0.7484
R-Squared (Between),-32.646,0.9583
R-Squared (Overall),-13.433,0.8377
F-statistic,162.65,226.76


### Durbin-Wu-Hausman test

In [None]:
import numpy as np
from statsmodels.regression.linear_model import OLS
from scipy.stats import chi2

def durbin_wu_hausman_test(ols_fixed, ols_random):
    """
    Compare a fixed-effects and a random-effects fit through their residuals.

    Parameters:
    ols_fixed : fitted results object
        Fixed-effects regression results exposing ``resids`` and ``df_model``
    ols_random : fitted results object
        Random-effects regression results exposing ``resids`` and ``df_model``

    Returns:
    dw_hausman_stat : float
        Sum of squared differences between the two residual vectors divided
        by the fixed-effects residual sum of squares
    p_value : float
        Upper-tail chi-squared probability of the statistic, with degrees of
        freedom ``ols_fixed.df_model - ols_random.df_model``

    NOTE(review): this residual-ratio statistic is not the textbook Hausman
    quadratic form in the coefficient differences; confirm that the
    chi-squared reference distribution is appropriate before relying on the
    p-value.
    """
    # Residuals from fixed effects model
    u_fe = ols_fixed.resids

    # Residuals from random effects model
    u_re = ols_random.resids

    # Calculate the test statistic
    dw_hausman_stat = np.sum((u_fe - u_re) ** 2) / np.sum(u_fe ** 2)

    # Degrees of freedom come from the results passed in, not from
    # module-level globals, so the function works for any pair of fits.
    df = ols_fixed.df_model - ols_random.df_model

    # Compute the p-value; sf() is 1 - cdf without cancellation error.
    p_value = chi2.sf(dw_hausman_stat, df)

    return dw_hausman_stat, p_value

# Usage: pass the fitted fixed-effects results first and the fitted
# random-effects results second.

# Run the comparison on the two fits estimated above and report both numbers.
dwh_outcome = durbin_wu_hausman_test(fe_results, re_results)
dw_hausman_stat, p_value = dwh_outcome

for caption, value in (
    ("Durbin-Wu-Hausman Test Statistic:", dw_hausman_stat),
    ("P-value:", p_value),
):
    print(caption, value)

Durbin-Wu-Hausman Test Statistic: 0.4026918013803389
P-value: 1.0


In [None]:
fe_results

0,1,2,3
Dep. Variable:,Import,R-squared:,0.7987
Estimator:,PanelOLS,R-squared (Between):,-32.646
No. Observations:,360,R-squared (Within):,0.7987
Date:,"Fri, Jul 12 2024",R-squared (Overall):,-13.433
Time:,18:09:46,Log-likelihood,-2552.5
Cov. Estimator:,Unadjusted,,
,,F-statistic:,162.65
Entities:,24,P-value,0.0000
Avg Obs:,15.000,Distribution:,"F(8,328)"
Min Obs:,15.000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
Fit,-113.73,112.80,-1.0083,0.3141,-335.63,108.17
GDP/capita,7.5054,4.8436,1.5495,0.1222,-2.0231,17.034
Energy Consumption,2.1315,0.3457,6.1664,0.0000,1.4515,2.8115
Annual Solar Capacity Addition,0.4435,0.0260,17.055,0.0000,0.3924,0.4947
TechAdvancement,4.4812,0.2880,15.557,0.0000,3.9145,5.0478
Tech Advancement CN,2.4885,3.6655,0.6789,0.4977,-4.7224,9.6994
Trade Policies EU,2.0666,1.1748,1.7590,0.0795,-0.2446,4.3777
Environ. St. Difference,118.08,40.776,2.8957,0.0040,37.859,198.29


In [None]:
# Joint Wald test on the fixed-effects fit. H0: every coefficient listed in
# the restriction string equals zero, i.e. none of these regressors is
# associated with the dependent variable. Note that the column name
# `TechAdvancement ` carries a trailing space.
wald_restrictions = "Fit=0, `GDP/capita`=0, `Energy Consumption`=0, `Annual Solar Capacity Addition`=0, `Tech Advancement CN`=0, `TechAdvancement `=0, `Trade Policies EU`=0, `Environ. St. Difference`=0"
wald_result = fe_results.wald_test(formula=wald_restrictions)
wald_result

Linear Equality Hypothesis Test
H0: Linear equality constraint is valid
Statistic: 1301.1621
P-value: 0.0000
Distributed: chi2(8)
WaldTestStatistic, id: 0x1db5869e2a0

In [None]:
# Breusch-Pagan test on the fixed-effects residuals. The explanatory matrix
# passed to the test is the regressor set plus an explicit constant column.
residuals = fe_results.resids
independent_vars_bp_test = independent_vars.assign(constant=1)

bp_test = het_breuschpagan(resid=residuals, exog_het=independent_vars_bp_test, robust=True)
labels = ['Lagrange multiplier statistic', 'p-value', 'f-value', 'f p-value']
print({label: value for label, value in zip(labels, bp_test)})

{'Lagrange multiplier statistic': 71.49189800125988, 'p-value': 2.4785064769839843e-12, 'f-value': 10.872162698637055, 'f p-value': 1.0472299529299159e-13}
