In [2]:
import pandas as pd
import numpy as np
from linearmodels import IV2SLS


No fixed effects, No stationary controls, Transport IV

In [3]:
# Pooled 2SLS: no fixed effects and none of the "stationary" controls;
# s1 is instrumented with trans_outp_p.

# Name every model variable exactly once so the row filter, the component
# variables and the formula are all derived from the same lists.
dep_var = 'onset2COWCS'
endog_var = 's1'
instr_var = 'trans_outp_p'
controls = ['decade', 'logpop_M_diff', 'logpopdens_diff', 'logoutreg_diff',
            'ecgrowth_demeaned', 'democracy_diff']

# Load the data
data = pd.read_csv('gvcomix_transformed1.csv')

# Every column that enters the model is checked, including the endogenous
# regressor and the instrument. Filtering here means no row with a missing or
# infinite value reaches the estimator, and the cluster labels used in fit()
# are read from this same filtered frame.
columns_to_check = [dep_var, endog_var, instr_var] + controls

# Replace infinite values with NaN so that dropna removes them as well
data[columns_to_check] = data[columns_to_check].replace([np.inf, -np.inf], np.nan)

# Drop incomplete rows, then set the (country, t) MultiIndex
data = data.dropna(subset=columns_to_check).set_index(['country', 't'])

# Component views of the model, kept under their original names
dependent = data[dep_var]
endog = data[endog_var]
exog = data[controls]
instr = data[instr_var]

# Create the regression formula: "dep ~ 1 + [endog ~ instrument] + controls"
control_terms = ' + '.join(controls)
formula = f'{dep_var} ~ 1 + [{endog_var} ~ {instr_var}] + {control_terms}'

# Run the 2SLS regression with standard errors clustered by country
model_iv = IV2SLS.from_formula(formula, data)
results_iv = model_iv.fit(cov_type='clustered', clusters=data.index.get_level_values('country'))

# Print the results
print(results_iv)

                          IV-2SLS Estimation Summary                          
Dep. Variable:            onset2COWCS   R-squared:                     -3.6071
Estimator:                    IV-2SLS   Adj. R-squared:                -3.6239
No. Observations:                1929   F-statistic:                    9.0128
Date:                Wed, May 17 2023   P-value (F-stat)                0.2517
Time:                        00:05:16   Distribution:                  chi2(7)
Cov. Estimator:             clustered                                         
                                                                              
                                 Parameter Estimates                                 
                   Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
-------------------------------------------------------------------------------------
Intercept             0.1519     0.1109     1.3699     0.1707     -0.0655      0.3693
decade               -0.

No fixed effects, No stationary controls, Fuel IV

In [4]:
# Pooled 2SLS: no fixed effects and none of the "stationary" controls;
# s1 is instrumented with avgs1.

# Name every model variable exactly once so the row filter, the component
# variables and the formula are all derived from the same lists.
dep_var = 'onset2COWCS'
endog_var = 's1'
instr_var = 'avgs1'
controls = ['decade', 'logpop_M_diff', 'logpopdens_diff', 'logoutreg_diff',
            'ecgrowth_demeaned', 'democracy_diff']

# Load the data
data = pd.read_csv('gvcomix_transformed1.csv')

# Every column that enters the model is checked, including the endogenous
# regressor and the instrument. Filtering here means no row with a missing or
# infinite value reaches the estimator, and the cluster labels used in fit()
# are read from this same filtered frame.
columns_to_check = [dep_var, endog_var, instr_var] + controls

# Treat +/-inf like a missing value, as the transport-IV pooled cell does,
# so both pooled specifications clean their sample the same way.
data[columns_to_check] = data[columns_to_check].replace([np.inf, -np.inf], np.nan)

# Drop incomplete rows, then set the (country, t) MultiIndex
data = data.dropna(subset=columns_to_check).set_index(['country', 't'])

# Component views of the model, kept under their original names
dependent = data[dep_var]
endog = data[endog_var]
exog = data[controls]
instr = data[instr_var]

# Create the regression formula: "dep ~ 1 + [endog ~ instrument] + controls"
control_terms = ' + '.join(controls)
formula = f'{dep_var} ~ 1 + [{endog_var} ~ {instr_var}] + {control_terms}'

# Run the 2SLS regression with standard errors clustered by country
model_iv = IV2SLS.from_formula(formula, data)
results_iv = model_iv.fit(cov_type='clustered', clusters=data.index.get_level_values('country'))

# Print the results
print(results_iv)

                          IV-2SLS Estimation Summary                          
Dep. Variable:            onset2COWCS   R-squared:                     -0.0027
Estimator:                    IV-2SLS   Adj. R-squared:                -0.0064
No. Observations:                1929   F-statistic:                    13.510
Date:                Wed, May 17 2023   P-value (F-stat)                0.0606
Time:                        00:06:29   Distribution:                  chi2(7)
Cov. Estimator:             clustered                                         
                                                                              
                                 Parameter Estimates                                 
                   Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
-------------------------------------------------------------------------------------
Intercept             0.0990     0.0387     2.5592     0.0105      0.0232      0.1748
decade               -0.

NO Fixed effects, YES stationary controls, Transport IV

In [5]:
# Pooled 2SLS with the "stationary" controls added, no fixed effects;
# s1 is instrumented with trans_outp_p.

# Name every model variable exactly once so the row filter, the component
# variables and the formula are all derived from the same lists.
dep_var = 'onset2COWCS'
endog_var = 's1'
instr_var = 'trans_outp_p'
base_controls = ['decade', 'logpop_M_diff', 'logpopdens_diff', 'logoutreg_diff',
                 'ecgrowth_demeaned', 'democracy_diff']
stationary_controls = ['logmountain', 'ethnic_fractionalization',
                       'religion_fractionalization', 'language_fractionalization',
                       'leg_british', 'opec']
# Formula order: base controls first, then the stationary controls
controls = base_controls + stationary_controls

# Load the data
data = pd.read_csv('gvcomix_transformed1.csv')

# Every column that enters the model is checked, including the endogenous
# regressor and the instrument. Filtering here means no row with a missing or
# infinite value reaches the estimator, and the cluster labels used in fit()
# are read from this same filtered frame.
columns_to_check = [dep_var, endog_var, instr_var] + controls

# Treat +/-inf like a missing value so dropna removes those rows as well
data[columns_to_check] = data[columns_to_check].replace([np.inf, -np.inf], np.nan)

# Drop incomplete rows, then set the (country, t) MultiIndex
data = data.dropna(subset=columns_to_check).set_index(['country', 't'])

# Component views of the model, kept under their original names
dependent = data[dep_var]
endog = data[endog_var]
exog = data[controls]
instr = data[instr_var]

# Create the regression formula: "dep ~ 1 + [endog ~ instrument] + controls"
control_terms = ' + '.join(controls)
formula = f'{dep_var} ~ 1 + [{endog_var} ~ {instr_var}] + {control_terms}'

# Run the 2SLS regression with standard errors clustered by country
model_iv = IV2SLS.from_formula(formula, data)
results_iv = model_iv.fit(cov_type='clustered', clusters=data.index.get_level_values('country'))

# Print the results
print(results_iv)

                          IV-2SLS Estimation Summary                          
Dep. Variable:            onset2COWCS   R-squared:                     -0.6129
Estimator:                    IV-2SLS   Adj. R-squared:                -0.6262
No. Observations:                1585   F-statistic:                    16.063
Date:                Wed, May 17 2023   P-value (F-stat)                0.2457
Time:                        00:07:18   Distribution:                 chi2(13)
Cov. Estimator:             clustered                                         
                                                                              
                                     Parameter Estimates                                      
                            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
----------------------------------------------------------------------------------------------
Intercept                      0.1441     0.0633     2.2774     0.0228      0.0201 

NO Fixed effects, YES stationary controls, avg GVC IV

In [6]:
# Pooled 2SLS with the "stationary" controls added, no fixed effects;
# s1 is instrumented with avgs1.

# Name every model variable exactly once so the row filter, the component
# variables and the formula are all derived from the same lists.
dep_var = 'onset2COWCS'
endog_var = 's1'
# The fitted formula instruments s1 with avgs1, so `instr` below must read
# that same column.
# NOTE(review): if avgs6 was the intended instrument, change it here and the
# formula follows automatically - confirm.
instr_var = 'avgs1'
base_controls = ['decade', 'logpop_M_diff', 'logpopdens_diff', 'logoutreg_diff',
                 'ecgrowth_demeaned', 'democracy_diff']
stationary_controls = ['logmountain', 'ethnic_fractionalization',
                       'religion_fractionalization', 'language_fractionalization',
                       'leg_british', 'opec']
# Formula order: base controls first, then the stationary controls
controls = base_controls + stationary_controls

# Load the data
data = pd.read_csv('gvcomix_transformed1.csv')

# Every column that enters the model is checked, including the endogenous
# regressor and the instrument. Filtering here means no row with a missing or
# infinite value reaches the estimator, and the cluster labels used in fit()
# are read from this same filtered frame.
columns_to_check = [dep_var, endog_var, instr_var] + controls

# Treat +/-inf like a missing value so dropna removes those rows as well
data[columns_to_check] = data[columns_to_check].replace([np.inf, -np.inf], np.nan)

# Drop incomplete rows, then set the (country, t) MultiIndex
data = data.dropna(subset=columns_to_check).set_index(['country', 't'])

# Component views of the model, kept under their original names
dependent = data[dep_var]
endog = data[endog_var]
exog = data[controls]
instr = data[instr_var]

# Create the regression formula: "dep ~ 1 + [endog ~ instrument] + controls"
control_terms = ' + '.join(controls)
formula = f'{dep_var} ~ 1 + [{endog_var} ~ {instr_var}] + {control_terms}'

# Run the 2SLS regression with standard errors clustered by country
model_iv = IV2SLS.from_formula(formula, data)
results_iv = model_iv.fit(cov_type='clustered', clusters=data.index.get_level_values('country'))

# Print the results
print(results_iv)


                          IV-2SLS Estimation Summary                          
Dep. Variable:            onset2COWCS   R-squared:                      0.0258
Estimator:                    IV-2SLS   Adj. R-squared:                 0.0178
No. Observations:                1585   F-statistic:                    18.968
Date:                Wed, May 17 2023   P-value (F-stat)                0.1241
Time:                        00:08:23   Distribution:                 chi2(13)
Cov. Estimator:             clustered                                         
                                                                              
                                     Parameter Estimates                                      
                            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
----------------------------------------------------------------------------------------------
Intercept                      0.1138     0.0466     2.4415     0.0146      0.0224 

YES Fixed effects, NO stationary controls, Transport IV

In [7]:
# 2SLS with country and year fixed effects entered as dummy variables, none of
# the "stationary" controls; s1 is instrumented with trans_outp_p.

# Name every model variable exactly once so the row filter and the formula
# are derived from the same lists.
dep_var = 'onset2COWCS'
endog_var = 's1'
instr_var = 'trans_outp_p'
controls = ['logpop_M_diff', 'logpopdens_diff', 'logoutreg_diff',
            'ecgrowth_demeaned', 'democracy_diff']

# Load the data
data = pd.read_csv('gvcomix_transformed1.csv')

# Set the MultiIndex
data = data.set_index(['country', 't'])

# Drop rows with a missing or infinite value in any column the regression
# actually uses: dependent, endogenous regressor, instrument and controls.
# Columns that are not in the formula are deliberately not part of the filter,
# so they cannot remove rows from the estimation sample.
columns_to_check = [dep_var, endog_var, instr_var] + controls
data[columns_to_check] = data[columns_to_check].replace([np.inf, -np.inf], np.nan)
data = data.dropna(subset=columns_to_check)

# Create country and year dummies for fixed effects. They are built as numeric
# 0/1 columns so the formula treats each one as a plain regressor named
# C_<country> / Y_<year>; the first level of each is dropped as the baseline.
country_dummies = pd.get_dummies(data.index.get_level_values('country'),
                                 drop_first=True, prefix='C', dtype=float)
year_dummies = pd.get_dummies(data.index.get_level_values('t'),
                              drop_first=True, prefix='Y', dtype=float)
dummies = pd.concat([country_dummies, year_dummies], axis=1)

# Store the (country, t) index before flattening it; the cluster labels are
# read from it below.
original_index = data.index

# Reset index for data and concatenate with dummies. Both frames then carry a
# default 0..n-1 index, so the column-wise concat pairs rows by position.
data = data.reset_index()
assert len(dummies) == len(data), "dummy rows must match the filtered sample"
data_with_dummies = pd.concat([data, dummies], axis=1)

# Create the regression formula including the dummies
control_terms = ' + '.join(controls)
fixed_effects = ' + '.join(dummies.columns)
formula = f'{dep_var} ~ 1 + [{endog_var} ~ {instr_var}] + {control_terms} + {fixed_effects}'

# Run the 2SLS regression with country and time fixed effects, clustering the
# standard errors by country.
# NOTE(review): the stored summary for this cell reports a negative model
# F-statistic, so that joint test should not be interpreted here - confirm
# whether another fixed-effects approach is wanted.
model_iv = IV2SLS.from_formula(formula, data_with_dummies)
results_iv = model_iv.fit(cov_type='clustered', clusters=original_index.get_level_values('country'))

# Print the results
print(results_iv)

                          IV-2SLS Estimation Summary                          
Dep. Variable:            onset2COWCS   R-squared:                      0.1822
Estimator:                    IV-2SLS   Adj. R-squared:                 0.1057
No. Observations:                1929   F-statistic:                 -7.89e+13
Date:                Wed, May 17 2023   P-value (F-stat)                1.0000
Time:                        00:10:23   Distribution:                chi2(165)
Cov. Estimator:             clustered                                         
                                                                              
                                 Parameter Estimates                                 
                   Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
-------------------------------------------------------------------------------------
Intercept             0.1738     0.0564     3.0847     0.0020      0.0634      0.2843
C_ALB[T.True]        -0.

In [8]:
# 2SLS with country fixed effects only (no year dummies), none of the
# "stationary" controls; s1 is instrumented with trans_outp_p.

# Name every model variable exactly once so the row filter and the formula
# are derived from the same lists.
dep_var = 'onset2COWCS'
endog_var = 's1'
instr_var = 'trans_outp_p'
controls = ['logpop_M_diff', 'logpopdens_diff', 'logoutreg_diff',
            'ecgrowth_demeaned', 'democracy_diff']

# Load the data
data = pd.read_csv('gvcomix_transformed1.csv')

# Set the MultiIndex
data = data.set_index(['country', 't'])

# Drop rows with a missing or infinite value in any column the regression
# actually uses: dependent, endogenous regressor, instrument and controls.
# Columns that are not in the formula are deliberately not part of the filter,
# so they cannot remove rows from the estimation sample.
columns_to_check = [dep_var, endog_var, instr_var] + controls
data[columns_to_check] = data[columns_to_check].replace([np.inf, -np.inf], np.nan)
data = data.dropna(subset=columns_to_check)

# Create country dummies for the fixed effects (this specification has no year
# dummies). They are built as numeric 0/1 columns so the formula treats each
# one as a plain regressor named C_<country>; the first country is the baseline.
country_dummies = pd.get_dummies(data.index.get_level_values('country'),
                                 drop_first=True, prefix='C', dtype=float)
dummies = country_dummies

# Store the (country, t) index before flattening it; the cluster labels are
# read from it below.
original_index = data.index

# Reset index for data and concatenate with dummies. Both frames then carry a
# default 0..n-1 index, so the column-wise concat pairs rows by position.
data = data.reset_index()
assert len(dummies) == len(data), "dummy rows must match the filtered sample"
data_with_dummies = pd.concat([data, dummies], axis=1)

# Create the regression formula including the dummies
control_terms = ' + '.join(controls)
fixed_effects = ' + '.join(dummies.columns)
formula = f'{dep_var} ~ 1 + [{endog_var} ~ {instr_var}] + {control_terms} + {fixed_effects}'

# Run the 2SLS regression with country fixed effects, clustering the standard
# errors by country.
# NOTE(review): the stored summary for this cell reports a model F-statistic
# of about 5.6e15, so that joint test should not be interpreted here - confirm
# whether another fixed-effects approach is wanted.
model_iv = IV2SLS.from_formula(formula, data_with_dummies)
results_iv = model_iv.fit(cov_type='clustered', clusters=original_index.get_level_values('country'))

# Print the results
print(results_iv)

                          IV-2SLS Estimation Summary                          
Dep. Variable:            onset2COWCS   R-squared:                     -0.3581
Estimator:                    IV-2SLS   Adj. R-squared:                -0.4735
No. Observations:                1929   F-statistic:                 5.571e+15
Date:                Wed, May 17 2023   P-value (F-stat)                0.0000
Time:                        00:11:15   Distribution:                chi2(151)
Cov. Estimator:             clustered                                         
                                                                              
                                 Parameter Estimates                                 
                   Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
-------------------------------------------------------------------------------------
Intercept             0.1610     0.0572     2.8177     0.0048      0.0490      0.2731
C_ALB[T.True]        -0.

YES fixed effects (no time), NO stationary controls, FUEL IV

In [9]:
# 2SLS with country fixed effects only (no year dummies), none of the
# "stationary" controls; s1 is instrumented with avgs1.

# Name every model variable exactly once so the row filter and the formula
# are derived from the same lists.
dep_var = 'onset2COWCS'
endog_var = 's1'
instr_var = 'avgs1'
controls = ['logpop_M_diff', 'logpopdens_diff', 'logoutreg_diff',
            'ecgrowth_demeaned', 'democracy_diff']

# Load the data
data = pd.read_csv('gvcomix_transformed1.csv')

# Set the MultiIndex
data = data.set_index(['country', 't'])

# Drop rows with a missing or infinite value in any column the regression
# actually uses: dependent, endogenous regressor, instrument and controls.
# Columns that are not in the formula are deliberately not part of the filter,
# so they cannot remove rows from the estimation sample.
columns_to_check = [dep_var, endog_var, instr_var] + controls
data[columns_to_check] = data[columns_to_check].replace([np.inf, -np.inf], np.nan)
data = data.dropna(subset=columns_to_check)

# Create country dummies for the fixed effects (this specification has no year
# dummies). They are built as numeric 0/1 columns so the formula treats each
# one as a plain regressor named C_<country>; the first country is the baseline.
country_dummies = pd.get_dummies(data.index.get_level_values('country'),
                                 drop_first=True, prefix='C', dtype=float)
dummies = country_dummies

# Store the (country, t) index before flattening it; the cluster labels are
# read from it below.
original_index = data.index

# Reset index for data and concatenate with dummies. Both frames then carry a
# default 0..n-1 index, so the column-wise concat pairs rows by position.
data = data.reset_index()
assert len(dummies) == len(data), "dummy rows must match the filtered sample"
data_with_dummies = pd.concat([data, dummies], axis=1)

# Create the regression formula including the dummies
control_terms = ' + '.join(controls)
fixed_effects = ' + '.join(dummies.columns)
formula = f'{dep_var} ~ 1 + [{endog_var} ~ {instr_var}] + {control_terms} + {fixed_effects}'

# Run the 2SLS regression with country fixed effects, clustering the standard
# errors by country.
# NOTE(review): the stored summary for this cell reports a model F-statistic
# of about 2.4e17, so that joint test should not be interpreted here - confirm
# whether another fixed-effects approach is wanted.
model_iv = IV2SLS.from_formula(formula, data_with_dummies)
results_iv = model_iv.fit(cov_type='clustered', clusters=original_index.get_level_values('country'))

# Print the results
print(results_iv)

                          IV-2SLS Estimation Summary                          
Dep. Variable:            onset2COWCS   R-squared:                      0.1742
Estimator:                    IV-2SLS   Adj. R-squared:                 0.1040
No. Observations:                1929   F-statistic:                 2.367e+17
Date:                Wed, May 17 2023   P-value (F-stat)                0.0000
Time:                        00:11:51   Distribution:                chi2(151)
Cov. Estimator:             clustered                                         
                                                                              
                                 Parameter Estimates                                 
                   Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
-------------------------------------------------------------------------------------
Intercept             0.1481     0.0086     17.168     0.0000      0.1312      0.1650
C_ALB[T.True]        -0.