In [99]:
import pandas as pd
import numpy as np
from linearmodels.iv import IV2SLS

First we run regression without stationary controls and without time fixed effect

In [106]:
data = pd.read_csv("gvcomix_transformed.csv")
data = data.dropna(subset=['onset2COWCS','decade', 'logpop_M_diff', 'logpopdens_diff', 'logoutreg_diff', 'ecgrowth_demeaned', 'democracy_diff'])

data = pd.concat([data, pd.get_dummies(data['country'], drop_first=True)], axis=1)

# Dependent variable
dependent_var = data['onset2COWCS']

# Independent variable (and non-stationary controls)
independent_vars = data[['s6','decade', 'logpop_M_diff', 'logpopdens_diff', 'logoutreg_diff', 'ecgrowth_demeaned', 'democracy_diff']]

# Add country dummies to the independent variables (excluding the reference category)
country_dummies = data.columns[-(len(data['country'].unique()) - 1):]
independent_vars = independent_vars.join(data[country_dummies])

# Instrument variable
instrument = data['s19']

# Run the IV regression
model = IV2SLS(dependent_var, independent_vars, None, instrument)
result = model.fit(cov_type="robust")

# Print the results
print(result.summary)

                          IV-2SLS Estimation Summary                          
Dep. Variable:            onset2COWCS   R-squared:                      0.1829
Estimator:                    IV-2SLS   Adj. R-squared:                 0.1130
No. Observations:                1929   F-statistic:                    39.071
Date:                Tue, May 09 2023   P-value (F-stat)                1.0000
Time:                        13:39:46   Distribution:                chi2(152)
Cov. Estimator:                robust                                         
                                                                              
                                 Parameter Estimates                                 
                   Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
-------------------------------------------------------------------------------------
s6                    0.2124     0.1712     1.2409     0.2147     -0.1231      0.5480
decade                0.

In [109]:
data = pd.read_csv("gvcomix_transformed.csv")
data = data.dropna(subset=['onset2COWCS','decade', 'logpop_M_diff', 'logpopdens_diff', 'logoutreg_diff', 'ecgrowth_demeaned', 'democracy_diff'])

data = pd.concat([data, pd.get_dummies(data['country'], drop_first=True)], axis=1)

# Dependent variable
dependent_var = data['onset2COWCS']

# Independent variable (and non-stationary controls)
independent_vars = data[['s6','decade', 'logpop_M_diff', 'logpopdens_diff', 'logoutreg_diff', 'ecgrowth_demeaned', 'democracy_diff']]

# Add country dummies to the independent variables (excluding the reference category)
country_dummies = data.columns[-(len(data['country'].unique()) - 1):]
independent_vars = independent_vars.join(data[country_dummies])

# Instrument variable
instrument = data['avgs6']

# Run the IV regression
model = IV2SLS(dependent_var, independent_vars, None, instrument)
result = model.fit(cov_type="robust")

# Print the results
print(result.summary)

                          IV-2SLS Estimation Summary                          
Dep. Variable:            onset2COWCS   R-squared:                      0.1829
Estimator:                    IV-2SLS   Adj. R-squared:                 0.1130
No. Observations:                1929   F-statistic:                    40.938
Date:                Tue, May 09 2023   P-value (F-stat)                1.0000
Time:                        15:38:19   Distribution:                chi2(152)
Cov. Estimator:                robust                                         
                                                                              
                                 Parameter Estimates                                 
                   Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
-------------------------------------------------------------------------------------
s6                    0.2124     0.1712     1.2409     0.2147     -0.1231      0.5480
decade                0.

RUN REGRESSION WITH STATIONARY CONTROLS

In [107]:
data = pd.read_csv("gvcomix_transformed.csv")
data = data.dropna(subset=['onset2COWCS','decade',
                         'logmountain', 'ethnic_fractionalization',
                         'religion_fractionalization', 'language_fractionalization',
                         'leg_british', 'opec',
                         'logpop_M_diff', 'logpopdens_diff', 'logoutreg_diff', 'ecgrowth_demeaned', 'democracy_diff'])

data = pd.concat([data, pd.get_dummies(data['country'], drop_first=True)], axis=1)

# Dependent variable
dependent_var = data['onset2COWCS']

independent_vars = data[['s6', 'decade',
                         'logmountain', 'ethnic_fractionalization',
                         'religion_fractionalization', 'language_fractionalization',
                         'leg_british', 'opec',
                         'logpop_M_diff', 'logpopdens_diff', 'logoutreg_diff', 'ecgrowth_demeaned']]

# Add country dummies to the independent variables (excluding the reference category)
country_dummies = data.columns[-(len(data['country'].unique()) - 1):]
independent_vars = independent_vars.join(data[country_dummies])

# Instrument variable
instrument = data['s19']

# Run the IV regression
model = IV2SLS(dependent_var, independent_vars, None, instrument)
result = model.fit(cov_type="robust")

# Print the results
print(result.summary)

ValueError: regressors [exog endog] do not have full column rank

Now to time fixed affects without stationary controls

In [111]:
data = pd.read_csv("gvcomix_transformed.csv")
data = data.dropna(subset=['s6','onset2COWCS', 'logpop_M_diff', 'logpopdens_diff', 'logoutreg_diff', 'ecgrowth_demeaned', 'democracy_diff'])

data = pd.concat([data, pd.get_dummies(data['country'], drop_first=True)], axis=1)

# Dependent variable
dependent_var = data['onset2COWCS']

# Independent variable (and non-stationary controls)
independent_vars = data[['s6', 'logpop_M_diff', 'logpopdens_diff', 'logoutreg_diff', 'ecgrowth_demeaned', 'democracy_diff']]

# Add country dummies to the independent variables (excluding the reference category)
country_dummies = data.columns[-(len(data['country'].unique()) - 1):]
independent_vars = independent_vars.join(data[country_dummies])

year_dummies = data.columns[data.columns.str.startswith('year_')]
independent_vars = independent_vars.join(data[year_dummies])

# Instrument variable
instrument = data['s19']

# Run the IV regression
model = IV2SLS(dependent_var, independent_vars, None, instrument)
result = model.fit(cov_type="robust")

# Print the results
print(result.summary)

                          IV-2SLS Estimation Summary                          
Dep. Variable:            onset2COWCS   R-squared:                      0.1826
Estimator:                    IV-2SLS   Adj. R-squared:                 0.1132
No. Observations:                1929   F-statistic:                    36.866
Date:                Tue, May 09 2023   P-value (F-stat)                1.0000
Time:                        16:51:36   Distribution:                chi2(151)
Cov. Estimator:                robust                                         
                                                                              
                                 Parameter Estimates                                 
                   Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
-------------------------------------------------------------------------------------
s6                    0.2163     0.1691     1.2789     0.2009     -0.1152      0.5477
logpop_M_diff     -2.212

In [112]:
data = pd.read_csv("gvcomix_transformed.csv")
data = data.dropna(subset=['s6', 'onset2COWCS', 'logpop_M_diff', 'logpopdens_diff', 'logoutreg_diff', 'ecgrowth_demeaned', 'democracy_diff'])

data = pd.concat([data, pd.get_dummies(data['country'], drop_first=True)], axis=1)

# Dependent variable
dependent_var = data['onset2COWCS']

# Independent variable (and non-stationary controls)
independent_vars = data[['s6', 'logpop_M_diff', 'logpopdens_diff', 'logoutreg_diff', 'ecgrowth_demeaned', 'democracy_diff']]

# Add country dummies to the independent variables (excluding the reference category)
country_dummies = data.columns[-(len(data['country'].unique()) - 1):]
independent_vars = independent_vars.join(data[country_dummies])

year_dummies = data.columns[data.columns.str.startswith('year_')]
independent_vars = independent_vars.join(data[year_dummies])

# Instrument variable
instrument = data['avgs6']

# Run the IV regression
model = IV2SLS(dependent_var, independent_vars, None, instrument)
result = model.fit(cov_type="robust")

# Print the results
print(result.summary)

                          IV-2SLS Estimation Summary                          
Dep. Variable:            onset2COWCS   R-squared:                      0.1826
Estimator:                    IV-2SLS   Adj. R-squared:                 0.1132
No. Observations:                1929   F-statistic:                    36.791
Date:                Tue, May 09 2023   P-value (F-stat)                1.0000
Time:                        16:51:47   Distribution:                chi2(151)
Cov. Estimator:                robust                                         
                                                                              
                                 Parameter Estimates                                 
                   Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
-------------------------------------------------------------------------------------
s6                    0.2163     0.1691     1.2789     0.2009     -0.1152      0.5477
logpop_M_diff     -2.212

Now add stationary controls to fixed effects

In [113]:
data = pd.read_csv("gvcomix_transformed.csv")
data = data.dropna(subset=['onset2COWCS','decade',
                         'logmountain', 'ethnic_fractionalization',
                         'religion_fractionalization', 'language_fractionalization',
                         'leg_british', 'opec',
                         'logpop_M_diff', 'logpopdens_diff', 'logoutreg_diff', 'ecgrowth_demeaned', 'democracy_diff'])

num_rows = data.shape[0]

# Print the number of rows
print("Number of rows: ", num_rows)

data = pd.concat([data, pd.get_dummies(data['country'], drop_first=True)], axis=1)

independent_vars = data[['s6', 'decade',
                         'logmountain', 'ethnic_fractionalization',
                         'religion_fractionalization', 'language_fractionalization',
                         'leg_british', 'opec',
                         'logpop_M_diff', 'logpopdens_diff', 'logoutreg_diff', 'ecgrowth_demeaned', 'democracy_diff']]

# Add country dummies to the independent variables (excluding the reference category)
country_dummies = data.columns[-(len(data['country'].unique()) - 1):]
independent_vars = independent_vars.join(data[country_dummies])

# Add year dummies to the independent variables (excluding the reference category)
year_dummies = data.columns[data.columns.str.startswith('year_')]
independent_vars = independent_vars.join(data[year_dummies])

# Instrument variable
instrument = data['s19']

# Run the IV regression
model = IV2SLS(dependent_var, independent_vars, None, instrument)
result = model.fit(cov_type="robust")

# Print the results
print(result.summary)

Number of rows:  1585


ValueError: Array required to have 1929 obs, has 1585

In [None]:
#**********THIS ONE IS WRONG**************KEPT ONLY FOR REFERENCE

data = pd.read_csv("gvcobp_transformed.csv")
data = data.dropna(subset=['onset2COWCS'])

data = pd.concat([data, pd.get_dummies(data['country'], drop_first=True)], axis=1)

# Dependent variable
dependent_var = data['onset2COWCS']

# Independent variables
independent_vars = data[['s1', 's2', 's6', 's10']]

# Add country dummies to the independent variables (excluding the reference category)
country_dummies = data.columns[-(len(data['country'].unique()) - 1):]
independent_vars = independent_vars.join(data[country_dummies])

# Instrument variable
instrument = data['s19']

# Run the IV regression
model = IV2SLS(dependent_var, independent_vars, None, instrument)
result = model.fit(cov_type="robust")

# Print the results
print(result.summary)

independent_vars = data[['s1', 's2', 's6', 's10',
                         'decade', 'ecgrowth', 'logpop_M', 'logpopdens', 'logoutreg',
                         'democracy', 'logmountain', 'ethnic_fractionalization',
                         'religion_fractionalization', 'language_fractionalization',
                         'leg_british', 'opec']]

# Run the IV regression
model = IV2SLS(dependent_var, independent_vars, None, instrument)
result = model.fit(cov_type="robust")

# Print the results
print(result.summary)