In [1]:
import pandas as pd
import numpy as np
from linearmodels import IV2SLS
from linearmodels.iv.results import compare
import os

No fixed effects, no stationary controls

In [6]:
# Specification: IV (2SLS) regressions without fixed effects and without the
# additional stationary controls. This cell creates 'fp_results.csv' and writes
# its header row; the later cells append their rows to the same file.

# Load the data
data = pd.read_csv('../../data/gvcofp_data.csv')

# Outcome and exogenous controls that must be present for a row to be usable
columns_to_check = ['onset2COWCS','decade','logpop_M_diff', 'logpopdens_diff', 'logoutreg_diff', 'ecgrowth_demeaned', 'democracy_diff']

# Replace infinite values with NaN so that dropna() below removes them as well
data[columns_to_check] = data[columns_to_check].replace([np.inf, -np.inf], np.nan)

# Drop rows with NaN in these columns
# NOTE(review): the endogenous regressors and instruments are not part of this
# subset, so missing values in them are left for linearmodels to deal with.
data = data.dropna(subset=columns_to_check)

# Index by (country, t); the 'country' level is used for clustering below
data = data.set_index(['country', 't'])

# Dependent variable (kept for inspection; the formula below refers to the
# column by name and does not use this variable)
dependent = data['onset2COWCS']

# Endogenous regressors, estimated one at a time
variables = ['s1', 's2', 's6', 's10']

# Transport instrument: the same column is used for every regressor
instruments_transport = ['trans_outp_p', 'trans_outp_p', 'trans_outp_p', 'trans_outp_p']

# World GVC instrument: one column per regressor, in the same order
instruments_gvc = ['avgs1', 'avgs2', 'avgs6', 'avgs10']

# One dict per (regressor, instrument) regression
results_data = []

# Run one regression per regressor and per instrument type
for var, inst_transport, inst_gvc in zip(variables, instruments_transport, instruments_gvc):
    for inst, inst_name in zip([inst_transport, inst_gvc], ["transport", "World GVC"]):
        formula = 'onset2COWCS ~ 1 + [' + var + ' ~ ' + inst + '] + decade + logpop_M_diff + logpopdens_diff + logoutreg_diff + ecgrowth_demeaned + democracy_diff'
        model_iv = IV2SLS.from_formula(formula, data)
        # Standard errors clustered by country
        results_iv = model_iv.fit(cov_type='clustered', clusters=data.index.get_level_values('country'))

        # Keep only the estimate for the instrumented regressor
        results_data.append({
            'Variable': var,
            'Instrument': inst_name,
            'Coefficient': results_iv.params[var],
            'Std Error': results_iv.std_errors[var],
            'P-value': results_iv.pvalues[var]
        })

# Convert the list of records to a DataFrame (default integer index)
results_df = pd.DataFrame(results_data)

# Output to file. index=False is required: the later cells append their rows
# with index=False, so writing the index here would give this cell's rows (and
# the header) one more field than every appended row.
results_df.to_csv('fp_results.csv', index=False)

NO Fixed effects, YES stationary controls

In [7]:
# Panel used for the specification with the extra stationary controls
data = pd.read_csv('../../data/gvcomix_data.csv')

# Outcome plus every exogenous control that has to be finite and non-missing
columns_to_check = [
    'onset2COWCS', 'decade',
    'logmountain', 'ethnic_fractionalization',
    'religion_fractionalization', 'language_fractionalization',
    'leg_british', 'opec',
    'logpop_M_diff', 'logpopdens_diff', 'logoutreg_diff', 'ecgrowth_demeaned', 'democracy_diff',
]

# Turn +/-inf into NaN, discard incomplete rows, then index by (country, t)
data[columns_to_check] = data[columns_to_check].replace([np.inf, -np.inf], np.nan)
data = data.dropna(subset=columns_to_check).set_index(['country', 't'])

# Outcome series (the regressions below reference the column through the formula)
dependent = data['onset2COWCS']

# Endogenous regressors and, position by position, their two instruments
variables = ['s1', 's2', 's6', 's10']
instruments_transport = ['trans_outp_p', 'trans_outp_p', 'trans_outp_p', 'trans_outp_p']
instruments_gvc = ['avgs1', 'avgs2', 'avgs6', 'avgs10']

# Exogenous part of the formula, identical for every regression in this cell
exog_terms = (
    ' + decade + logpop_M_diff + logpopdens_diff + logoutreg_diff'
    ' + ecgrowth_demeaned + democracy_diff'
    ' + logmountain + ethnic_fractionalization + religion_fractionalization'
    ' + language_fractionalization + leg_british + opec'
)

# Collected estimates, one record per regression
results_data = []

for position, var in enumerate(variables):
    # Transport instrument first, World GVC instrument second
    labelled_instruments = (
        ("transport", instruments_transport[position]),
        ("World GVC", instruments_gvc[position]),
    )
    for inst_name, inst in labelled_instruments:
        formula = 'onset2COWCS ~ 1 + [' + var + ' ~ ' + inst + ']' + exog_terms
        model_iv = IV2SLS.from_formula(formula, data)
        # Cluster the covariance on the country level of the index
        results_iv = model_iv.fit(
            cov_type='clustered',
            clusters=data.index.get_level_values('country'),
        )

        # Record only the coefficient on the instrumented regressor
        record = {'Variable': var, 'Instrument': inst_name}
        record['Coefficient'] = results_iv.params[var]
        record['Std Error'] = results_iv.std_errors[var]
        record['P-value'] = results_iv.pvalues[var]
        results_data.append(record)

# Append this specification's rows to the shared results file (no header, no index)
results_df = pd.DataFrame(results_data)
results_df.to_csv('fp_results.csv', mode='a', header=False, index=False)

YES Fixed effects, NO stationary controls

In [8]:
# Specification: IV (2SLS) regressions with country and year fixed effects
# (entered as dummy variables) and without the additional stationary controls.
# Rows are appended to 'fp_results.csv'.

# Load the data
data = pd.read_csv('../../data/gvcofp_data.csv')

# Outcome and exogenous controls that must be present for a row to be usable
columns_to_check = ['onset2COWCS','decade','logpop_M_diff', 'logpopdens_diff', 'logoutreg_diff', 'ecgrowth_demeaned', 'democracy_diff']

# Replace infinite values with NaN so that dropna() below removes them as well
data[columns_to_check] = data[columns_to_check].replace([np.inf, -np.inf], np.nan)

# Drop rows with NaN in these columns
# NOTE(review): the endogenous regressors and instruments are not part of this
# subset, so missing values in them are left for linearmodels to deal with.
data = data.dropna(subset=columns_to_check)

# Index by (country, t); the 'country' level is used for clustering below
data = data.set_index(['country', 't'])

# Country and year dummies (first level dropped because the formula has an
# intercept). dtype=float keeps them numeric; casting them to str would turn
# each dummy into a string-valued column rather than a 0/1 regressor.
country_dummies = pd.get_dummies(data.index.get_level_values('country'), drop_first=True, prefix='C', dtype=float)
year_dummies = pd.get_dummies(data.index.get_level_values('t'), drop_first=True, prefix='Y', dtype=float)
dummies = pd.concat([country_dummies, year_dummies], axis=1)

# Dummy names end up inside the formula string, so replace any character that
# is not a letter, digit or underscore (spaces, dots, ...) with '_'.
dummies.columns = dummies.columns.str.replace(r'\W', '_', regex=True)
assert dummies.columns.is_unique, 'dummy column names collide after sanitising'

# get_dummies on index values returns a fresh 0..n-1 index, so reset the index
# of 'data' to line the rows up before concatenating along axis=1
data = data.reset_index()
data_with_dummies = pd.concat([data, dummies], axis=1)

# Restore the original MultiIndex
data_with_dummies = data_with_dummies.set_index(['country', 't'])

# Endogenous regressors, estimated one at a time
variables = ['s1', 's2', 's6', 's10']

# Transport instrument: the same column is used for every regressor
instruments_transport = ['trans_outp_p', 'trans_outp_p', 'trans_outp_p', 'trans_outp_p']

# World GVC instrument: one column per regressor, in the same order
instruments_gvc = ['avgs1', 'avgs2', 'avgs6', 'avgs10']

# One dict per (regressor, instrument) regression
results_data = []

# The fixed-effect terms are the same for every regression, so build them once
fixed_effects = ' + '.join(dummies.columns)

# Run one regression per regressor and per instrument type
for var, inst_transport, inst_gvc in zip(variables, instruments_transport, instruments_gvc):
    for inst, inst_name in zip([inst_transport, inst_gvc], ["transport", "World GVC"]):
        # Every piece that needs interpolation is an f-string. Previously only
        # the first literal carried the f prefix, so '{fixed_effects}' reached
        # the formula as literal text and the dummies were never included.
        formula = (
            f'onset2COWCS ~ 1 + [{var} ~ {inst}]'
            ' + logpop_M_diff + logpopdens_diff + logoutreg_diff'
            ' + ecgrowth_demeaned + democracy_diff'
            f' + {fixed_effects}'
        )
        model_iv = IV2SLS.from_formula(formula, data_with_dummies)
        # Standard errors clustered by country
        results_iv = model_iv.fit(cov_type='clustered', clusters=data_with_dummies.index.get_level_values('country'))

        # Keep only the estimate for the instrumented regressor
        results_data.append({
            'Variable': var,
            'Instrument': inst_name,
            'Coefficient': results_iv.params[var],
            'Std Error': results_iv.std_errors[var],
            'P-value': results_iv.pvalues[var]
        })

# Convert the list of records to a DataFrame (default integer index)
results_df = pd.DataFrame(results_data)

# Append to the shared results file without header or index
results_df.to_csv('fp_results.csv', mode='a', header=False, index=False)

YES fixed effects (ONLY COUNTRY), NO stationary controls

In [9]:
# Specification: IV (2SLS) regressions with country fixed effects only
# (entered as dummy variables) and without the additional stationary controls.
# Rows are appended to 'fp_results.csv'.

# Load the data
data = pd.read_csv('../../data/gvcofp_data.csv')

# Outcome and exogenous controls that must be present for a row to be usable
columns_to_check = ['onset2COWCS','decade','logpop_M_diff', 'logpopdens_diff', 'logoutreg_diff', 'ecgrowth_demeaned', 'democracy_diff']

# Replace infinite values with NaN so that dropna() below removes them as well
data[columns_to_check] = data[columns_to_check].replace([np.inf, -np.inf], np.nan)

# Drop rows with NaN in these columns
# NOTE(review): the endogenous regressors and instruments are not part of this
# subset, so missing values in them are left for linearmodels to deal with.
data = data.dropna(subset=columns_to_check)

# Index by (country, t); the 'country' level is used for clustering below
data = data.set_index(['country', 't'])

# Country dummies only (first level dropped because the formula has an
# intercept). dtype=float keeps them numeric; casting them to str would turn
# each dummy into a string-valued column rather than a 0/1 regressor.
country_dummies = pd.get_dummies(data.index.get_level_values('country'), drop_first=True, prefix='C', dtype=float)

# Only the country dummies enter this specification. The previous version also
# concatenated 'year_dummies', a variable this cell never defines: it was
# picked up from whichever cell had run before, or raised NameError.
dummies = country_dummies.copy()

# Dummy names end up inside the formula string, so replace any character that
# is not a letter, digit or underscore (spaces, dots, ...) with '_'.
dummies.columns = dummies.columns.str.replace(r'\W', '_', regex=True)
assert dummies.columns.is_unique, 'dummy column names collide after sanitising'

# get_dummies on index values returns a fresh 0..n-1 index, so reset the index
# of 'data' to line the rows up before concatenating along axis=1
data = data.reset_index()
data_with_dummies = pd.concat([data, dummies], axis=1)

# Restore the original MultiIndex
data_with_dummies = data_with_dummies.set_index(['country', 't'])

# Endogenous regressors, estimated one at a time
variables = ['s1', 's2', 's6', 's10']

# Transport instrument: the same column is used for every regressor
instruments_transport = ['trans_outp_p', 'trans_outp_p', 'trans_outp_p', 'trans_outp_p']

# World GVC instrument: one column per regressor, in the same order
instruments_gvc = ['avgs1', 'avgs2', 'avgs6', 'avgs10']

# One dict per (regressor, instrument) regression
results_data = []

# The fixed-effect terms are the same for every regression, so build them once
fixed_effects = ' + '.join(dummies.columns)

# Run one regression per regressor and per instrument type
for var, inst_transport, inst_gvc in zip(variables, instruments_transport, instruments_gvc):
    for inst, inst_name in zip([inst_transport, inst_gvc], ["transport", "World GVC"]):
        # Every piece that needs interpolation is an f-string. Previously only
        # the first literal carried the f prefix, so '{fixed_effects}' reached
        # the formula as literal text and the dummies were never included.
        formula = (
            f'onset2COWCS ~ 1 + [{var} ~ {inst}]'
            ' + logpop_M_diff + logpopdens_diff + logoutreg_diff'
            ' + ecgrowth_demeaned + democracy_diff'
            f' + {fixed_effects}'
        )
        model_iv = IV2SLS.from_formula(formula, data_with_dummies)
        # Standard errors clustered by country
        results_iv = model_iv.fit(cov_type='clustered', clusters=data_with_dummies.index.get_level_values('country'))

        # Keep only the estimate for the instrumented regressor
        results_data.append({
            'Variable': var,
            'Instrument': inst_name,
            'Coefficient': results_iv.params[var],
            'Std Error': results_iv.std_errors[var],
            'P-value': results_iv.pvalues[var]
        })

# Convert the list of records to a DataFrame (default integer index)
results_df = pd.DataFrame(results_data)

# Append to the shared results file without header or index
results_df.to_csv('fp_results.csv', mode='a', header=False, index=False)

In [None]:
# Read back the results file that the regression cells wrote to
results_path = 'fp_results.csv'
data = pd.read_csv(results_path)