In [1]:
import pandas as pd
import numpy as np
from statsmodels.discrete.discrete_model import Probit
from linearmodels import IV2SLS
from linearmodels.iv.results import compare
import os

No fixed affects, No stationary contrls

In [7]:
# Load the data
data = pd.read_csv('../../data/combined_data/gvcomix_data.csv')

# Drop rows with missing values in any of the columns used in the regression
columns_to_check = ['onset2COWCS', 'decade', 'logpop_M_diff', 'logpopdens_diff', 'logoutreg_diff', 'ecgrowth_demeaned', 'democracy_diff']

# Replace infinite values with NaN
data[columns_to_check] = data[columns_to_check].replace([np.inf, -np.inf], np.nan)

# Drop rows with NaN in these columns
data = data.dropna(subset=columns_to_check)

# Set the MultiIndex
data = data.set_index(['country', 't'])

# Define the dependent variable
dependent = data['onset2COWCS']

# List of variables to regress on
variables = ['s1', 's2', 's6', 's10']

# Corresponding instrument variables for transport
instruments_transport = ['trans_outp_p', 'trans_outp_p', 'trans_outp_p', 'trans_outp_p']

# Corresponding instrument variables for World GVC
instruments_gvc = ['avgs1', 'avgs2', 'avgs6', 'avgs10']

# DataFrame to store results
results_data = []

# Loop through each variable and its corresponding instrument and run the regression for both instruments
for var, inst_transport, inst_gvc in zip(variables, instruments_transport, instruments_gvc):
    for inst, inst_name in zip([inst_transport, inst_gvc], ["transport", "World GVC"]):
        formula = 'onset2COWCS ~ [' + var + ' ~ ' + inst + '] + decade + logpop_M_diff + logpopdens_diff + logoutreg_diff + ecgrowth_demeaned + democracy_diff'
        model_iv = IV2SLS.from_formula(formula, data)
        results_iv = model_iv.fit(cov_type='clustered', clusters=data.index.get_level_values('country'))
        
        # Perform probit transformation
        model_probit = Probit(dependent, data[var])
        results_probit = model_probit.fit()
       
        # Add the results to the list
        results_data.append({
            'Variable': var,
            'Instrument': inst_name,
            'Coefficient': results_probit.params[0],
            'Std Error': results_probit.bse[0],
            'P-value': results_probit.pvalues[0]
        })

# Convert list to DataFrame and set the MultiIndex
results_df = pd.DataFrame(results_data)
results_df = results_df.set_index(['Variable', 'Instrument'])

# Output to file
results_df.to_csv('probit_results.csv')

Optimization terminated successfully.
         Current function value: 0.687200
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.687200
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.692973
         Iterations 4
Optimization terminated successfully.
         Current function value: 0.692973
         Iterations 4
Optimization terminated successfully.
         Current function value: 0.685505
         Iterations 4
Optimization terminated successfully.
         Current function value: 0.685505
         Iterations 4
Optimization terminated successfully.
         Current function value: 0.688365
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.688365
         Iterations 5


NO Fixed effects, YES stationary controls

In [8]:
# Load the data
#data = pd.read_csv('gvcobp_transformed1.csv')
data = pd.read_csv('../../data/combined_data/gvcobp_data.csv')

# Drop rows with missing values in any of the columns used in the regression
columns_to_check = ['onset2COWCS','decade',
                         'logmountain', 'ethnic_fractionalization',
                         'religion_fractionalization', 'language_fractionalization',
                         'leg_british', 'opec',
                         'logpop_M_diff', 'logpopdens_diff', 'logoutreg_diff', 'ecgrowth_demeaned', 'democracy_diff']

# Replace infinite values with NaN
data[columns_to_check] = data[columns_to_check].replace([np.inf, -np.inf], np.nan)

# Drop rows with NaN in these columns
data = data.dropna(subset=columns_to_check)

# Set the MultiIndex
data = data.set_index(['country', 't'])

# Define the dependent variable
dependent = data['onset2COWCS']

# List of variables to regress on
variables = ['s1', 's2', 's6', 's10']

# Corresponding instrument variables for transport
instruments_transport = ['trans_outp_p', 'trans_outp_p', 'trans_outp_p', 'trans_outp_p']

# Corresponding instrument variables for World GVC
instruments_gvc = ['avgs1', 'avgs2', 'avgs6', 'avgs10']

# DataFrame to store results
results_data = []

# Loop through each variable and its corresponding instrument and run the regression for both instruments
for var, inst_transport, inst_gvc in zip(variables, instruments_transport, instruments_gvc):
    for inst, inst_name in zip([inst_transport, inst_gvc], ["transport", "World GVC"]):
        formula = 'coup ~ 1 + [' + var + ' ~ ' + inst + '] + decade + logpop_M_diff + logpopdens_diff + logoutreg_diff + ecgrowth_demeaned + democracy_diff + logmountain + ethnic_fractionalization + religion_fractionalization + language_fractionalization + leg_british + opec'
        model_iv = IV2SLS.from_formula(formula, data)
        results_iv = model_iv.fit(cov_type='clustered', clusters=data.index.get_level_values('country'))
        
        # Perform probit transformation
        model_probit = Probit(dependent, data[var])
        results_probit = model_probit.fit()
       
        # Add the results to the list
        results_data.append({
            'Variable': var,
            'Instrument': inst_name,
            'Coefficient': results_probit.params[0],
            'Std Error': results_probit.bse[0],
            'P-value': results_probit.pvalues[0]
        })



# Output to file
results_df = pd.DataFrame(results_data)

results_df.to_csv('probit_results.csv', mode='a', header=False, index=False)

Optimization terminated successfully.
         Current function value: 0.686252
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.686252
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.692644
         Iterations 4
Optimization terminated successfully.
         Current function value: 0.692644
         Iterations 4
Optimization terminated successfully.
         Current function value: 0.687607
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.687607
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.683972
         Iterations 4
Optimization terminated successfully.
         Current function value: 0.683972
         Iterations 4


In [9]:
databp = pd.read_csv('bp_results.csv')
datafp = pd.read_csv('fp_results.csv')
datamix = pd.read_csv('mix_results.csv')

databp = databp[databp['Variable'] == 's6']
datafp = datafp[datafp['Variable'] == 's6']
datamix = datamix[datamix['Variable'] == 's6']
databp = databp[databp['Instrument'] == 'World GVC']
datafp = datafp[datafp['Instrument'] == 'World GVC']
datamix = datamix[datamix['Instrument'] == 'World GVC']

fuel_data = pd.concat([databp, datafp, datamix], ignore_index=True)
fuel_data.to_csv('robustness_results.csv')