In [2]:
import pandas as pd
from pydynpd import regression
import pathlib
from sklearn.preprocessing import LabelEncoder
# Shared encoder instance; load_data() refits it on every call to turn the
# `country` column into integer codes.
le = LabelEncoder()

# Questions
1. Should we perform one-step GMM estimation rather than the default two-step GMM estimation?

# Assumption tests
## AR
We expect to reject the null hypothesis of no serial correlation at order 1, AR(1), but not at order 2, AR(2).
## Overidentification
A suggested rule of thumb is that any p-value between 0.1 and 0.25 is acceptable: it is comfortably above the 0.05 significance level, yet small enough not to raise suspicion of severe p-value inflation. By that criterion, system GMM without time dummies is the best result that we can get.

In [3]:
# Control-variable names, kept as space-separated strings because reg() splices
# them directly into the pydynpd command string.
unstationary = ' '.join(['ecgrowth', 'logpop_M', 'logpopdens', 'logoutreg'])
stationary = ' '.join([
    'logmountain',
    'ethnic_fractionalization',
    'religion_fractionalization',
    'language_fractionalization',
    'leg_british',
    'opec',
])

# The data directory is derived from the working directory by swapping the
# "code/Internal_IV" segment for "data".
path = pathlib.Path().resolve()
iv = pd.read_csv(
    str(path).replace("code/Internal_IV", "data") + "/transportIV_file.csv",
    na_values='#DIV/0!',
)
# Stationary controls are left out in most specifications. They usually do not
# change the results much, but that should be verified in the robustness check.
# No time fixed effects when the world-level average GVC participation is the
# external IV.

In [4]:
def load_data(predictor="gvcomix"):
    """Load one predictor's panel, attach the IV table and integer-code `country`.

    Reads ``<predictor>_transformed1.csv`` from the data directory (derived from
    the module-level ``path``), merges it with the module-level ``iv`` frame on
    ``country`` and ``t``, replaces ``country`` with the codes produced by the
    shared ``le`` encoder, and fills every missing value with 0.

    Parameters
    ----------
    predictor : str
        File-name prefix of the transformed data set to load.

    Returns
    -------
    pandas.DataFrame
        The merged frame, with the encoded ``country`` column placed last.
    """
    data_dir = str(path).replace("code/Internal_IV", "data")
    panel = pd.read_csv(data_dir + "/" + predictor + "_transformed1.csv", na_values='#DIV/0!')
    merged = pd.merge(panel, iv, on=["country", "t"])
    country_codes = le.fit_transform(merged['country'])
    # Dropping and re-adding the column moves the encoded `country` to the end
    # of the frame, exactly as the in-place version did.
    encoded = merged.drop(columns='country').assign(country=country_codes)
    return encoded.fillna(0)

In [5]:
def reg(method=" ", additional_cntrl=False, time=False, iv='trans_outp_p_x'):
    """Build a pydynpd command string and run ``regression.abond`` with it.

    The model is fitted on the module-level ``df`` with ``country`` and ``t`` as
    the panel identifiers, so ``df`` must be bound before calling.

    Parameters
    ----------
    method : str
        Option text appended after the instrument section. The default blank
        gives system GMM; ``"nolevel"`` produces difference GMM.
    additional_cntrl : bool
        When true, the ``stationary`` controls are appended to the regressors.
    time : bool
        When true, the ``timedumm`` option is appended.
    iv : str
        Column name of the external instrument; it is used both as a regressor
        and inside ``iv(...)``. Inside this function the name shadows the
        module-level ``iv`` DataFrame.

    Returns
    -------
    The object returned by ``regression.abond``.
    """
    regressors = ''.join(['onset2COWCS L(1:?).onset2COWCS s10 ', iv, ' ', unstationary, ' '])
    if additional_cntrl:
        regressors += stationary
    instruments = ''.join([' | gmm(onset2COWCS, 2:3) gmm(s10, 2:3) iv(', iv, ') | '])
    options = ' ' + method
    if time:
        options += ' timedumm'
    return regression.abond(regressors + instruments + options, df, ['country', 't'])

In [6]:
def all_reg():
    """Fit the six GMM specifications and return them in a fixed order.

    Order of the returned list:
      0. world-average IV, system GMM
      1. world-average IV, difference GMM
      2. transport IV, system GMM
      3. transport IV, system GMM, time dummies
      4. transport IV, difference GMM
      5. transport IV, difference GMM, time dummies

    The world-average IV models deliberately omit time dummies.
    """
    return [
        # World-average GVC participation as IV
        reg(iv='avgs10'),
        reg(iv='avgs10', method="nolevel"),
        # Transport IV (the default instrument of reg)
        reg(),
        reg(time=True),
        reg(method="nolevel"),
        reg(time=True, method="nolevel"),
    ]

In [7]:
def print_results(results):
    """Print the regression table and diagnostic p-values for each fitted model.

    ``results`` is expected in the order produced by ``all_reg()``; each entry
    must expose ``.models[0]`` with ``regression_table``, ``AR_list`` (at least
    two items with ``P_value``) and ``hansen.p_value``.

    The header labels follow the ``all_reg()`` ordering. Previously they were
    shifted by one position from index 2 onwards, the time-dummy models were
    labelled "World Average" although they use the transport IV, and the last
    label sat behind an unreachable index. Entries beyond the known six are
    printed without a header.
    """
    labels = (
        "IV = World Average; Sys.",
        "IV = World Average; Dif.",
        "IV = Transport; Sys.",
        "IV = Transport; Sys; t-FE.",
        "IV = Transport; Dif.",
        "IV = Transport; Dif; t-FE.",
    )
    for idx, result in enumerate(results):
        if idx < len(labels):
            print(labels[idx])
        m = result.models[0]
        print(m.regression_table)
        # The trailing space in each caption plus print's separator reproduces
        # the original two-space gap before the value.
        print("AR(1) test P value: ", m.AR_list[0].P_value)
        print("AR(2) test P value: ", m.AR_list[1].P_value)
        print("Hansen test P value: ", m.hansen.p_value)
        print("\n")

In [8]:
%%capture
# Fit every specification for every predictor; the %%capture cell magic
# silences whatever this cell would otherwise print.
predictor_list = ["gvcomix", "gvcobp", "gvcofp"]
# results_list[i] holds the list returned by all_reg() for predictor_list[i].
results_list = []
for predictor in predictor_list:
    # reg() reads the module-level `df`, so it has to be rebound here before
    # all_reg() is called for this predictor.
    df = load_data(predictor);
    results_list.append(all_reg());

In [10]:
# Print one banner per predictor followed by all of its model summaries.
for idx, predictor_results in enumerate(results_list):
    print("========Predictor = ", predictor_list[idx], "========")
    print_results(predictor_results)

IV = World Average; Sys.
         variable  coefficient   std_err   z_value   p_value sig
0  L1.onset2COWCS     0.047330  0.035311  1.340371  0.180125    
1              s6     0.301259  0.341425  0.882359  0.377583    
2           avgs6    -0.840849  0.550114 -1.528500  0.126389    
3        ecgrowth    -0.151795  0.063891 -2.375852  0.017508   *
4        logpop_M     0.113434  0.049493  2.291939  0.021909   *
5      logpopdens    -0.004777  0.105076 -0.045464  0.963738    
6       logoutreg    -0.162757  0.215566 -0.755020  0.450237    
7            _con     0.030417  0.037069  0.820539  0.411909    
AR(1) test P value:  2.1896020463913873e-05
AR(2) test P value:  0.404836556221706
Hansen test P value:  0.6599171845098645


IV = World Average; Dif.
         variable  coefficient   std_err   z_value   p_value sig
0  L1.onset2COWCS     0.058297  0.042373  1.375809  0.168881    
1              s6     0.212817  0.329711  0.645467  0.518624    
2           avgs6     0.044695  0.854674  0.