In [3]:
import numpy as np
import pandas as pd
import pickle
from utils import load_PENN_data, decompose_Y, generate_simulation_components, parallel_experiments

In [4]:
# Run size: num_cores is the number of parallel workers (set it to the number
# of available cores); num_experiments is the number of simulated experiments.
num_experiments = 1000
num_cores = 36

# Result containers, filled per setting name by the simulation loop.
data_dict, RMSE, bias = {}, {}, {}

## Load and Format Data

In [5]:
# Alternative experiment designs, kept for reference (currently disabled).
# Each configs entry is [outcome, treatment, option].
#
# configs = {'Democracy': ['log_gdp', 'dem', None],
#            'No AR': ['log_gdp', 'dem', 'No Corr'],
#            'Education': ['log_gdp', 'educ', None],
#            'Random': ['log_gdp', 'dem', 'Random']}
#
# configs = {'Baseline': ['log_gdp', 'dem', None],
#            'No Auto Corr': ['log_gdp', 'dem', 'No Corr'],
#            'No M': ['log_gdp', 'dem', 'No M'],
#            'No F': ['log_gdp', 'dem', 'No F'],
#            'Only Noise': ['log_gdp', 'dem', 'Only Noise']}
#
# TROP_dict = {'Baseline': [0.3, 0.4, 0.006],
#              'No Auto Corr': [0.2, 0.3, 0.016],
#              'No M': [0, 0.12, 0.0511],
#              'No F': [1.6, 0.2, 0.0412],
#              'Only Noise': [2, 0.1, 0.1417]}

# Active design: setting name -> [outcome, treatment, option].
configs = {'Baseline': ['log_gdp', 'dem', None]}

# Per-setting TROP tuning parameters, forwarded as-is to parallel_experiments.
# NOTE(review): presumably ordered (lambda_unit, lambda_time, lambda_nn) -- confirm in utils.
TROP_dict = {'Baseline': [0.3, 0.4, 0.006]}

for setting in configs:
    config = configs[setting]
    print(setting)

    # Load the panel for this setting and keep it for later inspection.
    outcome, treatment, option = config
    data = load_PENN_data(outcome, treatment)
    data_dict[setting] = data

    # Build the simulation inputs, then seed the global RNG right before the experiments.
    simulation_components = generate_simulation_components(data)
    np.random.seed(0)
    experiment_results = parallel_experiments(
        num_cores,
        num_experiments,
        simulation_components,
        TROP_dict[setting],
        option,
    )
    RMSE[setting], bias[setting] = experiment_results


Baseline


In [6]:
# Show the RMSE array stored for each setting by the simulation loop.
# NOTE(review): the meaning/order of the array entries is defined by parallel_experiments -- confirm there.
RMSE

{'Baseline': array([0.02339231, 0.03626881, 0.040122  , 0.19819533, 0.04517151,
        0.0389131 ])}

In [7]:
# Show the bias array stored for each setting by the simulation loop.
# NOTE(review): the meaning/order of the array entries is defined by parallel_experiments -- confirm there.
bias

{'Baseline': array([ 0.00974151, -0.01092643,  0.00079464,  0.17592097,  0.03503156,
        -0.00467867])}

# Optimal Tuning Parameter

In [3]:
from methods import TROP_TWFE_average
from utils import generate_data
from joblib import Parallel, delayed
import matplotlib.pyplot as plt

In [4]:
# Seed the global NumPy RNG before loading the data and building the components.
np.random.seed(0)

# Specification used for tuning; `option` is read as a global inside get_ATE.
outcome, treatment, option = 'log_gdp', 'dem', None
data = load_PENN_data(outcome, treatment)

# Simulation inputs that get_ATE passes on to generate_data.
simulation_parts = generate_simulation_components(data)
F, M, cov_mat, pi = simulation_parts

In [5]:
# Shape of the test treatment block built in get_ATE:
# how many units are drawn as test units, and how many trailing periods are marked treated.
treated_unit_number = 1
treated_periods = 10

In [6]:
def get_ATE(trial, lambda_unit, lambda_time, lambda_nn):
    """Simulate one panel and return the TROP estimate for a randomly chosen test block.

    Reads the notebook globals ``F``, ``M``, ``cov_mat``, ``pi``, ``option``,
    ``treated_unit_number`` and ``treated_periods``.

    Parameters
    ----------
    trial : int
        Seed for the global NumPy RNG, so each trial is reproducible.
    lambda_unit, lambda_time, lambda_nn
        Tuning parameters forwarded to ``TROP_TWFE_average``.

    Returns
    -------
    The value returned by ``TROP_TWFE_average`` for this trial.
    NOTE(review): callers treat it as an error around zero (they take the
    root-mean-square over trials) -- presumably a placebo effect; confirm.
    """
    np.random.seed(trial)
    panel, _, simulated_treated, _ = generate_data(F, M, cov_mat, pi, option)

    # Remove the units flagged as treated by the simulation; only the remaining rows are used.
    panel = np.delete(panel, simulated_treated, axis=0)
    n_units, _ = panel.shape

    # Draw the test units without replacement from the remaining rows.
    chosen_units = np.random.choice(
        np.arange(n_units), size=treated_unit_number, replace=False
    )

    # Mark the last `treated_periods` columns of the chosen units as treated.
    assignment = np.zeros(panel.shape)
    assignment[chosen_units, -treated_periods:] = 1

    return TROP_TWFE_average(
        panel,
        assignment,
        chosen_units,
        lambda_unit=lambda_unit,
        lambda_time=lambda_time,
        lambda_nn=lambda_nn,
        treated_periods=treated_periods,
    )

## Select lambda_unit

In [23]:
# Sweep lambda_unit over a grid with lambda_time and lambda_nn held fixed.
# Q collects, for each grid value, the root-mean-square of the 100 trial estimates.
lambda_time = 0.4
lambda_nn = 0.005
lambda_units = np.arange(0, 3, 3/10)

Q = []
for lambda_unit in lambda_units:
    print(lambda_unit, lambda_time, lambda_nn)
    ATEs = Parallel(n_jobs=36, prefer='processes')(
        delayed(get_ATE)(
            trial,
            lambda_unit=lambda_unit,
            lambda_time=lambda_time,
            lambda_nn=lambda_nn,
        )
        for trial in range(100)
    )
    rmse = np.sqrt(np.mean(np.square(ATEs)))
    Q.append(rmse)
    print(rmse)

0.0 0.4 0.005
0.08602054327163458
0.3 0.4 0.005
0.08256251780212719
0.6 0.4 0.005
0.08303860399800457
0.8999999999999999 0.4 0.005
0.0891585263616667
1.2 0.4 0.005
0.09596066027712545
1.5 0.4 0.005


Process LokyProcess-152:
Process LokyProcess-159:
Traceback (most recent call last):
  File "/share/software/user/open/python/3.9.0/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/share/software/user/open/python/3.9.0/lib/python3.9/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/users/zhaonanq/.local/lib/python3.9/site-packages/joblib/externals/loky/process_executor.py", line 510, in _process_worker
    gc.collect()
Traceback (most recent call last):
KeyboardInterrupt
  File "/share/software/user/open/python/3.9.0/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/share/software/user/open/python/3.9.0/lib/python3.9/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/users/zhaonanq/.local/lib/python3.9/site-packages/joblib/externals/loky/process_executor.py", line 510, in _process_worker
    gc.

KeyboardInterrupt: 

In [None]:
# Grid value of lambda_unit with the smallest Q (first one on ties).
# Q only holds entries for grid values whose sweep iteration finished, so after an
# interrupted sweep this is the minimum over the completed prefix of the grid.
lambda_units[np.argmin(Q)]

In [None]:
# Q as a function of lambda_unit (requires Q to have one entry per grid value).
fig, ax = plt.subplots()
ax.plot(lambda_units, Q)
ax.set_xlabel('lambda_unit')
ax.set_ylabel('Q value')
ax.set_title('Q function for lambda_unit')
plt.show()

## Select lambda_time

In [24]:
# Sweep lambda_time over a grid with lambda_unit and lambda_nn held fixed.
# Q collects, for each grid value, the root-mean-square of the 100 trial estimates.
lambda_nn = 0.005
lambda_unit = 0.3
lambda_times = np.arange(0, 2, 2/10)

Q = []
for lambda_time in lambda_times:
    print(lambda_unit, lambda_time, lambda_nn)
    ATEs = Parallel(n_jobs=36, prefer='processes')(
        delayed(get_ATE)(
            trial,
            lambda_unit=lambda_unit,
            lambda_time=lambda_time,
            lambda_nn=lambda_nn,
        )
        for trial in range(100)
    )
    rmse = np.sqrt(np.mean(np.square(ATEs)))
    Q.append(rmse)
    print(rmse)

0.3 0.0 0.005
0.12863180524787382
0.3 0.2 0.005
0.11429899049404343
0.3 0.4 0.005
0.08256251780212719
0.3 0.6000000000000001 0.005
0.12258257120024067
0.3 0.8 0.005
0.1351288870904051
0.3 1.0 0.005
0.13746387445573816
0.3 1.2000000000000002 0.005


KeyboardInterrupt: 

In [None]:
# Grid value of lambda_time with the smallest Q (first one on ties).
# Q only holds entries for grid values whose sweep iteration finished, so after an
# interrupted sweep this is the minimum over the completed prefix of the grid.
lambda_times[np.argmin(Q)]

In [None]:
# Q as a function of lambda_time (requires Q to have one entry per grid value).
fig, ax = plt.subplots()
ax.plot(lambda_times, Q)
ax.set_xlabel('lambda_time')
ax.set_ylabel('Q value')
ax.set_title('Q function for lambda_time')
plt.show()

### Select lambda_nn

In [25]:
# Sweep lambda_nn over a grid with lambda_unit and lambda_time held fixed.
# Q collects, for each grid value, the root-mean-square of the 100 trial estimates.
lambda_time = 0.4
lambda_unit = 0.3
lambda_nns = np.arange(0.005, 0.055, 0.05/10)

Q = []
for lambda_nn in lambda_nns:
    print(lambda_unit, lambda_time, lambda_nn)
    ATEs = Parallel(n_jobs=36, prefer='processes')(
        delayed(get_ATE)(
            trial,
            lambda_unit=lambda_unit,
            lambda_time=lambda_time,
            lambda_nn=lambda_nn,
        )
        for trial in range(100)
    )
    rmse = np.sqrt(np.mean(np.square(ATEs)))
    Q.append(rmse)
    print(rmse)

0.3 0.4 0.005
0.08256251780212719
0.3 0.4 0.01
0.09239141418156772
0.3 0.4 0.015


Process LokyProcess-233:
Process LokyProcess-248:
Traceback (most recent call last):
  File "/share/software/user/open/python/3.9.0/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/share/software/user/open/python/3.9.0/lib/python3.9/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/users/zhaonanq/.local/lib/python3.9/site-packages/joblib/externals/loky/process_executor.py", line 510, in _process_worker
    gc.collect()
KeyboardInterrupt
Traceback (most recent call last):
  File "/share/software/user/open/python/3.9.0/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/share/software/user/open/python/3.9.0/lib/python3.9/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/users/zhaonanq/.local/lib/python3.9/site-packages/joblib/externals/loky/process_executor.py", line 510, in _process_worker
    gc.

KeyboardInterrupt: 

In [None]:
# Grid value of lambda_nn with the smallest Q (first one on ties).
# Q only holds entries for grid values whose sweep iteration finished, so after an
# interrupted sweep this is the minimum over the completed prefix of the grid.
lambda_nns[np.argmin(Q)]

In [None]:
# Q as a function of lambda_nn (requires Q to have one entry per grid value).
fig, ax = plt.subplots()
ax.plot(lambda_nns, Q)
ax.set_xlabel('lambda_nn')
ax.set_ylabel('Q value')
ax.set_title('Q function for lambda_nn')
plt.show()