In [3]:
import os
import pandas as pd

# Widen pandas' text display limits so the result tables printed further down
# are shown in full: up to 500 rows and 500 columns, wrapped at 100 characters.
pd.options.display.max_rows = 500
pd.options.display.max_columns = 500
pd.options.display.width = 100

In [4]:
def format_results(filename, print_table = True):
    """Load a results CSV and turn it into a compact coverage table.

    NOTE: a later cell in this notebook defines ``format_results`` again
    (reading 'CI.80'/'CI.95' columns and taking a ``latex`` flag); once that
    cell has run, it replaces this definition.

    Steps performed on the file's contents:
      * rows whose ``bias`` column equals 1 are removed;
      * the columns v, CATE_v, alg, mean_est, mean_SE, 'CI 80', 'CI 95' are
        kept in that order, and any of them that is absent from the file or
        entirely NaN is dropped;
      * 'diff 80' and 'diff 95' are added as 'CI 80' - 0.8 and 'CI 95' - 0.95;
      * numeric values are rounded to two decimals;
      * ``alg`` loses its first four characters and the remainder is parsed as
        a float (presumably stripping a label prefix -- confirm against the CSVs);
      * the two CI columns and the two diff columns are multiplied by 100 and
        rendered as whole-number percentage strings such as '90%'.

    Parameters
    ----------
    filename : path of the CSV file, passed straight to ``pd.read_csv``.
    print_table : when equal to ``True`` the table is printed without its
        index and nothing is returned; otherwise the DataFrame is returned.

    Returns
    -------
    pandas.DataFrame or None
    """
    wanted_columns = ['v', 'CATE_v', 'alg', 'mean_est', 'mean_SE', 'CI 80', 'CI 95']

    def as_percent(column):
        # Whole-number percentage strings, aligned on the column's own index.
        return pd.Series(["{0:.0f}%".format(val * 100) for val in column], index = column.index)

    table = pd.read_csv(filename)
    biased_rows = table[table.bias == 1].index
    table = table.drop(biased_rows)
    table = table.reindex(wanted_columns, axis=1)
    table = table.dropna(how='all', axis=1)

    # Coverage deviations are taken from the unrounded coverage values.
    table['diff 80'] = table['CI 80'] - 0.8
    table['diff 95'] = table['CI 95'] - 0.95

    table = table.round(2)
    table['alg'] = table['alg'].str[4:].astype(float)
    for name in ('CI 80', 'CI 95', 'diff 80', 'diff 95'):
        table[name] = as_percent(table[name])

    # Optional filter, disabled: keep only the rows with alg == 0.50.
    # table = table.drop(table[table.alg != 0.50].index)

    # Explicit comparison kept on purpose: only a value equal to True prints.
    if print_table == True:
        print(table.to_string(index = False))
        return None
    return table

In [11]:
def format_results(filename, print_table = True, latex = False):
    """Read a simulation-results CSV and print or return a rounded summary table.

    Processing applied to the file's contents:
      1. rows whose ``bias`` column equals 1 are removed;
      2. the columns v, CATE_v, alg, mean_est, mean_SE, 'CI.80', 'CI.95' are
         kept in that order; any of them that is absent from the file or
         entirely NaN is dropped;
      3. numeric values are rounded to two decimals;
      4. ``alg`` loses its first four characters and the remainder is parsed
         as a float (presumably stripping a label prefix -- confirm against
         the CSVs);
      5. 'CI.80' and 'CI.95' are multiplied by 100 and rendered as
         whole-number percentage strings ('90%', or '90\\%' when ``latex``).

    Unlike the earlier two-argument definition in this notebook, no
    'diff 80' / 'diff 95' columns are produced.

    Parameters
    ----------
    filename : path of the CSV file, passed straight to ``pd.read_csv``.
    print_table : bool, default True
        If true, print the table and return ``None``; if false, return the
        formatted DataFrame instead of printing.
    latex : bool, default False
        If true, percent signs are escaped for LaTeX and, when printing, the
        table is emitted as a LaTeX ``tabular`` with the index hidden.

    Returns
    -------
    pandas.DataFrame or None

    Raises
    ------
    KeyError
        If 'alg', 'CI.80' or 'CI.95' is missing from the file (or entirely
        empty, since such columns are dropped in step 2).
    """
    wanted_columns = ['v', 'CATE_v', 'alg', 'mean_est', 'mean_SE', 'CI.80', 'CI.95']

    results = pd.read_csv(filename)
    results = results.drop(results[results.bias == 1].index)
    results = results.reindex(wanted_columns, axis=1).dropna(how='all', axis=1)

    results = results.round(2)
    results['alg'] = results['alg'].str[4:].astype(float)

    # LaTeX treats a bare '%' as the start of a comment, so it must be emitted
    # as '\%'. The backslash is spelled '\\' here: a lone '\%' inside a normal
    # string literal is an invalid escape sequence (a SyntaxWarning on recent
    # Python versions), even though it currently yields the same characters.
    percent_template = "{0:.0f}\\%" if latex else "{0:.0f}%"
    for column in ('CI.80', 'CI.95'):
        results[column] = pd.Series(
            [percent_template.format(val * 100) for val in results[column]],
            index = results.index,
        )

    if not print_table:
        return results

    if latex:
        # Styler.hide() with no arguments hides the index, i.e. no row names.
        styler = results.style.format(precision = 2).hide()
        print(styler.to_latex())
    else:
        print(results.to_string(index = False))
    return None

In [13]:
# Sub-folder of the simulation results directory whose CSVs are tabulated below.
# NOTE(review): the name `dir` shadows Python's built-in dir() for the rest of
# the kernel session.
dir = "it_500_lam_0.05_0.01_0.005"
# Make that folder the working directory so the bare file names in file_list
# resolve. NOTE(review): this is an absolute, machine-specific path, and
# os.chdir changes process-wide state that later cells inherit.
os.chdir('C:/Users/liuw/Dropbox (Personal)/MIT Pre-Doc Personal/Rahul Singh/Debiased Kernel Methods/New Code/results/simulation' + '/' + dir)
# Result files to format, one table per file.
file_list = ['n100_p13_CEF1.csv', 'n100_p13_CEF2.csv', 'n100_p13_CEF3.csv', 'n100_p57_CEF1.csv', 'n100_p57_CEF2.csv', 'n100_p57_CEF3.csv']

# Print each file's table as a LaTeX tabular (format_results returns None here).
for file in file_list:
    format_results(file, print_table = True, latex = True)

\begin{tabular}{rrrrrll}
v & CATE_v & alg & mean_est & mean_SE & CI.80 & CI.95 \\
-0.25 & -0.10 & 0.25 & -0.10 & 0.09 & 90\% & 99\% \\
-0.25 & -0.10 & 0.50 & -0.08 & 0.06 & 91\% & 99\% \\
-0.25 & -0.10 & 1.00 & -0.06 & 0.04 & 77\% & 97\% \\
0.00 & 0.00 & 0.25 & 0.02 & 0.07 & 80\% & 94\% \\
0.00 & 0.00 & 0.50 & 0.03 & 0.06 & 84\% & 98\% \\
0.00 & 0.00 & 1.00 & 0.05 & 0.04 & 63\% & 92\% \\
0.25 & 0.32 & 0.25 & 0.36 & 0.15 & 93\% & 98\% \\
0.25 & 0.32 & 0.50 & 0.37 & 0.11 & 93\% & 99\% \\
0.25 & 0.32 & 1.00 & 0.34 & 0.08 & 93\% & 99\% \\
\end{tabular}

\begin{tabular}{rrrrrll}
v & CATE_v & alg & mean_est & mean_SE & CI.80 & CI.95 \\
-0.25 & -0.10 & 0.25 & -0.10 & 0.05 & 85\% & 94\% \\
-0.25 & -0.10 & 0.50 & -0.10 & 0.04 & 83\% & 94\% \\
-0.25 & -0.10 & 1.00 & -0.08 & 0.03 & 68\% & 87\% \\
0.00 & 0.00 & 0.25 & 0.00 & 0.03 & 74\% & 94\% \\
0.00 & 0.00 & 0.50 & 0.01 & 0.02 & 75\% & 93\% \\
0.00 & 0.00 & 1.00 & 0.03 & 0.02 & 52\% & 80\% \\
0.25 & 0.32 & 0.25 & 0.39 & 0.19 & 85\% & 94\% \\
0.2

In [9]:
# Column of the formatted table to compare across result folders; switch by
# (un)commenting one of the alternatives.
# NOTE(review): the name must match a column returned by whichever
# format_results definition is currently active in the kernel ('CI.95' is
# produced by the variant that reads 'CI.80'/'CI.95').
# col = 'mean_est'
# col = 'CI.80'
col = 'CI.95'

# Result files to compare, and the sub-folders that each hold a copy of them.
file_list = ['n100_p13_CEF1.csv', 'n100_p13_CEF2.csv', 'n100_p13_CEF3.csv', 'n100_p57_CEF1.csv', 'n100_p57_CEF2.csv', 'n100_p57_CEF3.csv']
dir_list = ["lam_1", "lam_0.5", "lam_0.1", "lam_0.05", "lam_0.01", "lam_0.005", "lam_0.001"]
# One rendered text table per file, collected first and printed at the end.
res_list = []

for file in file_list:
    # One output column per folder; rows follow the index of the formatted results.
    df = pd.DataFrame()

    for dir in dir_list:
        # The file name is relative, so move into the folder before reading it.
        # NOTE(review): absolute, machine-specific path; the working directory
        # stays at the last folder after the loop, and `dir` shadows the
        # built-in dir().
        os.chdir('C:/Users/liuw/Dropbox (Personal)/MIT Pre-Doc Personal/Rahul Singh/Debiased Kernel Methods/New Code/results/simulation' + '/' + dir)
        df[dir] = format_results(file, print_table = False)[col]

    res_list.append(df.to_string(index = False))

# Print the per-file tables one after another, in file_list order.
for res in res_list:
    print(res)

lam_1 lam_0.5 lam_0.1 lam_0.05 lam_0.01 lam_0.005 lam_0.001
   3%     30%     99%     100%     100%      100%      100%
   0%      9%     97%     100%     100%      100%       99%
   0%      0%     80%      95%      97%       97%       97%
  60%     84%     94%      95%      97%       97%       97%
  47%     68%     92%      95%      98%       98%       99%
  36%     60%     91%      95%      95%       98%       96%
   8%     16%     84%      91%      98%       98%       99%
   1%      9%     68%      92%      98%       98%      100%
   0%      1%     36%      75%      96%       98%       97%
lam_1 lam_0.5 lam_0.1 lam_0.05 lam_0.01 lam_0.005 lam_0.001
  68%     76%     87%      90%      91%       91%       92%
  57%     67%     85%      88%      91%       92%       94%
  12%     21%     56%      69%      78%       84%       90%
  73%     78%     88%      92%      94%       93%       94%
  74%     80%     90%      91%      93%       94%       96%
  52%     54%     70%      76%      78% 