In [None]:
import pandas as pd
import numpy as np
import os


# Directory and CSV file holding the per-run results that are summarised below.
output_dir = "ihdp_tmle_approaches_results"
os.makedirs(output_dir, exist_ok=True)

path = os.path.join(output_dir, "ihdp_tmle_approaches_results_summary.csv")

try:
    df = pd.read_csv(path)
except (
    OSError,
    UnicodeDecodeError,
    pd.errors.EmptyDataError,
    pd.errors.ParserError,
):
    # Report the failing path, then re-raise: continuing would either hit a
    # NameError on `df` further down or, in an interactive session, silently
    # summarise a stale `df` left over from an earlier run.
    print(f"Error: Could not load results from {path}")
    raise


# One dict of aggregate metrics per estimator whose columns are all present in `df`.
summary_rows = []

# (estimator, stage) pairs; the result columns are named
# "<estimator>_<field>_<stage>", e.g. "model_ate_std" or "dr_se_std".
estimator_prefixes = [
    ('model', 'std'),
    ('model', 'tmle_loss'),
    ('dr', 'std'),
    ('model', 'tmle'),
    ('model', 'post_last'),
    ('model', 'post_full'),
    ('model', 'ttr_last'),
    ('model', 'ttr_full'),
    ('model', 'combined_last'),
    ('model', 'combined_full'),
    ('model', 'tmle_update')
]

for est_name, stage_name in estimator_prefixes:
    ate_col = f"{est_name}_ate_{stage_name}"

    # The point-estimate column gets its own warning message when absent.
    if ate_col not in df.columns:
        print(f"Warning: Column {ate_col} not found in results")
        continue  # nothing to summarise for this estimator

    # Columns are fetched in this fixed order so that, when several are
    # missing, the first one in the sequence is the one that gets reported.
    needed_cols = (
        ate_col,
        'true_ate',
        f"{est_name}_se_{stage_name}",
        f"{est_name}_cover_{stage_name}",
        f"{est_name}_meanIF_{stage_name}",
        f"{est_name}_varIF_{stage_name}",
    )
    try:
        estimates, truth, std_errs, cover_flags, if_means, if_vars = [
            df[col] for col in needed_cols
        ]
    except KeyError as missing:
        print(f"Warning: Missing column: {missing}")
        continue  # incomplete estimators are left out of the summary

    # Estimation error per row; bias and variance are both taken over it.
    errors = estimates - truth
    bias = np.mean(errors)
    variance = np.var(errors, ddof=1)  # sample variance (n - 1 denominator)
    mse = bias**2 + variance

    summary_rows.append({
        'estimator': f'{est_name}_{stage_name}',
        'bias': bias,
        'variance': variance,
        'mse': mse,
        'rmse': np.sqrt(mse),
        'coverage': np.mean(cover_flags),
        'se_ci_avg': np.mean(std_errs),
        'mean_IF': np.mean(if_means),
        'var_IF': np.mean(if_vars)
    })


    
summary_df = pd.DataFrame(summary_rows)

# Human-readable label for each '<estimator>_<stage>' key used in the
# 'estimator' column; keys without an entry here map to NaN.
descriptions = {
    'model_std': 'Naive Plug-in',
    'model_tmle_loss': 'T-reg (ϵ)',
    'dr_std': 'A-IPTW',
    'model_tmle': 'Post-TMLE',
    'model_post_last': 'TDA-Last',
    'model_post_full': 'TDA-Full',
    'model_ttr_last': 'TDA Targeting during Training (Last Layer)',
    'model_ttr_full': 'TDA Targeting during Training (All Layers)',
    'model_combined_last': 'Combined TDA Targeting during Training with Post TDA Targeting (Last Layer)',
    'model_combined_full': 'Combined TDA Targeting during Training with Post TDA Targeting (All Layers)',
    'model_tmle_update': 'Training with TMLE-style Loss + TMLE Update'
}

if summary_df.empty:
    # With no summary rows the frame has no 'estimator' column, so mapping
    # descriptions would fail with an opaque KeyError; say what happened instead.
    print("Warning: No estimator results available to summarise")
else:
    # Insert at position 0 so the description is the first column.
    summary_df.insert(0, 'description', summary_df['estimator'].map(descriptions))
    print(summary_df)


                                          description            estimator  \
0                                       Naive Plug-in            model_std   
1                                           T-reg (ϵ)      model_tmle_loss   
2                                              A-IPTW               dr_std   
3                                           Post-TMLE           model_tmle   
4                                            TDA-Last      model_post_last   
5                                            TDA-Full      model_post_full   
6          TDA Targeting during Training (Last Layer)       model_ttr_last   
7          TDA Targeting during Training (All Layers)       model_ttr_full   
8   Combined TDA Targeting during Training with Po...  model_combined_last   
9   Combined TDA Targeting during Training with Po...  model_combined_full   
10        Training with TMLE-style Loss + TMLE Update    model_tmle_update   

        bias  variance       mse      rmse  coverage  se_ci_avg