In [1]:
import sys
import os
sys.path.append(os.path.abspath("../src"))

import yaml
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from collections import defaultdict
from evaluation.run_experiment import get_job_list
from utils.metrics import f1, precision, recall
from config.constants import COL_NAMES, RANDOM_EPE
from scipy.stats import sem

In [2]:
config_name = "table_2"

job_list = get_job_list(config_name, config_dir='../experiments/configs')

with open(f"../experiments/configs/{config_name}.yaml", "r") as f:
    config = yaml.safe_load(f)

# Collect the raw result frames of every job, grouped by dataset name.
# Frames are gathered in lists and concatenated once per dataset instead of
# re-concatenating onto a growing DataFrame inside the loop.
frames_by_dataset = defaultdict(list)
for i, job in enumerate(job_list):
    dataset_label = f"{job['dataset']}"
    results_path = f"../results/{config_name}/raw/{i}_results.pkl"
    try:
        # NOTE(security): unpickling can execute arbitrary code; only load
        # result files produced by trusted experiment runs.
        df = pd.read_pickle(results_path)
    except FileNotFoundError:
        # Only a genuinely absent file counts as "missing"; any other error
        # (corrupt pickle, bad job spec, interrupt) must surface instead of
        # being silently reported as a missing result.
        print(f'Missing results for job {i}')
        continue
    # Tag every row with the method of the job that produced it.
    df['method'] = job['method']
    frames_by_dataset[dataset_label].append(df)

# res_dict maps dataset name -> all rows of all jobs run on that dataset.
# It stays a defaultdict so that looking up an unknown dataset still yields
# an empty DataFrame, as before.
res_dict = defaultdict(lambda: pd.DataFrame())
for dataset_label, frames in frames_by_dataset.items():
    res_dict[dataset_label] = pd.concat(frames, ignore_index=True)

In [3]:
# --- Aggregation settings -------------------------------------------------
epe_select = 'train'   # per seed, the run with the lowest f'{epe_select}_epe' is reported
size_threshold = 0.1   # only runs with train_size strictly above this are considered
d = 2
# R_star is a (2, d) array: row 0 is -(1/6)**(1/d) and row 1 is +(1/6)**(1/d)
# in every coordinate. Presumably the lower/upper corners of the ground-truth
# region that f1/precision/recall compare against -- confirm in utils.metrics.
R_star = np.ones((2, d))
R_star[0] *= -(1/6) ** (1/d)
R_star[1] *= (1/6) ** (1/d)
figs = 2               # number of decimals shown in the summary table


def _mean_sem(values, ndigits):
    """Format ``values`` as ``'<mean> (<standard error>)'``.

    Both numbers are rounded to ``ndigits`` decimals. An empty list yields
    ``'nan (nan)'`` and a single value yields ``'<value> (nan)'``; these cases
    are handled explicitly so numpy/scipy do not emit RuntimeWarnings.
    """
    mean = np.mean(values) if len(values) > 0 else float('nan')
    err = sem(values) if len(values) > 1 else float('nan')
    return f'{round(mean, ndigits)} ({round(err, ndigits)})'


# Build one summary record per (dataset, method) pair and create the table
# once at the end. This gives res_df a unique 0..n-1 index (row-by-row concat
# left every row labelled 0) and avoids concatenating onto an empty frame.
summary_rows = []
for dataset in res_dict:
    df = res_dict[dataset]
    # The size filter does not depend on method or seed, so apply it once.
    eligible_df = df[df['train_size'] > size_threshold]
    for method in df['method'].unique():
        test_epes = []
        test_c_inds = []
        f1s = []
        precs = []
        recs = []

        for seed in df['seed'].unique():
            filtered_df = eligible_df[
                (eligible_df['method'] == method) & (eligible_df['seed'] == seed)
            ].sort_values(by=f'{epe_select}_epe')
            if filtered_df.empty:
                # This seed has no run above the size threshold for this method.
                continue

            # First row after sorting = run with the smallest selection EPE.
            test_epes.append(filtered_df['test_epe'].iloc[0])
            test_c_inds.append(filtered_df['test_c_ind'].iloc[0])
            R = filtered_df['R'].iloc[0]
            f1s.append(f1(R, R_star))
            precs.append(precision(R, R_star))
            recs.append(recall(R, R_star))

        summary_rows.append({
            'Dataset': dataset,
            'Method': method,
            'EPE': _mean_sem(test_epes, figs),
            'C-Index': _mean_sem(test_c_inds, figs),
            'Precision': _mean_sem(precs, figs),
            'Recall': _mean_sem(recs, figs),
            'F1': _mean_sem(f1s, figs),
        })

# Explicit column list keeps the established column order of the table and
# guarantees all columns exist even when there are no rows.
res_df = pd.DataFrame(
    summary_rows,
    columns=['Dataset', 'Method', 'EPE', 'C-Index', 'F1', 'Precision', 'Recall'],
)

  return 2. / ((1. / recall(R_hat, R)) + (1. / precision(R_hat, R)))


In [4]:
# Display the summary table: one row per (dataset, method) pair, each metric
# formatted as "mean (standard error)" over seeds.
res_df

Unnamed: 0,Dataset,Method,EPE,C-Index,F1,Precision,Recall
0,nonlinear,ddgroup,0.38 (0.02),0.87 (0.01),0.97 (0.01),0.96 (0.01),0.98 (0.01)
0,nonlinear,base,0.69 (0.0),0.54 (0.0),0.29 (0.0),0.17 (0.0),1.0 (0.0)
0,nonlinear,prim,0.69 (0.0),0.54 (0.0),0.3 (0.01),0.17 (0.0),1.0 (0.0)
0,nonlinear,survival_tree,0.66 (0.01),0.59 (0.01),0.28 (0.03),0.26 (0.04),0.34 (0.04)
0,nonlinear,cox_tree,0.64 (0.04),0.6 (0.03),0.33 (0.08),0.26 (0.09),0.55 (0.09)
0,nonlinear,random,0.56 (0.03),0.74 (0.02),0.75 (0.04),0.78 (0.04),0.75 (0.05)
0,nonlinear,c_ind_ddgroup,0.39 (0.02),0.86 (0.01),0.89 (0.03),0.96 (0.01),0.85 (0.05)
0,nonlinear,pl_ddgroup,0.64 (0.02),0.61 (0.02),0.46 (0.03),0.43 (0.04),0.63 (0.09)
0,nonlinear,no_exp_ddgroup,0.29 (0.01),0.88 (0.0),0.9 (0.01),0.99 (0.01),0.83 (0.02)
