# Reproduce Table 3 (Real Data)

Because of the number of jobs required to reproduce the experiments on the real data, we split them into 4 SLURM submissions.
These are specified by table31.yaml, table32.yaml, table33.yaml, and table34.yaml.
They have also been collected into a single config (table_3.yaml) if you wish to reproduce all of the experiments at once.

To reproduce Table 3 from the raw results of the runs, follow these steps:

1. In the analysis directory, run: `python process_results.py table31` (and then the same for `table32`, `table33`, `table34`)
2. Again in the analysis directory: `python gen_combined_tables.py table31 table32 table33 table34`
3. Finally, run this notebook and the tables will be displayed.
4. If you instead recreated the raw results using the single `table_3.yaml` config, replace the `config_names` list in the configuration cell below with `config_names = ['table_3']` before running the notebook.

In [1]:
import sys
import os
sys.path.append(os.path.abspath("../src"))

import pandas as pd
import numpy as np
from glob import glob
from IPython.display import display, Markdown

In [2]:
# Configuration
# Config names whose combined results are loaded; their order determines
# the name of the combined results directory.
config_names = ['table31', 'table32', 'table33', 'table34']
# Size threshold; formatted to two decimals it selects the `size_X.XX` subdirectory.
size_threshold = 0.10

# The combined directory name is the config names joined with underscores.
combined_suffix = "_".join(config_names)
results_dir = "../results/combined_{}/tables/size_{:.2f}".format(
    combined_suffix, size_threshold
)

print("Loading tables from: " + results_dir)

Loading tables from: ../results/combined_table31_table32_table33_table34/tables/size_0.10


In [3]:
# Find all pickled table files in the results directory.
# Sorting makes the display order deterministic across runs.
table_files = sorted(glob(f"{results_dir}/*.pkl"))

print(f"Found {len(table_files)} summary tables in {results_dir}\n")

if not table_files:
    # The combined tables directory is produced by step 2 of the workflow
    # described at the top of this notebook, so point the reader there.
    print("No tables found! Run gen_combined_tables.py first.")

Found 7 summary tables in ../results/combined_table31_table32_table33_table34/tables/size_0.10



In [4]:
# Maps the column keys used in the pickled tables to the short labels
# shown as column headers.
col_map = {
    'base': 'Base',
    'ddgroup': 'DG',
    'c_ind_ddgroup': 'DG-CI',
    'pl_ddgroup': 'DG-PL',
    'no_exp_ddgroup': 'DG-NE',
    'cox_tree': 'CT',
    'prim': 'PRIM',
    'random': 'Rand',
    'survival_tree': 'ST',
}

# Maps metric row labels to short names.
# The backslash before '%' is written as an explicit '\\' because '\%' is
# not a valid Python escape sequence and triggers a warning on recent
# Python versions; the resulting string values are unchanged.
# NOTE(review): the escaped percent sign looks intended for LaTeX export,
# and this mapping is not referenced by the cells visible here -- confirm.
metric_map = {
    'Test EPE': 'EPE',
    'Test Rej@5%': 'Rej@5\\%',
    'Test Rej@10%': 'Rej@10\\%',
    'Test C-Index': 'C-Index',
    'Test Size': 'Size'
}

# Left-to-right order in which the renamed columns are displayed.
col_order = ['Base', 'Rand', 'PRIM', 'ST', 'CT', 'DG-PL', 'DG-CI', 'DG-NE', 'DG']

In [5]:
# Metric rows to show, in display order (hoisted out of the loop because it
# does not change between tables).
metrics_to_show = ['Test EPE', 'Test Rej@10%', 'Test C-Index', 'Test Size']

for table_file in table_files:
    # Heading text: the file name with only its trailing extension removed.
    dataset_label = os.path.splitext(os.path.basename(table_file))[0]

    # NOTE: unpickling can execute arbitrary code; only load result files
    # that were produced locally by the analysis scripts.
    table_raw = pd.read_pickle(table_file)

    # Rename the columns to their short labels.
    table_renamed = table_raw.rename(columns=col_map)
    # Keep only the selected metric rows (raises KeyError if one is missing).
    table_metrics = table_renamed.loc[metrics_to_show]
    # Apply the fixed column order, skipping columns absent from this table.
    table_display = table_metrics[[m for m in col_order if m in table_metrics.columns]]

    display(Markdown(f"## {dataset_label}"))
    display(table_display)
    # Visual separator between tables: a real newline, not the literal text "\n".
    print("\n" + "=" * 80 + "\n")

## aids-['cd4']-['age']

Unnamed: 0_level_0,Base,Rand,PRIM,ST,CT,DG-PL,DG-CI,DG-NE,DG
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Test EPE,0.544 (0.018),0.593 (0.128),0.531 (0.016),0.581 (0.099),0.496 (0.083),0.541 (0.085),0.593 (0.064),0.456 (0.086),0.427 (0.039)
Test Rej@10%,0.071 (0.013),0.107 (0.050),0.061 (0.014),0.079 (0.026),0.125 (0.046),0.082 (0.030),0.100 (0.022),0.050 (0.021),0.075 (0.053)
Test C-Index,0.714 (0.014),0.725 (0.055),0.726 (0.014),0.706 (0.045),0.740 (0.042),0.725 (0.051),0.683 (0.033),0.746 (0.045),0.757 (0.028)
Test Size,1.000 (0.000),0.169 (0.028),0.869 (0.038),0.316 (0.106),0.166 (0.022),0.733 (0.136),0.554 (0.149),0.144 (0.007),0.198 (0.028)




## aids-['karnof']-['age']

Unnamed: 0_level_0,Base,Rand,PRIM,ST,CT,DG-PL,DG-CI,DG-NE,DG
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Test EPE,0.617 (0.029),0.579 (0.107),0.618 (0.029),0.645 (0.075),0.720 (0.220),0.618 (0.039),0.720 (0.218),0.767 (0.221),0.379 (0.069)
Test Rej@10%,0.151 (0.006),0.051 (0.014),0.154 (0.006),0.106 (0.025),0.193 (0.023),0.122 (0.020),0.111 (0.019),0.066 (0.024),0.111 (0.031)
Test C-Index,0.661 (0.028),0.681 (0.074),0.661 (0.029),0.636 (0.051),0.685 (0.073),0.660 (0.034),0.689 (0.070),0.665 (0.075),0.839 (0.049)
Test Size,1.000 (0.000),0.132 (0.009),0.958 (0.022),0.214 (0.086),0.444 (0.105),0.817 (0.122),0.646 (0.145),0.129 (0.008),0.154 (0.011)




## aids-['priorzdv']-['age']

Unnamed: 0_level_0,Base,Rand,PRIM,ST,CT,DG-PL,DG-CI,DG-NE,DG
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Test EPE,0.699 (0.004),0.666 (0.043),0.698 (0.002),0.687 (0.048),0.678 (0.040),0.661 (0.030),0.699 (0.004),0.668 (0.053),0.646 (0.044)
Test Rej@10%,0.060 (0.004),0.050 (0.004),0.060 (0.004),0.057 (0.003),0.057 (0.003),0.053 (0.006),0.060 (0.004),0.052 (0.003),0.051 (0.006)
Test C-Index,0.464 (0.018),0.553 (0.042),0.457 (0.017),0.558 (0.038),0.568 (0.043),0.524 (0.059),0.464 (0.018),0.568 (0.042),0.582 (0.056)
Test Size,1.000 (0.000),0.157 (0.021),0.935 (0.030),0.105 (0.010),0.165 (0.012),0.642 (0.146),1.000 (0.000),0.122 (0.010),0.160 (0.014)




## gbsg2-['tsize']-['age']

Unnamed: 0_level_0,Base,Rand,PRIM,ST,CT,DG-PL,DG-CI,DG-NE,DG
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Test EPE,0.681 (0.002),0.623 (0.035),0.684 (0.003),0.682 (0.017),0.626 (0.040),0.677 (0.004),0.649 (0.027),0.612 (0.041),0.610 (0.036)
Test Rej@10%,0.083 (0.006),0.052 (0.022),0.080 (0.007),0.044 (0.011),0.054 (0.021),0.096 (0.011),0.072 (0.016),0.050 (0.022),0.054 (0.022)
Test C-Index,0.566 (0.008),0.625 (0.042),0.553 (0.011),0.575 (0.030),0.623 (0.039),0.577 (0.015),0.641 (0.046),0.636 (0.041),0.639 (0.039)
Test Size,1.000 (0.000),0.152 (0.024),0.939 (0.014),0.256 (0.068),0.113 (0.009),0.902 (0.098),0.386 (0.135),0.117 (0.009),0.124 (0.010)




## metabric-['MKI67']-['age at diagnosis']

Unnamed: 0_level_0,Base,Rand,PRIM,ST,CT,DG-PL,DG-CI,DG-NE,DG
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Test EPE,0.693 (0.000),0.704 (0.009),0.693 (0.001),0.696 (0.006),0.716 (0.011),0.693 (0.000),0.702 (0.009),0.725 (0.011),0.686 (0.006)
Test Rej@10%,0.083 (0.003),0.124 (0.011),0.082 (0.004),0.143 (0.011),0.133 (0.012),0.083 (0.003),0.091 (0.010),0.145 (0.015),0.114 (0.009)
Test C-Index,0.492 (0.008),0.500 (0.016),0.492 (0.010),0.520 (0.019),0.487 (0.022),0.492 (0.008),0.494 (0.016),0.486 (0.025),0.537 (0.015)
Test Size,1.000 (0.000),0.170 (0.016),0.843 (0.046),0.263 (0.041),0.254 (0.034),1.000 (0.000),0.644 (0.146),0.112 (0.007),0.380 (0.106)




## veterans_lung_cancer-['Karnofsky_score']-['Age_in_years']

Unnamed: 0_level_0,Base,Rand,PRIM,ST,CT,DG-PL,DG-CI,DG-NE,DG
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Test EPE,0.575 (0.017),0.417 (0.143),0.583 (0.018),0.449 (0.099),0.217 (0.046),0.315 (0.020),0.336 (0.057),0.186 (0.047),0.333 (0.064)
Test Rej@10%,0.043 (0.012),0.118 (0.036),0.032 (0.011),0.075 (0.028),0.207 (0.040),0.107 (0.053),0.103 (0.028),0.200 (0.041),0.089 (0.027)
Test C-Index,0.692 (0.018),0.842 (0.062),0.682 (0.017),0.770 (0.074),0.930 (0.021),0.923 (0.031),0.872 (0.030),0.931 (0.022),0.869 (0.041)
Test Size,1.000 (0.000),0.168 (0.020),0.879 (0.048),0.232 (0.053),0.196 (0.016),0.232 (0.031),0.222 (0.023),0.179 (0.018),0.282 (0.083)




## whas500-['diasbp']-['age']

Unnamed: 0_level_0,Base,Rand,PRIM,ST,CT,DG-PL,DG-CI,DG-NE,DG
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Test EPE,0.669 (0.007),0.833 (0.066),0.667 (0.006),0.650 (0.061),0.644 (0.068),0.671 (0.027),0.552 (0.083),0.743 (0.086),0.624 (0.072)
Test Rej@10%,0.135 (0.005),0.201 (0.025),0.128 (0.004),0.207 (0.028),0.158 (0.034),0.179 (0.028),0.166 (0.019),0.198 (0.027),0.186 (0.026)
Test C-Index,0.610 (0.007),0.505 (0.062),0.613 (0.007),0.678 (0.039),0.655 (0.063),0.635 (0.033),0.750 (0.065),0.632 (0.053),0.705 (0.058)
Test Size,1.000 (0.000),0.145 (0.010),0.982 (0.005),0.148 (0.021),0.159 (0.015),0.623 (0.149),0.197 (0.090),0.147 (0.020),0.125 (0.009)


