### First run the hyperparameter selection and benchmark

In [5]:
%%bash
# Launch the experiment script. When an executable tmux is found, run it in a
# detached session named "experiment"; otherwise run it directly in this shell.
if [ -x "$(command -v tmux)" ]; then
  tmux new-session -d -s experiment "bash ./run_experiment.sh"
else
  bash ./run_experiment.sh
fi

# Note: running the experiment takes around 2 hours.

### Examine benchmark results

In [1]:
import pandas as pd
from IPython.display import display, HTML
from analysis import find_best_method
from benchmarks import prepare_benchmark

In [2]:
# Metric columns reported for the non-synthetic datasets.
metrics = [
    'ROC',
    'PRC',
    'Silhouette_auc',
    'Hroc',
    'Hprc',
]

# Metric columns reported for the synthetic dataset: the same columns as above
# plus PURITY, RAND and MI.
synth_metrics = [
    'ROC',
    'PRC',
    'PURITY',
    'RAND',
    'MI',
    'Silhouette_auc',
    'Hroc',
    'Hprc',
]

# Directory that holds the `<dataset>_benchmark_complete.csv` result files.
result_dir = 'benchmark_results'

In [3]:
# For every dataset, load its benchmark CSV and show two tables: the per-method
# performance and the table returned by find_best_method.
for dataname in ('Synth', 'ICU', 'ADNI'):
    results = pd.read_csv(f'{result_dir}/{dataname}_benchmark_complete.csv', index_col=0)

    # Only the synthetic dataset uses the extended metric list.
    m = synth_metrics if dataname == 'Synth' else metrics
    ttest = find_best_method(results, m)

    print(f'dataset: {dataname}')

    print('performance')
    display(HTML(results[['method'] + m].to_html()))

    # p-value of two-sample test (of equal mean)
    print('p-value')
    display(HTML(ttest.to_html()))


dataset: Synth
performance


Unnamed: 0,method,ROC,PRC,PURITY,RAND,MI,Silhouette_auc,Hroc,Hprc
0,KM-E2P(y),0.966+-0.019,0.95+-0.021,0.653+-0.019,0.458+-0.051,0.539+-0.062,0.709+-0.095,0.814+-0.065,0.808+-0.064
1,KM-E2P(z),0.966+-0.015,0.945+-0.025,0.642+-0.016,0.377+-0.043,0.472+-0.038,0.780+-0.048,0.862+-0.031,0.854+-0.033
2,KM-DTW-D,0.722+-0.033,0.649+-0.028,0.469+-0.017,0.068+-0.021,0.077+-0.022,0.867+-0.002,0.787+-0.020,0.742+-0.019
3,T-Phenotype,0.983+-0.011,0.976+-0.011,0.868+-0.143,0.772+-0.232,0.801+-0.172,0.852+-0.057,0.912+-0.035,0.909+-0.036
4,KM-Laplace,0.642+-0.048,0.591+-0.036,0.634+-0.049,0.337+-0.091,0.39+-0.074,0.875+-0.006,0.739+-0.032,0.704+-0.025
5,T-Phenotype(joint),0.927+-0.042,0.893+-0.058,0.675+-0.060,0.407+-0.093,0.454+-0.097,0.78+-0.012,0.847+-0.021,0.832+-0.027
6,AC-TPC,0.964+-0.015,0.949+-0.02,0.66+-0.019,0.488+-0.033,0.592+-0.037,0.897+-0.012,0.929+-0.012,0.922+-0.015
7,SEQ2SEQ,0.507+-0.028,0.505+-0.014,0.378+-0.008,-0.003+-0.003,0.005+-0.003,0.832+-0.004,0.63+-0.022,0.628+-0.011


p-value


Unnamed: 0,method,ROC,PRC,PURITY,RAND,MI,Silhouette_auc,Hroc,Hprc
0,KM-E2P(y),0.009449,0.001344,0.000182,0.000408,0.000238,0.000026,0.000062,0.000063
1,KM-E2P(z),0.00392,0.001087,0.000127,0.000081,0.000037,0.000006,0.000021,0.000036
2,KM-DTW-D,0.0,0.0,0.000002,0.000001,0.0,0.000005,0.0,0.0
3,T-Phenotype,best,best,best,best,best,0.009559,0.078828,0.182176
4,KM-Laplace,0.0,0.0,0.00014,0.00006,0.000011,0.000094,0.0,0.0
5,T-Phenotype(joint),0.00048,0.00027,0.000606,0.000209,0.000057,0.0,0.0,0.000001
6,AC-TPC,0.002065,0.000838,0.000228,0.000718,0.000816,best,best,best
7,SEQ2SEQ,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


dataset: ICU
performance


Unnamed: 0,method,ROC,PRC,Silhouette_auc,Hroc,Hprc
0,KM-E2P(y),0.655+-0.025,0.573+-0.014,0.703+-0.061,0.676+-0.029,0.629+-0.026
1,KM-E2P(z),0.638+-0.071,0.562+-0.033,0.704+-0.085,0.662+-0.049,0.621+-0.036
2,KM-DTW-D,0.539+-0.030,0.515+-0.011,0.786+-0.072,0.636+-0.023,0.621+-0.021
3,T-Phenotype,0.701+-0.015,0.603+-0.012,0.705+-0.049,0.702+-0.027,0.649+-0.022
4,KM-Laplace,0.568+-0.013,0.528+-0.006,0.825+-0.002,0.673+-0.009,0.644+-0.005
5,T-Phenotype(joint),0.674+-0.027,0.581+-0.016,0.68+-0.068,0.676+-0.046,0.625+-0.037
6,AC-TPC,0.668+-0.011,0.579+-0.006,0.708+-0.064,0.686+-0.034,0.636+-0.029
7,SEQ2SEQ,0.594+-0.023,0.54+-0.011,0.827+-0.014,0.691+-0.010,0.653+-0.004


p-value


Unnamed: 0,method,ROC,PRC,Silhouette_auc,Hroc,Hprc
0,KM-E2P(y),0.000123,0.000099,0.000024,0.020643,0.003958
1,KM-E2P(z),0.0052,0.00091,0.000244,0.013942,0.004729
2,KM-DTW-D,0.0,0.0,0.039948,0.000038,0.000177
3,T-Phenotype,best,best,0.000006,best,0.455624
4,KM-Laplace,0.0,0.0,0.552625,0.002095,0.000271
5,T-Phenotype(joint),0.00501,0.00132,0.000015,0.065145,0.010901
6,AC-TPC,0.000054,0.00005,0.000045,0.144947,0.034438
7,SEQ2SEQ,0.0,0.0,best,0.132587,best


dataset: ADNI
performance


Unnamed: 0,method,ROC,PRC,Silhouette_auc,Hroc,Hprc
0,KM-E2P(y),0.898+-0.006,0.734+-0.019,0.682+-0.010,0.775+-0.006,0.707+-0.009
1,KM-E2P(z),0.796+-0.123,0.617+-0.125,0.627+-0.055,0.699+-0.078,0.618+-0.088
2,KM-DTW-D,0.743+-0.013,0.522+-0.02,0.762+-0.049,0.752+-0.027,0.618+-0.021
3,T-Phenotype,0.886+-0.006,0.704+-0.020,0.699+-0.007,0.781+-0.003,0.701+-0.008
4,KM-Laplace,0.656+-0.043,0.44+-0.025,0.811+-0.016,0.725+-0.031,0.57+-0.024
5,T-Phenotype(joint),0.856+-0.014,0.663+-0.034,0.711+-0.009,0.777+-0.005,0.686+-0.016
6,AC-TPC,0.859+-0.019,0.664+-0.021,0.71+-0.026,0.777+-0.022,0.686+-0.020
7,SEQ2SEQ,0.775+-0.024,0.550+-0.031,0.772+-0.014,0.774+-0.013,0.642+-0.022


p-value


Unnamed: 0,method,ROC,PRC,Silhouette_auc,Hroc,Hprc
0,KM-E2P(y),best,best,0.0,0.004418,best
1,KM-E2P(z),0.006684,0.003656,0.000001,0.001746,0.002258
2,KM-DTW-D,0.0,0.0,0.003138,0.001585,0.0
3,T-Phenotype,0.00026,0.001416,0.0,best,0.060505
4,KM-Laplace,0.0,0.0,best,0.000049,0.0
5,T-Phenotype(joint),0.000002,0.000044,0.0,0.016903,0.001035
6,AC-TPC,0.000026,0.000005,0.000001,0.452557,0.003011
7,SEQ2SEQ,0.0,0.0,0.000042,0.050512,0.000002


### Check hyperparameters

In [4]:
def print_hparams(model, config, loss_weights):
    """Print the selected hyperparameters of one benchmark model.

    The report format is chosen from ``model.__name__``:

    * ``'KME2P'``: prints the model name suffixed with
      ``config["latent_space"]``, then ``num_layers``, ``hidden_size`` and
      ``latent_size`` read from ``config``.
    * ``'Predictor'``: prints it as ``T-Phenotype``, with ``pole_separation``
      and ``max_degree`` read from ``config["encoder_config"]``, the ``pole``
      and ``real`` entries of ``loss_weights``, and ``num_layer`` and
      ``hidden_size`` read from ``config``.

    Any other model name prints nothing. Each report ends with a blank line.

    Args:
        model: Object exposing ``__name__`` (e.g. a class).
        config: Mapping holding the keys listed above for the given model.
        loss_weights: Mapping with ``'pole'`` and ``'real'`` entries; only
            read for ``'Predictor'``.
    """
    model_name = model.__name__

    if model_name == 'KME2P':
        print(f'model: KME2P{config["latent_space"]}')
        for key in ('num_layers', 'hidden_size', 'latent_size'):
            print(f'{key}={config[key]}')
        print()
        return

    if model_name != 'Predictor':
        # Models of any other name are skipped without output.
        return

    print(f'model: T-Phenotype')
    print('encoder parameters:')
    encoder_config = config["encoder_config"]
    for key in ('pole_separation', 'max_degree'):
        print(f'{key}={encoder_config[key]}')
    for term in ('pole', 'real'):
        print(f'loss coeff {term}={loss_weights[term]}')

    print('predictor parameters:')
    for key in ('num_layer', 'hidden_size'):
        print(f'{key}={config[key]}')
    print()
        
# For each dataset, fetch its benchmark setups and print every model's
# hyperparameters. NOTE(review): the dataset summary seen in the cell output is
# not printed here; presumably prepare_benchmark emits it. Confirm.
for dataname in ('Synth', 'ICU', 'ADNI'):
    splits, setup_list = prepare_benchmark(dataname)
    print('hyperparameters')

    for setup in setup_list:
        model, config, loss_weights = setup
        print_hparams(model, config, loss_weights)
    print()

Synth
total samples: 1200
max length: 20
x_dim: 2
y_dim: 2
features: ['x1', 'x2']
temporal dims: [0, 1]
dataset Synth, cluster num K= 3
hyperparameters
model: KME2Py
num_layers=2
hidden_size=20
latent_size=10

model: KME2Pz
num_layers=2
hidden_size=30
latent_size=10

model: T-Phenotype
encoder parameters:
pole_separation=1.5
max_degree=1
loss coeff pole=1.0
loss coeff real=1.0
predictor parameters:
num_layer=2
hidden_size=10


ICU
total samples: 1554
max length: 24
x_dim: 4
y_dim: 2
features: ['Age', 'Gender', 'GCS', 'PaCO2']
temporal dims: [2, 3]
dataset ICU, cluster num K= 3
hyperparameters
model: KME2Py
num_layers=4
hidden_size=5
latent_size=10

model: KME2Pz
num_layers=3
hidden_size=30
latent_size=5

model: T-Phenotype
encoder parameters:
pole_separation=1.5
max_degree=2
loss coeff pole=1.0
loss coeff real=0.1
predictor parameters:
num_layer=2
hidden_size=20


ADNI
total samples: 1346
max length: 12
x_dim: 3
y_dim: 3
features: ['APOE4', 'CDRSB', 'Hippocampus']
temporal dims: [1, 2]