### First run the hyperparameter selection and benchmark

In [1]:
%%bash
# Launch the hyperparameter selection + benchmark script.
# When tmux is not installed, run the script directly in this cell (blocking).
# When tmux is available, start it in a detached session named "experiment"
# so the cell returns immediately while the script keeps running.
if ! [ -x "$(command -v tmux)" ]; then
  # The space between `bash` and the script path is required; without it the
  # shell looks for a command literally named "bash./run_experiment.sh".
  bash ./run_experiment.sh
else
  tmux new-session -d -s experiment "bash ./run_experiment.sh"
fi

# Running the full experiment takes around 2 hours.

### Examine benchmark results

In [1]:
import pandas as pd
from IPython.display import display, HTML
from analysis import find_best_method
from benchmarks import prepare_benchmark

In [2]:
# Metric columns reported for every dataset except 'Synth'.
metrics = [
    'ROC',
    'PRC',
    'Silhouette_auc',
    'Hroc',
    'Hprc',
]

# Metric columns reported for the 'Synth' dataset (adds PURITY, RAND and MI).
synth_metrics = [
    'ROC',
    'PRC',
    'PURITY',
    'RAND',
    'MI',
    'Silhouette_auc',
    'Hroc',
    'Hprc',
]

# Directory holding the `<dataset>_benchmark_complete.csv` result files.
result_dir = 'benchmark_results'

In [4]:
# For each dataset: load its benchmark CSV, then show the performance table
# followed by the table returned by `find_best_method`.
for dataname in ('Synth', 'ICU', 'ADNI'):
    # 'Synth' is reported with the extended metric list; the others use `metrics`.
    m = synth_metrics if dataname == 'Synth' else metrics

    results = pd.read_csv(
        f'{result_dir}/{dataname}_benchmark_complete.csv',
        index_col=0,
    )
    ttest = find_best_method(results, m)

    print(f'dataset: {dataname}')
    print('performance')
    performance_table = results[['method'] + m]
    display(HTML(performance_table.to_html()))

    # p-value of two-sample test (of equal mean)
    print('p-value')
    display(HTML(ttest.to_html()))


dataset: Synth
performance


Unnamed: 0,method,ROC,PRC,PURITY,RAND,MI,Silhouette_auc,Hroc,Hprc
0,KM-E2P(y),0.96+-0.026,0.944+-0.028,0.653+-0.024,0.457+-0.064,0.543+-0.099,0.649+-0.114,0.463+-0.372,0.459+-0.37
1,KM-E2P(z),0.966+-0.015,0.945+-0.025,0.642+-0.016,0.377+-0.043,0.472+-0.038,0.780+-0.048,0.862+-0.031,0.854+-0.033
2,KM-DTW-D,0.722+-0.033,0.649+-0.028,0.469+-0.017,0.068+-0.021,0.077+-0.022,0.867+-0.002,0.787+-0.020,0.742+-0.019
3,T-Phenotype,0.972+-0.035,0.958+-0.046,0.838+-0.180,0.726+-0.297,0.751+-0.253,0.819+-0.115,0.884+-0.078,0.878+-0.08
4,KM-Laplace,0.664+-0.109,0.613+-0.084,0.69+-0.063,0.417+-0.092,0.443+-0.075,0.855+-0.016,0.743+-0.071,0.711+-0.058
5,T-Phenotype(joint),0.937+-0.038,0.919+-0.042,0.689+-0.145,0.475+-0.237,0.524+-0.195,0.812+-0.048,0.87+-0.040,0.862+-0.043
6,AC-TPC,0.964+-0.015,0.949+-0.02,0.66+-0.019,0.488+-0.033,0.592+-0.037,0.897+-0.012,0.929+-0.012,0.922+-0.015
7,SEQ2SEQ,0.507+-0.028,0.505+-0.014,0.378+-0.008,-0.003+-0.003,0.005+-0.003,0.832+-0.004,0.63+-0.022,0.628+-0.011


p-value


Unnamed: 0,method,ROC,PRC,PURITY,RAND,MI,Silhouette_auc,Hroc,Hprc
0,KM-E2P(y),0.262186,0.287507,0.002097,0.004672,0.01,0.000013,0.000582,0.000588
1,KM-E2P(z),0.509371,0.308178,0.001439,0.000933,0.001392,0.000006,0.000021,0.000036
2,KM-DTW-D,0.0,0.0,0.000019,0.000011,0.000003,0.000005,0.0,0.0
3,T-Phenotype,best,best,best,best,best,0.018242,0.036995,0.045303
4,KM-Laplace,0.000002,0.0,0.009343,0.002427,0.000912,0.000016,0.000003,0.0
5,T-Phenotype(joint),0.017894,0.025291,0.022313,0.020042,0.014347,0.000067,0.000262,0.000417
6,AC-TPC,0.3841,0.45431,0.00258,0.008191,0.026032,best,best,best
7,SEQ2SEQ,0.0,0.0,0.000004,0.000005,0.000001,0.0,0.0,0.0


dataset: ICU
performance


Unnamed: 0,method,ROC,PRC,Silhouette_auc,Hroc,Hprc
0,KM-E2P(y),0.643+-0.027,0.561+-0.011,0.737+-0.086,0.683+-0.038,0.634+-0.034
1,KM-E2P(z),0.634+-0.017,0.555+-0.008,0.775+-0.020,0.697+-0.009,0.647+-0.006
2,KM-DTW-D,0.529+-0.020,0.51+-0.007,0.864+-0.052,0.655+-0.008,0.640+-0.010
3,T-Phenotype,0.614+-0.061,0.550+-0.028,0.780+-0.028,0.704+-0.011,0.653+-0.004
4,KM-Laplace,0.536+-0.039,0.514+-0.014,0.868+-0.046,0.661+-0.018,0.645+-0.005
5,T-Phenotype(joint),0.679+-0.011,0.578+-0.007,0.762+-0.019,0.718+-0.008,0.657+-0.006
6,AC-TPC,0.636+-0.035,0.562+-0.014,0.781+-0.026,0.701+-0.024,0.653+-0.014
7,SEQ2SEQ,0.505+-0.016,0.502+-0.005,0.888+-0.003,0.643+-0.013,0.641+-0.004


p-value


Unnamed: 0,method,ROC,PRC,Silhouette_auc,Hroc,Hprc
0,KM-E2P(y),0.000637,0.000447,0.000058,0.004235,0.019302
1,KM-E2P(z),0.00001,0.000013,0.0,0.000061,0.000858
2,KM-DTW-D,0.0,0.0,0.078128,0.0,0.000212
3,T-Phenotype,0.001765,0.002792,0.0,0.001973,0.041133
4,KM-Laplace,0.0,0.0,0.093754,0.000001,0.000147
5,T-Phenotype(joint),best,best,0.0,best,best
6,AC-TPC,0.000889,0.002056,0.0,0.018564,0.282989
7,SEQ2SEQ,0.0,0.0,best,0.0,0.00001


dataset: ADNI
performance


Unnamed: 0,method,ROC,PRC,Silhouette_auc,Hroc,Hprc
0,KM-E2P(y),0.898+-0.006,0.734+-0.019,0.682+-0.010,0.775+-0.006,0.707+-0.009
1,KM-E2P(z),0.796+-0.123,0.617+-0.125,0.627+-0.055,0.699+-0.078,0.618+-0.088
2,KM-DTW-D,0.743+-0.013,0.522+-0.02,0.762+-0.049,0.752+-0.027,0.618+-0.021
3,T-Phenotype,0.886+-0.006,0.704+-0.020,0.699+-0.007,0.781+-0.003,0.701+-0.008
4,KM-Laplace,0.656+-0.043,0.44+-0.025,0.811+-0.016,0.725+-0.031,0.57+-0.024
5,T-Phenotype(joint),0.856+-0.014,0.663+-0.034,0.711+-0.009,0.777+-0.005,0.686+-0.016
6,AC-TPC,0.859+-0.019,0.664+-0.021,0.71+-0.026,0.777+-0.022,0.686+-0.020
7,SEQ2SEQ,0.775+-0.024,0.550+-0.031,0.772+-0.014,0.774+-0.013,0.642+-0.022


p-value


Unnamed: 0,method,ROC,PRC,Silhouette_auc,Hroc,Hprc
0,KM-E2P(y),best,best,0.0,0.004418,best
1,KM-E2P(z),0.006684,0.003656,0.000001,0.001746,0.002258
2,KM-DTW-D,0.0,0.0,0.003138,0.001585,0.0
3,T-Phenotype,0.00026,0.001416,0.0,best,0.060505
4,KM-Laplace,0.0,0.0,best,0.000049,0.0
5,T-Phenotype(joint),0.000002,0.000044,0.0,0.016903,0.001035
6,AC-TPC,0.000026,0.000005,0.000001,0.452557,0.003011
7,SEQ2SEQ,0.0,0.0,0.000042,0.050512,0.000002


### Check hyperparameters

In [5]:
def print_hparams(model, config, loss_weights):
    """Print the hyperparameters of one model setup to stdout.

    Only models whose ``__name__`` is ``'KME2P'`` or ``'Predictor'`` are
    handled; any other model produces no output.

    Args:
        model: Object exposing ``__name__`` (e.g. a class); used only to pick
            which set of hyperparameters to print.
        config: Mapping of hyperparameters. For ``'KME2P'`` the keys
            ``latent_space``, ``num_layers``, ``hidden_size`` and
            ``latent_size`` are read. For ``'Predictor'`` the keys
            ``encoder_config`` (itself a mapping with ``pole_separation`` and
            ``max_degree``), ``num_layer`` and ``hidden_size`` are read.
        loss_weights: Mapping read only for ``'Predictor'``; must contain the
            keys ``pole`` and ``real``.

    Returns:
        None.
    """
    model_name = model.__name__

    if model_name == 'KME2P':
        print(f'model: KME2P{config["latent_space"]}')
        for key in ('num_layers', 'hidden_size', 'latent_size'):
            print(f'{key}={config[key]}')
        print()
        return

    if model_name != 'Predictor':
        # Unrecognised models are skipped without printing anything.
        return

    print('model: T-Phenotype')

    print('encoder parameters:')
    encoder_config = config["encoder_config"]
    for key in ('pole_separation', 'max_degree'):
        print(f'{key}={encoder_config[key]}')
    for key in ('pole', 'real'):
        print(f'loss coeff {key}={loss_weights[key]}')

    print('predictor parameters:')
    # Note: the predictor config uses the singular key 'num_layer'.
    for key in ('num_layer', 'hidden_size'):
        print(f'{key}={config[key]}')
    print()
        
# Walk through the datasets and print the hyperparameters of each model setup
# returned by `prepare_benchmark`.
for dataname in ('Synth', 'ICU', 'ADNI'):
    # `splits` is not used in this cell; only `setup_list` is inspected.
    splits, setup_list = prepare_benchmark(dataname)
    print('hyperparameters')

    # Each setup is a (model, config, loss_weights) triple.
    for model, config, loss_weights in setup_list:
        print_hparams(model, config, loss_weights)
    print()

Synth
total samples: 1200
max length: 20
x_dim: 2
y_dim: 2
features: ['x1', 'x2']
temporal dims: [0, 1]
dataset Synth, cluster num K= 3
hyperparameters
model: KME2Py
num_layers=2
hidden_size=20
latent_size=10

model: KME2Pz
num_layers=2
hidden_size=30
latent_size=10

model: T-Phenotype
encoder parameters:
pole_separation=2.0
max_degree=1
loss coeff pole=1.0
loss coeff real=0.1
predictor parameters:
num_layer=3
hidden_size=10


ICU
total samples: 1554
max length: 24
x_dim: 4
y_dim: 2
features: ['Age', 'Gender', 'GCS', 'PaCO2']
temporal dims: [2, 3]
dataset ICU, cluster num K= 2
hyperparameters
model: KME2Py
num_layers=3
hidden_size=20
latent_size=10

model: KME2Pz
num_layers=3
hidden_size=20
latent_size=20

model: T-Phenotype
encoder parameters:
pole_separation=2.0
max_degree=2
loss coeff pole=1.0
loss coeff real=0.1
predictor parameters:
num_layer=4
hidden_size=10


ADNI
total samples: 1346
max length: 12
x_dim: 3
y_dim: 3
features: ['APOE4', 'CDRSB', 'Hippocampus']
temporal dims: [1, 