In [2]:
from datasets import get_dataset
from synthcity.plugins import Plugins
from synthcity.benchmark import Benchmarks
from pathlib import Path
from synthcity.utils.serialization import save_to_file, load_from_file

plugins = Plugins().list()

out_dir = Path("output")


def evaluate_dataset(name: str, dataset: str, plugins: list):
    df, duration_col, event_col, time_horizons = get_dataset(dataset)

    for plugin in plugins:
        bkp = out_dir / f"metricsv3_{dataset}_{name}_{plugin}.bkp"

        if bkp.exists():
            score = load_from_file(bkp)
        else:
            score = Benchmarks.evaluate(
                [plugin],
                df,
                task_type="survival_analysis",
                target_column=event_col,
                time_to_event_column=duration_col,
                time_horizons=time_horizons,
                synthetic_size=len(df),
                repeats=repeats,
            )
            save_to_file(bkp, score)

    return score

['nflow',
 'bayesian_network',
 'gaussian_copula',
 'rtvae',
 'tvae',
 'copulagan',
 'privbayes',
 'pategan',
 'ctgan',
 'adsgan']

In [3]:
base_plugins = [
    "privbayes",
    "adsgan",
    "ctgan",
    "tvae",
    "nflow",
]
survival_plugins = [
    "survival_gan",
]

repeats = 5

## AIDS dataset

In [4]:
base_score = evaluate_dataset("baseline", "aids", base_plugins)
Benchmarks.print(base_score)

[2022-05-16T10:27:01.263135+0300][195736][INFO] Benchmarking plugin : privbayes
[2022-05-16T10:27:01.266981+0300][195736][INFO]  Experiment repeat: 0 task type: survival_analysis Train df hash = 7600251698133035800
[2022-05-16T10:27:03.542343+0300][195736][INFO]  Performance eval for df hash = 7083658545831454673 ood hash = 3587437308865594471
[2022-05-16T10:27:03.987107+0300][195736][INFO]  Performance eval for df hash = 4379228602368725767 ood hash = 3587437308865594471
[2022-05-16T10:27:07.609211+0300][195736][INFO] Baseline performance score: {'c_index': (0.6458886460480732, 0.04032812018673624), 'brier_score': (0.07646497879530362, 0.001620832931544638)}
[2022-05-16T10:27:08.397413+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.41332810151127203, 0.013066743906420314), 'brier_score': (0.07661519293141063, 0.004444805642172608)}
[2022-05-16T10:27:09.066558+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.357569957851619, 0.037551635702591166),

[2022-05-16T10:33:43.732537+0300][195736][INFO] Baseline performance score: {'c_index': (0.7318275737022759, 0.044096833875296504), 'brier_score': (0.06483102408757001, 0.0015778626624192917)}




[2022-05-16T10:34:01.808143+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.3457708538798822, 0.01800521704380529), 'brier_score': (0.06633888815956009, 0.0019163250311000758)}


[2022-05-16T10:34:20.880143+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.3994268671712697, 0.08969025402981907), 'brier_score': (0.06183863045861946, 0.003664190706705681)}
[2022-05-16T10:34:46.996636+0300][195736][INFO]  Experiment repeat: 4 task type: survival_analysis Train df hash = 2799655183830991161
[2022-05-16T10:34:48.223925+0300][195736][INFO]  Performance eval for df hash = 2347796136148981590 ood hash = 7857508845427557426
[2022-05-16T10:34:48.714123+0300][195736][INFO] Baseline performance score: {'c_index': (0.7376771331108017, 0.029951337666016858), 'brier_score': (0.06345155209883215, 0.0011736319065047576)}
[2022-05-16T10:34:49.078743+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.4412623461809409, 0.050578357570653806), 'brier_score': (0.06617662559779035, 0.003715290724654111)}
[2022-05-16T10:34:49.324600+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.5027555597069516, 0.20431782014385455), 'brier_score'

[2022-05-16T10:35:42.268530+0300][195736][INFO] Baseline performance score: {'c_index': (0.7560526804446517, 0.04528952592685041), 'brier_score': (0.06438600914793821, 0.004628518719019847)}


[2022-05-16T10:36:00.459822+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.39583232991290257, 0.05874422403238028), 'brier_score': (0.06561564025713097, 0.004932647109212832)}
[2022-05-16T10:36:19.366170+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.4925377952658214, 0.2055936967427312), 'brier_score': (0.06611541262341529, 0.008301978144135224)}
[2022-05-16T10:36:41.481917+0300][195736][INFO] Benchmarking plugin : adsgan
[2022-05-16T10:36:41.485541+0300][195736][INFO]  Experiment repeat: 0 task type: survival_analysis Train df hash = 7600251698133035800
[2022-05-16T10:36:42.715445+0300][195736][INFO]  Performance eval for df hash = 7083658545831454673 ood hash = 3587437308865594471
[2022-05-16T10:36:43.103338+0300][195736][INFO]  Performance eval for df hash = 4379228602368725767 ood hash = 3587437308865594471
[2022-05-16T10:36:44.702223+0300][195736][INFO] Baseline performance score: {'c_index': (0.6458886460480732, 0.04032812018673624), 'bri

[2022-05-16T10:41:17.507468+0300][195736][INFO] Baseline performance score: {'c_index': (0.730585409461042, 0.06673215614633331), 'brier_score': (0.06667289514426925, 0.0009311629726893831)}
[2022-05-16T10:41:35.218179+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.5, 0.0), 'brier_score': (0.07296944209484786, 0.0018848089025678262)}
[2022-05-16T10:41:53.096869+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.5, 0.0), 'brier_score': (0.05815305782988725, 0.006527588957177379)}
[2022-05-16T10:42:13.227481+0300][195736][INFO]  Experiment repeat: 3 task type: survival_analysis Train df hash = 1673427977495894659
[2022-05-16T10:43:11.208796+0300][195736][INFO]  Performance eval for df hash = 7518670409369813715 ood hash = 3740892683958751319
[2022-05-16T10:43:11.723849+0300][195736][INFO] Baseline performance score: {'c_index': (0.7021358372394065, 0.046706701235274546), 'brier_score': (0.06418415713790064, 0.0024136940558960956)}
[2022-05-16T10:43:11



[2022-05-16T10:44:03.018208+0300][195736][INFO] Baseline performance score: {'c_index': (0.736891629546534, 0.03600517870003014), 'brier_score': (0.06487540788652796, 0.0023140667530130135)}
[2022-05-16T10:44:20.565412+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.5, 0.0), 'brier_score': (0.07089228312771435, 0.0026529746646130467)}
[2022-05-16T10:44:38.750266+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.5, 0.0), 'brier_score': (0.06632353007182906, 0.0033523737857237505)}
[2022-05-16T10:44:55.081518+0300][195736][INFO]  Experiment repeat: 4 task type: survival_analysis Train df hash = 2799655183830991161
[2022-05-16T10:45:48.909931+0300][195736][INFO]  Performance eval for df hash = 2424075895702445820 ood hash = 7857508845427557426
[2022-05-16T10:45:49.287248+0300][195736][INFO]  Performance eval for df hash = 6511089220524370800 ood hash = 7857508845427557426
[2022-05-16T10:45:50.807114+0300][195736][INFO] Baseline performance score: {'c_i



[2022-05-16T10:46:42.164668+0300][195736][INFO] Baseline performance score: {'c_index': (0.7263371534297344, 0.08450530305797059), 'brier_score': (0.06430908161037159, 0.002859511600708086)}
[2022-05-16T10:47:00.662570+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.6853601063313352, 0.11441825503370691), 'brier_score': (0.06709521418021965, 0.0035718106601504897)}
[2022-05-16T10:47:21.271202+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.5454710893407958, 0.21205688833752712), 'brier_score': (0.07160014953581297, 0.010810070405411572)}
[2022-05-16T10:47:43.574617+0300][195736][INFO] Benchmarking plugin : ctgan
[2022-05-16T10:47:43.578512+0300][195736][INFO]  Experiment repeat: 0 task type: survival_analysis Train df hash = 7600251698133035800
[2022-05-16T10:47:44.569519+0300][195736][INFO]  Performance eval for df hash = 7083658545831454673 ood hash = 3587437308865594471
[2022-05-16T10:47:44.965647+0300][195736][INFO]  Performance eval for df ha

[2022-05-16T10:48:37.444988+0300][195736][INFO] Baseline performance score: {'c_index': (0.6871800494015959, 0.04036327969937281), 'brier_score': (0.06635246673737544, 0.0021339580455197386)}
[2022-05-16T10:48:56.635356+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.6262002417235827, 0.0385982222518938), 'brier_score': (0.06767799444836825, 0.0016679911691818356)}
[2022-05-16T10:49:16.980030+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.6454260773203699, 0.15126054323173244), 'brier_score': (0.05665896442478712, 0.011699844482441057)}
[2022-05-16T10:49:34.177539+0300][195736][INFO]  Experiment repeat: 1 task type: survival_analysis Train df hash = 4871573768128818830
[2022-05-16T10:49:35.111471+0300][195736][INFO]  Performance eval for df hash = 755344637464063550 ood hash = 6627142002620837558
[2022-05-16T10:49:35.640381+0300][195736][INFO] Baseline performance score: {'c_index': (0.7512485004374893, 0.04714978102141932), 'brier_score': (0.065

[2022-05-16T10:52:48.730002+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.6325301092205652, 0.05352043910567877), 'brier_score': (0.071156955941426, 0.0009196651580626306)}


[2022-05-16T10:53:10.422632+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.624656814821048, 0.09456055037482942), 'brier_score': (0.058453993551039085, 0.0052693175091404155)}


[2022-05-16T10:53:29.110431+0300][195736][INFO]  Experiment repeat: 3 task type: survival_analysis Train df hash = 1673427977495894659
[2022-05-16T10:53:38.178154+0300][195736][INFO]  Performance eval for df hash = 2197055078523391743 ood hash = 3740892683958751319
[2022-05-16T10:53:38.679265+0300][195736][INFO] Baseline performance score: {'c_index': (0.7373101395638498, 0.04274365729864837), 'brier_score': (0.06356747228494575, 0.005813710658538893)}
[2022-05-16T10:53:39.091151+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.36552098320829574, 0.06386729529092916), 'brier_score': (0.07152627504356675, 0.004305078523363782)}
[2022-05-16T10:53:39.381498+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.3746625727943738, 0.10518041543162841), 'brier_score': (0.06648912874527475, 0.005617337597701962)}
[2022-05-16T10:53:39.387738+0300][195736][INFO]  Performance eval for df hash = 2077359944265216171 ood hash = 3740892683958751319
[2022-05-16T10:53:40



[2022-05-16T10:54:40.843412+0300][195736][INFO] Baseline performance score: {'c_index': (0.7080225591840699, 0.0681071051871905), 'brier_score': (0.06495059500835328, 0.0015665953085169182)}
[2022-05-16T10:55:02.265264+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.5469035522443011, 0.0490112570790276), 'brier_score': (0.06818710460479271, 0.0015141589512680034)}
[2022-05-16T10:55:22.747849+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.5010956509287108, 0.13294680148310933), 'brier_score': (0.06390355464554691, 0.005004259120178149)}
[2022-05-16T10:55:29.291299+0300][195736][INFO]  Experiment repeat: 4 task type: survival_analysis Train df hash = 2799655183830991161
[2022-05-16T10:55:38.040969+0300][195736][INFO]  Performance eval for df hash = 5063610932450445834 ood hash = 7857508845427557426
[2022-05-16T10:55:38.522085+0300][195736][INFO] Baseline performance score: {'c_index': (0.7512313133000795, 0.026162971253533854), 'brier_score': (0.06

[2022-05-16T10:56:32.688098+0300][195736][INFO] Baseline performance score: {'c_index': (0.7431002220845527, 0.06696043247444988), 'brier_score': (0.06443591317444823, 0.0013169947324962947)}
[2022-05-16T10:56:52.266530+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.5434567259581838, 0.03229428404300073), 'brier_score': (0.07437775739280568, 0.0016864718465033077)}
[2022-05-16T10:57:12.356615+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.47120051631080734, 0.06626254204470458), 'brier_score': (0.07640490411791628, 0.00528161563270775)}
[2022-05-16T10:57:34.289039+0300][195736][INFO] Benchmarking plugin : tvae
[2022-05-16T10:57:34.292627+0300][195736][INFO]  Experiment repeat: 0 task type: survival_analysis Train df hash = 7600251698133035800
[2022-05-16T10:57:35.250466+0300][195736][INFO]  Performance eval for df hash = 7083658545831454673 ood hash = 3587437308865594471
[2022-05-16T10:57:35.650194+0300][195736][INFO]  Performance eval for df ha

[2022-05-16T11:00:15.211993+0300][195736][INFO] Baseline performance score: {'c_index': (0.703406398375867, 0.012855759227552457), 'brier_score': (0.0684277368688339, 0.0019967776227318187)}
[2022-05-16T11:00:32.750850+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.6712591900330273, 0.020640855978257114), 'brier_score': (0.07502602191081158, 0.0020250613524371514)}
[2022-05-16T11:00:51.180378+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.6670652465409009, 0.18057552660703935), 'brier_score': (0.049412775349072215, 0.007019753265858241)}
[2022-05-16T11:01:07.731473+0300][195736][INFO]  Experiment repeat: 2 task type: survival_analysis Train df hash = 5947425095989233042
[2022-05-16T11:01:08.640222+0300][195736][INFO]  Performance eval for df hash = 3637502659144005740 ood hash = 3695054488948482536
[2022-05-16T11:01:09.143021+0300][195736][INFO] Baseline performance score: {'c_index': (0.720235317699445, 0.028205460918150068), 'brier_score': (0.

[2022-05-16T11:02:00.315595+0300][195736][INFO] Baseline performance score: {'c_index': (0.730585409461042, 0.06673215614633331), 'brier_score': (0.06667289514426925, 0.0009311629726893831)}


[2022-05-16T11:02:19.293431+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.5, 0.0), 'brier_score': (0.072796513395873, 0.0018802219119467632)}
[2022-05-16T11:02:37.706019+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.5, 0.0), 'brier_score': (0.05801553287521307, 0.00651197450457831)}
[2022-05-16T11:02:51.103263+0300][195736][INFO]  Experiment repeat: 3 task type: survival_analysis Train df hash = 1673427977495894659
[2022-05-16T11:02:59.016896+0300][195736][INFO]  Performance eval for df hash = 7777322286780892127 ood hash = 3740892683958751319
[2022-05-16T11:02:59.508902+0300][195736][INFO] Baseline performance score: {'c_index': (0.7435798176356055, 0.016244337285300534), 'brier_score': (0.06271864675841868, 0.0037249295731663075)}
[2022-05-16T11:02:59.799719+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.5373503076346866, 0.0652826214086392), 'brier_score': (0.2685714155142736, 0.028323175112918708)}
[2022-05-16T11:02:59.

[2022-05-16T11:04:11.120443+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.6974686470270811, 0.04414443337020958), 'brier_score': (0.07042459969680437, 0.0032318726896794353)}
[2022-05-16T11:04:29.189832+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.6085586487412612, 0.19453733319785882), 'brier_score': (0.06590050272129942, 0.003330131079241327)}
[2022-05-16T11:04:45.437724+0300][195736][INFO]  Experiment repeat: 4 task type: survival_analysis Train df hash = 2799655183830991161
[2022-05-16T11:04:53.629893+0300][195736][INFO]  Performance eval for df hash = 1549901147521390446 ood hash = 7857508845427557426
[2022-05-16T11:04:54.126594+0300][195736][INFO] Baseline performance score: {'c_index': (0.7559615232243536, 0.003713862029358933), 'brier_score': (0.062040993731329465, 0.004057229989090057)}
[2022-05-16T11:04:54.433137+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.6020504727170471, 0.022012227248191886), 'brier_score'



[2022-05-16T11:05:47.695777+0300][195736][INFO] Baseline performance score: {'c_index': (0.7534990464542893, 0.018518635115369022), 'brier_score': (0.06431652442863628, 0.0027463080278990666)}
[2022-05-16T11:06:06.913307+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.6689382121993899, 0.06592691346778512), 'brier_score': (0.06978332319322629, 0.002727201495982226)}
[2022-05-16T11:06:27.204081+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.4872040583716091, 0.2048596611377245), 'brier_score': (0.06969474516294026, 0.009423764605135967)}
[2022-05-16T11:06:41.791686+0300][195736][INFO] Benchmarking plugin : nflow
[2022-05-16T11:06:41.795065+0300][195736][INFO]  Experiment repeat: 0 task type: survival_analysis Train df hash = 7600251698133035800
[2022-05-16T11:06:42.993450+0300][195736][INFO]  Performance eval for df hash = 7083658545831454673 ood hash = 3587437308865594471
[2022-05-16T11:06:43.402935+0300][195736][INFO]  Performance eval for df ha

[2022-05-16T11:09:45.913006+0300][195736][INFO] Baseline performance score: {'c_index': (0.703406398375867, 0.012855759227552457), 'brier_score': (0.0684277368688339, 0.0019967776227318187)}
[2022-05-16T11:10:05.936421+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.5659723968225396, 0.05484945916446544), 'brier_score': (0.14635819525895014, 0.010648249601723163)}
[2022-05-16T11:10:27.170908+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.6848680327800905, 0.08119874965338424), 'brier_score': (0.13953300199155608, 0.009330344903485355)}
[2022-05-16T11:10:47.335501+0300][195736][INFO]  Experiment repeat: 2 task type: survival_analysis Train df hash = 5947425095989233042
[2022-05-16T11:10:48.472755+0300][195736][INFO]  Performance eval for df hash = 3637502659144005740 ood hash = 3695054488948482536
[2022-05-16T11:10:48.978958+0300][195736][INFO] Baseline performance score: {'c_index': (0.720235317699445, 0.028205460918150068), 'brier_score': (0.065



[2022-05-16T11:11:42.515273+0300][195736][INFO] Baseline performance score: {'c_index': (0.730585409461042, 0.06673215614633331), 'brier_score': (0.06667289514426925, 0.0009311629726893831)}
[2022-05-16T11:12:02.828950+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.47798443958414133, 0.0436710654848235), 'brier_score': (0.09543238685194476, 0.003692159138976176)}
[2022-05-16T11:12:25.730107+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.34707709707622986, 0.09782554537327097), 'brier_score': (0.08708121781509788, 0.00863088181113877)}
[2022-05-16T11:12:49.088143+0300][195736][INFO]  Experiment repeat: 3 task type: survival_analysis Train df hash = 1673427977495894659
[2022-05-16T11:13:03.324550+0300][195736][INFO]  Performance eval for df hash = 6500103258618489619 ood hash = 3740892683958751319
[2022-05-16T11:13:03.832020+0300][195736][INFO] Baseline performance score: {'c_index': (0.7222684895124875, 0.040556525034018236), 'brier_score': (0.06



[2022-05-16T11:16:13.507274+0300][195736][INFO] Baseline performance score: {'c_index': (0.7460060735212148, 0.03467936158790648), 'brier_score': (0.06418273557696555, 0.001350528731015126)}
[2022-05-16T11:16:34.295962+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.625406860049632, 0.0478814601839025), 'brier_score': (0.10558431508462873, 0.003022806542021494)}
[2022-05-16T11:16:56.484270+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.49476952262324453, 0.17549395187693492), 'brier_score': (0.10823815063236579, 0.011841670109957101)}



[4m[1mPlugin : nflow[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch.score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion.score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.01
sanity.nearest_syn_neighbor_distance.mean,0.117241,0.282102,0.201176,0.063785,0.219514,0.112202,5,0,0.01
sanity.close_values_probability.score,0.33913,0.908696,0.635652,0.23751,0.657609,0.488043,5,0,0.01
sanity.distant_values_probability.score,0.001087,0.007609,0.002826,0.00244,0.002174,0.001087,5,0,0.01
stats.jensenshannon_dist.marginal,0.236766,0.281629,0.258314,0.015141,0.25802,0.01656,5,0,0.08
stats.chi_squared_test.marginal,0.766441,0.864868,0.819239,0.042809,0.838563,0.087707,5,0,0.01
stats.feature_corr.joint,1.628148,2.090798,1.798422,0.167221,1.792021,0.192891,5,0,0.07
stats.inv_kl_divergence.marginal,0.803811,0.834816,0.821341,0.012101,0.82429,0.021633,5,0,0.01
stats.ks_test.marginal,0.698495,0.758194,0.730853,0.02359,0.734365,0.044147,5,0,0.01





In [5]:
survival_score = evaluate_dataset("survival", "aids", survival_plugins)
Benchmarks.print(survival_score)

[2022-05-16T11:17:25.232903+0300][195736][INFO] Benchmarking plugin : survival_gan
[2022-05-16T11:17:25.236541+0300][195736][INFO]  Experiment repeat: 0 task type: survival_analysis Train df hash = 7600251698133035800
[2022-05-16T11:17:26.458284+0300][195736][INFO]  Performance eval for df hash = 7083658545831454673 ood hash = 3587437308865594471
[2022-05-16T11:17:26.848615+0300][195736][INFO]  Performance eval for df hash = 4379228602368725767 ood hash = 3587437308865594471
[2022-05-16T11:17:28.443609+0300][195736][INFO] Baseline performance score: {'c_index': (0.6458886460480732, 0.04032812018673624), 'brier_score': (0.07646497879530362, 0.001620832931544638)}
[2022-05-16T11:17:29.202118+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.7207774693942667, 0.06137837895130282), 'brier_score': (0.0716501155527424, 0.002613780817348104)}
[2022-05-16T11:17:29.840666+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.7241307660274966, 0.04146970691953402),

[2022-05-16T11:18:19.236861+0300][195736][INFO] Baseline performance score: {'c_index': (0.6871800494015959, 0.04036327969937281), 'brier_score': (0.06635246673737544, 0.0021339580455197386)}
[2022-05-16T11:18:37.936360+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.6790012249364853, 0.017939551839588546), 'brier_score': (0.06651719895271602, 0.002145370676521002)}
[2022-05-16T11:18:57.930055+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.6645403680031702, 0.05061661348483922), 'brier_score': (0.057382880234505745, 0.010503017472326799)}
[2022-05-16T11:19:27.990867+0300][195736][INFO]  Experiment repeat: 1 task type: survival_analysis Train df hash = 4871573768128818830
[2022-05-16T11:19:29.182414+0300][195736][INFO]  Performance eval for df hash = 755344637464063550 ood hash = 6627142002620837558
[2022-05-16T11:19:29.679630+0300][195736][INFO] Baseline performance score: {'c_index': (0.7512485004374893, 0.04714978102141932), 'brier_score': (0.0

[2022-05-16T11:20:29.887182+0300][195736][INFO] Baseline performance score: {'c_index': (0.703406398375867, 0.012855759227552457), 'brier_score': (0.0684277368688339, 0.0019967776227318187)}


[2022-05-16T11:20:49.764609+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.6940840509559396, 0.057032152221836246), 'brier_score': (0.06808279107557334, 0.001753774450788669)}
[2022-05-16T11:21:09.040058+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.6491392394528334, 0.0447992320145176), 'brier_score': (0.04685325098670986, 0.006321484010472587)}
[2022-05-16T11:21:37.629739+0300][195736][INFO]  Experiment repeat: 2 task type: survival_analysis Train df hash = 5947425095989233042
[2022-05-16T11:21:38.763468+0300][195736][INFO]  Performance eval for df hash = 3637502659144005740 ood hash = 3695054488948482536
[2022-05-16T11:21:39.268852+0300][195736][INFO] Baseline performance score: {'c_index': (0.720235317699445, 0.028205460918150068), 'brier_score': (0.06563488612405532, 0.002650254167850425)}
[2022-05-16T11:21:39.327721+0300][195736][ERROR] Failed to evaluate synthetic ID performance. cox_ph: Convergence halted due to matrix inversion problem

[2022-05-16T11:22:31.643561+0300][195736][INFO] Baseline performance score: {'c_index': (0.730585409461042, 0.06673215614633331), 'brier_score': (0.06667289514426925, 0.0009311629726893831)}


[2022-05-16T11:22:50.322226+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.7245357990492517, 0.0753553941003446), 'brier_score': (0.06554722155481336, 0.000911969939064723)}
[2022-05-16T11:23:09.347687+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.6829173405191745, 0.09335374730481234), 'brier_score': (0.05354947232714705, 0.004971843250188444)}
[2022-05-16T11:23:32.359985+0300][195736][INFO]  Experiment repeat: 3 task type: survival_analysis Train df hash = 1673427977495894659
[2022-05-16T11:23:32.401259+0300][195736][INFO] Using imbalanced time and censoring sampling
[2022-05-16T11:23:33.337795+0300][195736][INFO] Train the uncensoring model
[2022-05-16T11:23:37.277023+0300][195736][INFO] max T = 364.0, max syn T = 363.5200500488281
[2022-05-16T11:23:37.277559+0300][195736][INFO] Train the synthetic generator
[2022-05-16T11:24:36.258406+0300][195736][INFO]  Performance eval for df hash = 8744683341067475571 ood hash = 3740892683958751319
[202

[2022-05-16T11:25:28.860365+0300][195736][INFO] Baseline performance score: {'c_index': (0.7351976604931941, 0.009380622705413986), 'brier_score': (0.06503710290639104, 0.0025744090527886907)}


[2022-05-16T11:25:47.805618+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.7577465596585959, 0.014555046290376404), 'brier_score': (0.06369895272894327, 0.0024985127474883625)}
[2022-05-16T11:26:06.775032+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.7184085664415697, 0.11739913728211712), 'brier_score': (0.06017176070992914, 0.002808208210953939)}
[2022-05-16T11:26:34.997958+0300][195736][INFO]  Experiment repeat: 4 task type: survival_analysis Train df hash = 2799655183830991161
[2022-05-16T11:26:35.035117+0300][195736][INFO] Using imbalanced time and censoring sampling
[2022-05-16T11:26:35.977619+0300][195736][INFO] Train the uncensoring model
[2022-05-16T11:26:40.126272+0300][195736][INFO] max T = 364.0, max syn T = 363.98992919921875
[2022-05-16T11:26:40.126807+0300][195736][INFO] Train the synthetic generator
[2022-05-16T11:27:39.143247+0300][195736][INFO]  Performance eval for df hash = 6948022320798299602 ood hash = 7857508845427557426


[2022-05-16T11:28:33.217559+0300][195736][INFO] Baseline performance score: {'c_index': (0.7371585411529611, 0.021003822226355856), 'brier_score': (0.06436824521405582, 0.0050963667223021284)}


[2022-05-16T11:28:52.481501+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.7424107556130135, 0.04591660658012203), 'brier_score': (0.063427266961141, 0.004341100303647372)}
[2022-05-16T11:29:17.126127+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.6723402296512372, 0.056451471817662115), 'brier_score': (0.06430527750483324, 0.006730199403544118)}



[4m[1mPlugin : survival_gan[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch.score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion.score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.nearest_syn_neighbor_distance.mean,0.063757,0.084186,0.072943,0.008262,0.073134,0.015998,5,0,0.01
sanity.close_values_probability.score,0.940217,0.973913,0.959565,0.0136,0.965217,0.025,5,0,0.01
sanity.distant_values_probability.score,0.001087,0.003261,0.001957,0.000813,0.002174,0.001087,5,0,0.01
stats.jensenshannon_dist.marginal,0.058697,0.084849,0.073705,0.009109,0.075171,0.010997,5,0,0.07
stats.chi_squared_test.marginal,0.457049,0.747053,0.568678,0.097657,0.530353,0.05537,5,0,0.01
stats.feature_corr.joint,1.0255,1.361677,1.246404,0.123814,1.267766,0.145302,5,0,0.07
stats.inv_kl_divergence.marginal,0.894375,0.932214,0.914295,0.012772,0.911863,0.012807,5,0,0.01
stats.ks_test.marginal,0.920903,0.948161,0.93791,0.010834,0.942893,0.018227,5,0,0.01





## Metabric

In [6]:
base_score = evaluate_dataset("baseline", "metabric", base_plugins)
Benchmarks.print(base_score)

[2022-05-16T11:29:34.635179+0300][195736][INFO] Benchmarking plugin : privbayes
[2022-05-16T11:29:34.638637+0300][195736][INFO]  Experiment repeat: 0 task type: survival_analysis Train df hash = 1082615206799451559
[2022-05-16T11:29:36.744618+0300][195736][INFO]  Performance eval for df hash = 3714545451517944886 ood hash = 2369742275626103798
[2022-05-16T11:29:37.591043+0300][195736][INFO] Baseline performance score: {'c_index': (0.6362518222021938, 0.01043427122067225), 'brier_score': (0.18780283778280085, 0.013277638086733278)}
[2022-05-16T11:29:38.315394+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.5027567288507236, 0.020932043670451757), 'brier_score': (0.21948133288874735, 0.014443940670838956)}
[2022-05-16T11:29:38.782119+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.5202212584685437, 0.043581428820166024), 'brier_score': (0.21739877740228356, 0.036328800303041614)}
[2022-05-16T11:29:38.788517+0300][195736][INFO]  Performance eval for 

[2022-05-16T11:31:37.518796+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.5309522454515104, 0.0319254000284303), 'brier_score': (0.21624821297019325, 0.03259286751046693)}
[2022-05-16T11:31:52.533602+0300][195736][INFO]  Experiment repeat: 1 task type: survival_analysis Train df hash = 9065649484940803752
[2022-05-16T11:31:54.555865+0300][195736][INFO]  Performance eval for df hash = 1180314678481592860 ood hash = 8312814499701059726
[2022-05-16T11:31:55.346495+0300][195736][INFO] Baseline performance score: {'c_index': (0.6370695299707485, 0.011040162087917997), 'brier_score': (0.18521492563382147, 0.009407559045704648)}
[2022-05-16T11:31:56.071824+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.4801527284046003, 0.010519291499781383), 'brier_score': (0.21376822667276332, 0.01687089662924653)}
[2022-05-16T11:31:56.538365+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.47835310873101794, 0.03048549173013869), 'brier_score': (

[2022-05-16T11:33:29.145797+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.45540568284608035, 0.008559332046720282), 'brier_score': (0.2122850004121104, 0.006508744118901054)}
[2022-05-16T11:33:56.386966+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.4697792512293206, 0.030977107490139804), 'brier_score': (0.25874772276066654, 0.09344622825426427)}
[2022-05-16T11:34:19.608940+0300][195736][INFO]  Experiment repeat: 2 task type: survival_analysis Train df hash = 7271128867335054582
[2022-05-16T11:34:21.646710+0300][195736][INFO]  Performance eval for df hash = 1031821224541529103 ood hash = 6870545931919139053
[2022-05-16T11:34:22.429117+0300][195736][INFO] Baseline performance score: {'c_index': (0.6372342504152105, 0.009251340289887762), 'brier_score': (0.18784895175534577, 0.01529063719607242)}
[2022-05-16T11:34:23.143500+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.44159755064935097, 0.013440462951000773), 'brier_score':

[2022-05-16T11:37:56.513131+0300][195736][INFO] Baseline performance score: {'c_index': (0.6403969707791197, 0.004781820964552472), 'brier_score': (0.1981766656420092, 0.013197958511275951)}


[2022-05-16T11:38:21.863747+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.5410162999665085, 0.014745301477497193), 'brier_score': (0.2179566886756327, 0.016313164330089846)}
[2022-05-16T11:38:49.088112+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.5249664706496807, 0.007832987067566716), 'brier_score': (0.22683027680764822, 0.02755779343189672)}
[2022-05-16T11:39:16.795374+0300][195736][INFO]  Experiment repeat: 4 task type: survival_analysis Train df hash = 2211166129410309295
[2022-05-16T11:39:20.968903+0300][195736][INFO]  Performance eval for df hash = 1049195125866826556 ood hash = 8548616833134766624
[2022-05-16T11:39:21.756905+0300][195736][INFO] Baseline performance score: {'c_index': (0.6286491017361081, 0.011087552891095765), 'brier_score': (0.19109813447609894, 0.012616769400008995)}
[2022-05-16T11:39:22.472235+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.4986808932118156, 0.016502935525712983), 'brier_score': 

[2022-05-16T11:45:15.500128+0300][195736][INFO] Baseline performance score: {'c_index': (0.6446570048677241, 0.011492767793291026), 'brier_score': (0.1957376232221791, 0.006818588947746551)}


[2022-05-16T11:45:40.631990+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.5937637464824269, 0.013618404985109958), 'brier_score': (0.22221600788118942, 0.010646545329196795)}
[2022-05-16T11:46:07.651133+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.5288652980621372, 0.03185572296745247), 'brier_score': (0.27891150218260896, 0.056397534310899354)}
[2022-05-16T11:46:28.778631+0300][195736][INFO]  Experiment repeat: 2 task type: survival_analysis Train df hash = 7271128867335054582
[2022-05-16T11:46:31.422133+0300][195736][INFO]  Performance eval for df hash = 1031821224541529103 ood hash = 6870545931919139053
[2022-05-16T11:46:32.206739+0300][195736][INFO] Baseline performance score: {'c_index': (0.6372342504152105, 0.009251340289887762), 'brier_score': (0.18784895175534577, 0.01529063719607242)}
[2022-05-16T11:46:33.134796+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.43509232039197815, 0.02488052868336233), 'brier_score': 

[2022-05-16T11:47:42.464917+0300][195736][INFO] Baseline performance score: {'c_index': (0.6362022539369426, 0.01575209345005484), 'brier_score': (0.20145258156149315, 0.019558556526948698)}
[2022-05-16T11:48:07.712217+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.43509613655591095, 0.022024262926220048), 'brier_score': (0.27041465744357224, 0.03554555655372979)}
[2022-05-16T11:48:34.811563+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.4401465932195037, 0.03600634910402601), 'brier_score': (0.2597638504608096, 0.04337158434615128)}
[2022-05-16T11:49:01.993752+0300][195736][INFO]  Experiment repeat: 3 task type: survival_analysis Train df hash = 768910174161884000
[2022-05-16T11:50:47.184154+0300][195736][INFO]  Performance eval for df hash = 4207807399589507144 ood hash = 2403283914219513140
[2022-05-16T11:50:47.973103+0300][195736][INFO] Baseline performance score: {'c_index': (0.6337135135043991, 0.012296118162988845), 'brier_score': (0.1849

[2022-05-16T11:56:30.708862+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.4887625942328369, 0.02681509876787272), 'brier_score': (0.24199617959146572, 0.011432568271841989)}


[2022-05-16T11:56:58.231846+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.4817798787535659, 0.022225694073151895), 'brier_score': (0.24401651908784658, 0.03054354099064739)}
[2022-05-16T11:57:18.198656+0300][195736][INFO] Benchmarking plugin : ctgan
[2022-05-16T11:57:18.202092+0300][195736][INFO]  Experiment repeat: 0 task type: survival_analysis Train df hash = 1082615206799451559
[2022-05-16T11:57:20.987871+0300][195736][INFO]  Performance eval for df hash = 3714545451517944886 ood hash = 2369742275626103798
[2022-05-16T11:57:21.797211+0300][195736][INFO] Baseline performance score: {'c_index': (0.6362518222021938, 0.01043427122067225), 'brier_score': (0.18780283778280085, 0.013277638086733278)}
[2022-05-16T11:57:22.541937+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.5803997535401324, 0.010556119493770152), 'brier_score': (0.26408253097848794, 0.008359131355343268)}
[2022-05-16T11:57:23.013019+0300][195736][INFO] Synthetic OOD performance s

[2022-05-16T11:58:27.976347+0300][195736][INFO] Baseline performance score: {'c_index': (0.6322304584122765, 0.02222068733245527), 'brier_score': (0.20132617970069164, 0.008857442162430841)}
[2022-05-16T11:58:53.965659+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.5347503076047316, 0.009126628175990125), 'brier_score': (0.2771558785901737, 0.006749084338595676)}
[2022-05-16T11:59:21.869813+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.5024868193569952, 0.056055163627535945), 'brier_score': (0.2863902266610252, 0.014632774436163058)}
[2022-05-16T11:59:37.822683+0300][195736][INFO]  Experiment repeat: 1 task type: survival_analysis Train df hash = 9065649484940803752
[2022-05-16T11:59:40.399211+0300][195736][INFO]  Performance eval for df hash = 1180314678481592860 ood hash = 8312814499701059726
[2022-05-16T11:59:41.194401+0300][195736][INFO] Baseline performance score: {'c_index': (0.6370695299707485, 0.011040162087917997), 'brier_score': (0.18

[2022-05-16T12:03:12.235231+0300][195736][INFO] Baseline performance score: {'c_index': (0.6362022539369426, 0.01575209345005484), 'brier_score': (0.20145258156149315, 0.019558556526948698)}
[2022-05-16T12:03:37.491194+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.5872378149757141, 0.017401964287935594), 'brier_score': (0.29080247394779785, 0.005241154833170826)}
[2022-05-16T12:04:04.066697+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.5804169006654286, 0.021252088264773726), 'brier_score': (0.29599648852779364, 0.014930290443533391)}
[2022-05-16T12:04:26.860378+0300][195736][INFO]  Experiment repeat: 3 task type: survival_analysis Train df hash = 768910174161884000
[2022-05-16T12:04:51.531561+0300][195736][INFO]  Performance eval for df hash = 8872152847853436110 ood hash = 2403283914219513140
[2022-05-16T12:04:52.318639+0300][195736][INFO] Baseline performance score: {'c_index': (0.6333804762822133, 0.016036640601882545), 'brier_score': (0.1

[2022-05-16T12:06:00.266369+0300][195736][INFO] Baseline performance score: {'c_index': (0.6422781271080585, 0.007102320327660466), 'brier_score': (0.19815817315713924, 0.021589053846517544)}
[2022-05-16T12:06:26.468423+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.5577199004183765, 0.006606712461814004), 'brier_score': (0.21281854863387092, 0.022838439508308123)}
[2022-05-16T12:06:54.385538+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.5713055380299097, 0.01539687329951234), 'brier_score': (0.21951494410398661, 0.02847727301021494)}
[2022-05-16T12:07:10.477114+0300][195736][INFO]  Experiment repeat: 4 task type: survival_analysis Train df hash = 2211166129410309295
[2022-05-16T12:07:35.045899+0300][195736][INFO]  Performance eval for df hash = 7126684195519614330 ood hash = 8548616833134766624
[2022-05-16T12:07:35.840169+0300][195736][INFO] Baseline performance score: {'c_index': (0.6250753985400334, 0.015742491168120293), 'brier_score': (0.1

[2022-05-16T12:11:29.660402+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.6300454842017952, 0.024448710435692425), 'brier_score': (0.2062584230185872, 0.006403940812400831)}
[2022-05-16T12:11:56.883164+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.6640970414928974, 0.02446125980306839), 'brier_score': (0.19780844168363684, 0.010360659174567882)}
[2022-05-16T12:12:20.091874+0300][195736][INFO]  Experiment repeat: 1 task type: survival_analysis Train df hash = 9065649484940803752
[2022-05-16T12:12:22.696106+0300][195736][INFO]  Performance eval for df hash = 1180314678481592860 ood hash = 8312814499701059726
[2022-05-16T12:12:23.489507+0300][195736][INFO] Baseline performance score: {'c_index': (0.6370695299707485, 0.011040162087917997), 'brier_score': (0.18521492563382147, 0.009407559045704648)}
[2022-05-16T12:12:24.401841+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.6261133081656788, 0.006333331823359668), 'brier_score': 

[2022-05-16T12:14:25.222229+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.6113785056845309, 0.0029087772774698765), 'brier_score': (0.24523228532720684, 0.06937048883821396)}
[2022-05-16T12:14:57.582656+0300][195736][INFO]  Experiment repeat: 2 task type: survival_analysis Train df hash = 7271128867335054582
[2022-05-16T12:15:00.180318+0300][195736][INFO]  Performance eval for df hash = 1031821224541529103 ood hash = 6870545931919139053
[2022-05-16T12:15:00.959174+0300][195736][INFO] Baseline performance score: {'c_index': (0.6372342504152105, 0.009251340289887762), 'brier_score': (0.18784895175534577, 0.01529063719607242)}
[2022-05-16T12:15:01.846346+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.623095979612612, 0.011398935843202014), 'brier_score': (0.2374333719529592, 0.02435453852496955)}
[2022-05-16T12:15:02.454990+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.610466130050963, 0.037291100959799706), 'brier_score': (0

[2022-05-16T12:16:16.659357+0300][195736][INFO] Baseline performance score: {'c_index': (0.6362022539369426, 0.01575209345005484), 'brier_score': (0.20145258156149315, 0.019558556526948698)}
[2022-05-16T12:16:46.175276+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.6416149927074334, 0.013070563150540296), 'brier_score': (0.20429203456591816, 0.012383245994693568)}
[2022-05-16T12:17:15.066779+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.6350693377635606, 0.04083978128492642), 'brier_score': (0.21393689043700337, 0.031706966712329074)}
[2022-05-16T12:17:38.723866+0300][195736][INFO]  Experiment repeat: 3 task type: survival_analysis Train df hash = 768910174161884000
[2022-05-16T12:17:54.510503+0300][195736][INFO]  Performance eval for df hash = 909476929172126206 ood hash = 2403283914219513140
[2022-05-16T12:17:55.294309+0300][195736][INFO] Baseline performance score: {'c_index': (0.6315715300483483, 0.005908855359029488), 'brier_score': (0.186

[2022-05-16T12:21:45.286158+0300][195736][INFO] Baseline performance score: {'c_index': (0.6370616303865931, 0.01207547223615163), 'brier_score': (0.20169124864568352, 0.016591834853371526)}


[2022-05-16T12:22:11.129146+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.6326382461567488, 0.0038355652026351304), 'brier_score': (0.21923453557843478, 0.01916279593424734)}
[2022-05-16T12:22:38.462313+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.6554288842020162, 0.029364307353839936), 'brier_score': (0.22203379736930576, 0.027937072104545695)}
[2022-05-16T12:23:01.666131+0300][195736][INFO] Benchmarking plugin : nflow
[2022-05-16T12:23:01.670238+0300][195736][INFO]  Experiment repeat: 0 task type: survival_analysis Train df hash = 1082615206799451559
[2022-05-16T12:23:04.219789+0300][195736][INFO]  Performance eval for df hash = 3714545451517944886 ood hash = 2369742275626103798
[2022-05-16T12:23:05.047349+0300][195736][INFO] Baseline performance score: {'c_index': (0.6362518222021938, 0.01043427122067225), 'brier_score': (0.18780283778280085, 0.013277638086733278)}
[2022-05-16T12:23:05.773607+0300][195736][INFO] Synthetic ID performance s

[2022-05-16T12:24:12.396539+0300][195736][INFO] Baseline performance score: {'c_index': (0.6322304584122765, 0.02222068733245527), 'brier_score': (0.20132617970069164, 0.008857442162430841)}
[2022-05-16T12:24:38.014539+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.5615624824135658, 0.018757232526055424), 'brier_score': (0.21802112742875226, 0.012697664925943536)}
[2022-05-16T12:25:06.053850+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.5923983334929491, 0.018523987523728268), 'brier_score': (0.20945112149415293, 0.03267623634452716)}
[2022-05-16T12:25:38.460419+0300][195736][INFO]  Experiment repeat: 1 task type: survival_analysis Train df hash = 9065649484940803752
[2022-05-16T12:25:40.956434+0300][195736][INFO]  Performance eval for df hash = 1180314678481592860 ood hash = 8312814499701059726
[2022-05-16T12:25:41.756107+0300][195736][INFO] Baseline performance score: {'c_index': (0.6370695299707485, 0.011040162087917997), 'brier_score': (0.1

[2022-05-16T12:26:49.662677+0300][195736][INFO] Baseline performance score: {'c_index': (0.6446570048677241, 0.011492767793291026), 'brier_score': (0.1957376232221791, 0.006818588947746551)}
[2022-05-16T12:27:15.516034+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.5669120992821939, 0.004385577982301783), 'brier_score': (0.21681105891217753, 0.003715192090325221)}
[2022-05-16T12:27:42.813409+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.569625762976867, 0.025380698072461346), 'brier_score': (0.23748360512512276, 0.04870335093755423)}
[2022-05-16T12:28:03.423323+0300][195736][INFO]  Experiment repeat: 2 task type: survival_analysis Train df hash = 7271128867335054582
[2022-05-16T12:28:06.050703+0300][195736][INFO]  Performance eval for df hash = 1031821224541529103 ood hash = 6870545931919139053
[2022-05-16T12:28:06.835972+0300][195736][INFO] Baseline performance score: {'c_index': (0.6372342504152105, 0.009251340289887762), 'brier_score': (0.18

[2022-05-16T12:29:14.519547+0300][195736][INFO] Baseline performance score: {'c_index': (0.6362022539369426, 0.01575209345005484), 'brier_score': (0.20145258156149315, 0.019558556526948698)}
[2022-05-16T12:29:40.396052+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.5530403880599516, 0.024384854608827538), 'brier_score': (0.25382558164398167, 0.007997492181693712)}
[2022-05-16T12:30:08.336396+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.5419162767085157, 0.023736125388808947), 'brier_score': (0.2598054534106374, 0.019459809139205204)}
[2022-05-16T12:30:31.301603+0300][195736][INFO]  Experiment repeat: 3 task type: survival_analysis Train df hash = 768910174161884000
[2022-05-16T12:30:55.078572+0300][195736][INFO]  Performance eval for df hash = 7327449707894222048 ood hash = 2403283914219513140
[2022-05-16T12:30:55.892004+0300][195736][INFO] Baseline performance score: {'c_index': (0.6313340681879397, 0.03047833499197445), 'brier_score': (0.186


[4m[1mPlugin : nflow[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch.score,0.916667,0.916667,0.916667,0.0,0.916667,0.0,5,0,0.0
sanity.common_rows_proportion.score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.01
sanity.nearest_syn_neighbor_distance.mean,0.280779,0.366906,0.305919,0.032881,0.285154,0.031913,5,0,0.01
sanity.close_values_probability.score,0.082075,0.288247,0.202232,0.088162,0.255417,0.167433,5,0,0.01
sanity.distant_values_probability.score,0.001313,0.012475,0.006041,0.003815,0.005909,0.00394,5,0,0.01
stats.jensenshannon_dist.marginal,0.18015,0.224078,0.206624,0.017397,0.218274,0.027154,5,0,0.06
stats.chi_squared_test.marginal,0.901426,0.938937,0.926362,0.01368,0.932098,0.014206,5,0,0.01
stats.feature_corr.joint,1.505605,2.214302,1.942565,0.25637,2.06983,0.306391,5,0,0.05
stats.inv_kl_divergence.marginal,0.835139,0.875585,0.854275,0.014777,0.851625,0.022997,5,0,0.01
stats.ks_test.marginal,0.754969,0.811079,0.782451,0.020909,0.774787,0.032591,5,0,0.01





In [7]:
survival_score = evaluate_dataset("survival", "metabric", survival_plugins)
Benchmarks.print(survival_score)

[2022-05-16T12:36:03.307976+0300][195736][INFO] Benchmarking plugin : survival_gan
[2022-05-16T12:36:03.313349+0300][195736][INFO]  Experiment repeat: 0 task type: survival_analysis Train df hash = 1082615206799451559
[2022-05-16T12:36:06.052820+0300][195736][INFO]  Performance eval for df hash = 3714545451517944886 ood hash = 2369742275626103798
[2022-05-16T12:36:06.859286+0300][195736][INFO] Baseline performance score: {'c_index': (0.6362518222021938, 0.01043427122067225), 'brier_score': (0.18780283778280085, 0.013277638086733278)}
[2022-05-16T12:36:07.741721+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.6355650007239066, 0.013131402514497594), 'brier_score': (0.23522039728352273, 0.019487684975407725)}
[2022-05-16T12:36:08.347995+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.6404564040271656, 0.049768126335245695), 'brier_score': (0.23591368253199674, 0.023816087949411563)}
[2022-05-16T12:36:08.354489+0300][195736][INFO]  Performance eval f

[2022-05-16T12:38:07.971492+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.6316205376350308, 0.03554068382330655), 'brier_score': (0.20051795417345164, 0.006874475357374168)}
[2022-05-16T12:38:28.377876+0300][195736][INFO]  Experiment repeat: 1 task type: survival_analysis Train df hash = 9065649484940803752
[2022-05-16T12:38:30.945525+0300][195736][INFO]  Performance eval for df hash = 1180314678481592860 ood hash = 8312814499701059726
[2022-05-16T12:38:31.734816+0300][195736][INFO] Baseline performance score: {'c_index': (0.6370695299707485, 0.011040162087917997), 'brier_score': (0.18521492563382147, 0.009407559045704648)}
[2022-05-16T12:38:32.603696+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.6235241009739748, 0.015511406293528927), 'brier_score': (0.24104091313225248, 0.014064644972606224)}
[2022-05-16T12:38:33.193875+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.6602931894732841, 0.024215702596195786), 'brier_score'

[2022-05-16T12:40:05.629756+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.6355860365069705, 0.016369384747853027), 'brier_score': (0.19453233888675228, 0.007200737429556991)}
[2022-05-16T12:40:30.932432+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.6209328760203435, 0.0034450633941305936), 'brier_score': (0.23685342215579727, 0.07027950856998752)}
[2022-05-16T12:40:48.838909+0300][195736][INFO]  Experiment repeat: 2 task type: survival_analysis Train df hash = 7271128867335054582
[2022-05-16T12:40:51.570805+0300][195736][INFO]  Performance eval for df hash = 1031821224541529103 ood hash = 6870545931919139053
[2022-05-16T12:40:52.352301+0300][195736][INFO] Baseline performance score: {'c_index': (0.6372342504152105, 0.009251340289887762), 'brier_score': (0.18784895175534577, 0.01529063719607242)}
[2022-05-16T12:40:53.308598+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.6222740738891743, 0.018501062900248985), 'brier_score':

[2022-05-16T12:46:29.725099+0300][195736][INFO] Baseline performance score: {'c_index': (0.6407914670102728, 0.00777821978385041), 'brier_score': (0.19950704612065598, 0.01860800634357534)}
[2022-05-16T12:46:55.681587+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.6168210409435266, 0.018548520878896052), 'brier_score': (0.2252772024500799, 0.027116271398512786)}


[2022-05-16T12:47:27.803449+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.5687290745499389, 0.00983558960178391), 'brier_score': (0.24323378747194976, 0.051219707837213156)}
[2022-05-16T12:47:53.263454+0300][195736][INFO]  Experiment repeat: 4 task type: survival_analysis Train df hash = 2211166129410309295
[2022-05-16T12:47:53.505713+0300][195736][INFO] Using imbalanced time and censoring sampling
[2022-05-16T12:47:57.318076+0300][195736][INFO] Train the uncensoring model
[2022-05-16T12:48:02.866631+0300][195736][INFO] max T = 355.20001220703125, max syn T = 353.57171630859375
[2022-05-16T12:48:02.867182+0300][195736][INFO] Train the synthetic generator
[2022-05-16T12:50:00.290404+0300][195736][INFO]  Performance eval for df hash = 6079769656942176734 ood hash = 8548616833134766624
[2022-05-16T12:50:01.079383+0300][195736][INFO] Baseline performance score: {'c_index': (0.619232292770257, 0.022884851997379014), 'brier_score': (0.19356636185753148, 0.00792217052233



[2022-05-16T12:51:38.629093+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.5965715043608278, 0.0025278435067843706), 'brier_score': (0.210122568034675, 0.017228572672881883)}
[2022-05-16T12:52:06.843380+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.6039919536718982, 0.017679942902054213), 'brier_score': (0.21221344013735702, 0.022012027701259113)}



[4m[1mPlugin : survival_gan[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch.score,0.916667,0.916667,0.916667,0.0,0.916667,0.0,5,0,0.0
sanity.common_rows_proportion.score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.01
sanity.nearest_syn_neighbor_distance.mean,0.060817,0.135701,0.096752,0.03091,0.091287,0.062227,5,0,0.01
sanity.close_values_probability.score,0.84176,0.972423,0.916218,0.053675,0.943533,0.099146,5,0,0.01
sanity.distant_values_probability.score,0.001313,0.006566,0.002495,0.002051,0.001313,0.000657,5,0,0.01
stats.jensenshannon_dist.marginal,0.10179,0.151745,0.120331,0.018185,0.114081,0.022886,5,0,0.06
stats.chi_squared_test.marginal,0.429505,0.717294,0.568652,0.09604,0.538046,0.083698,5,0,0.01
stats.feature_corr.joint,1.243398,2.152078,1.605812,0.308037,1.533441,0.257811,5,0,0.05
stats.inv_kl_divergence.marginal,0.783812,0.894907,0.835188,0.040363,0.816219,0.055203,5,0,0.01
stats.ks_test.marginal,0.830836,0.907061,0.883651,0.027373,0.896198,0.014803,5,0,0.01





## CUTRACT

In [None]:
base_score = evaluate_dataset("baseline", "cutract", base_plugins)
Benchmarks.print(base_score)

[2022-05-16T12:52:23.486121+0300][195736][INFO] Benchmarking plugin : privbayes
[2022-05-16T12:52:23.490374+0300][195736][INFO]  Experiment repeat: 0 task type: survival_analysis Train df hash = 553634569229738029
[2022-05-16T12:52:28.616743+0300][195736][INFO]  Performance eval for df hash = 4206929094738099388 ood hash = 3884606706705070508
[2022-05-16T12:52:33.897112+0300][195736][INFO] Baseline performance score: {'c_index': (0.7992029392942062, 0.009935930922101145), 'brier_score': (0.07677033501068477, 0.002420609215927864)}
[2022-05-16T12:52:36.409290+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.6022486675952278, 0.017687435902633938), 'brier_score': (0.0990595795477874, 0.0021331671674467124)}
[2022-05-16T12:52:37.444254+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.5838349847965166, 0.012220429283142374), 'brier_score': (0.10133189930251339, 0.0004881868348396558)}
[2022-05-16T12:52:37.450956+0300][195736][INFO]  Performance eval for

[2022-05-16T13:02:12.487557+0300][195736][INFO] Baseline performance score: {'c_index': (0.8262981612006556, 0.009319639634419691), 'brier_score': (0.08481291716972371, 0.0023588148060674006)}
[2022-05-16T13:03:31.072899+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.46028702839662555, 0.01476692517552364), 'brier_score': (0.1008915192613109, 0.0023281873168665207)}
[2022-05-16T13:04:42.968354+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.4713187533602317, 0.021695782491033048), 'brier_score': (0.10076062111305013, 0.002448895579007693)}
[2022-05-16T13:05:19.959030+0300][195736][INFO]  Experiment repeat: 2 task type: survival_analysis Train df hash = 4167553424968043274
[2022-05-16T13:05:24.734269+0300][195736][INFO]  Performance eval for df hash = 7870464263584895058 ood hash = 1392968455907958676
[2022-05-16T13:05:28.438137+0300][195736][INFO] Baseline performance score: {'c_index': (0.8076501605344878, 0.0040294229513951115), 'brier_score': 

[2022-05-16T13:26:04.435398+0300][195736][INFO]  Performance eval for df hash = 6073666314542529266 ood hash = 3884606706705070508
[2022-05-16T13:26:16.546997+0300][195736][INFO] Baseline performance score: {'c_index': (0.6566658026963447, 0.12589092426029447), 'brier_score': (0.0916663426061941, 0.004628632122908726)}
[2022-05-16T13:26:27.668922+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.6371337697835108, 0.0064200696981963285), 'brier_score': (0.11457497199956297, 0.002832486237146959)}
[2022-05-16T13:26:41.707294+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.6452804151328446, 0.024208021684096023), 'brier_score': (0.11653769652512184, 0.0022491725944214732)}
[2022-05-16T13:26:41.714674+0300][195736][INFO]  Performance eval for df hash = 1954771181398338952 ood hash = 3884606706705070508
[2022-05-16T13:29:26.431033+0300][195736][INFO] Baseline performance score: {'c_index': (0.822742659603748, 0.0031277169003271488), 'brier_score': (0.084

[2022-05-16T13:31:03.664117+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.731794175351831, 0.007542028644285218), 'brier_score': (0.11849375135051134, 0.0022490889708421118)}
[2022-05-16T13:31:36.710453+0300][195736][INFO]  Experiment repeat: 1 task type: survival_analysis Train df hash = 7689582637391920142
[2022-05-16T13:31:41.503667+0300][195736][INFO]  Performance eval for df hash = 356321018025193907 ood hash = 4357411713889416927
[2022-05-16T13:31:45.206057+0300][195736][INFO] Baseline performance score: {'c_index': (0.8037687028638295, 0.010595355446932289), 'brier_score': (0.07632056156912781, 0.0012103438398218269)}
[2022-05-16T13:31:45.330619+0300][195736][ERROR] Failed to evaluate synthetic ID performance. cox_ph: Convergence halted due to matrix inversion problems. Suspicion is high collinearity. Please see the following tips in the lifelines documentation: https://lifelines.readthedocs.io/en/latest/Examples.html#problems-with-convergence-in-the-cox-pr

[2022-05-16T13:40:18.587338+0300][195736][INFO] Baseline performance score: {'c_index': (0.8301300520571256, 0.007847454307286558), 'brier_score': (0.08504457431981376, 0.0013518953844011187)}
[2022-05-16T13:41:15.635173+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.5908882247442353, 0.012992876200750535), 'brier_score': (0.11442922210092016, 0.0008050867843561727)}
[2022-05-16T13:42:10.470661+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.5872113549842797, 0.031480875047543794), 'brier_score': (0.11160298309696476, 0.0037032095392679956)}
[2022-05-16T13:42:43.279987+0300][195736][INFO]  Experiment repeat: 3 task type: survival_analysis Train df hash = 5502102683481213224
[2022-05-16T13:42:43.392863+0300][195736][CRITICAL] [adsgan][take 3] failed: 1 validation error for Init
kwargs
  unexpected keyword argument: 'device' (type=type_error)
[2022-05-16T13:42:43.392863+0300][195736][CRITICAL] [adsgan][take 3] failed: 1 validation error for Init
kw

[2022-05-16T13:55:39.202652+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.7842985968685715, 0.03563436023213878), 'brier_score': (0.22431132350053903, 0.017248332402304875)}
[2022-05-16T13:56:28.371817+0300][195736][INFO]  Experiment repeat: 2 task type: survival_analysis Train df hash = 4167553424968043274
[2022-05-16T13:56:32.974624+0300][195736][INFO]  Performance eval for df hash = 7870464263584895058 ood hash = 1392968455907958676
[2022-05-16T13:56:36.681520+0300][195736][INFO] Baseline performance score: {'c_index': (0.8076501605344878, 0.0040294229513951115), 'brier_score': (0.07693109190608903, 0.0024860093224249587)}
[2022-05-16T13:56:39.800960+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.8057886708221265, 0.005395452295442697), 'brier_score': (0.1622039395050651, 0.003847351710632319)}
[2022-05-16T13:56:41.447561+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.7787611746128132, 0.026371404067664295), 'brier_score

[2022-05-16T14:08:25.676728+0300][195736][INFO] Baseline performance score: {'c_index': (0.8265708637656379, 0.012799190078964672), 'brier_score': (0.08531002745158413, 0.0029548594210406275)}
[2022-05-16T14:09:45.585122+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.8126768614026293, 0.009782356645588821), 'brier_score': (0.1506381705963123, 0.0056422738290209645)}
[2022-05-16T14:11:21.203599+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.796116152860232, 0.027933452351425143), 'brier_score': (0.15241778197822486, 0.010633669791283922)}
[2022-05-16T14:11:55.847179+0300][195736][INFO]  Experiment repeat: 4 task type: survival_analysis Train df hash = 7223914864637014995
[2022-05-16T14:13:53.997884+0300][195736][INFO]  Performance eval for df hash = 4194760533314532178 ood hash = 2194550091257685496
[2022-05-16T14:13:58.843897+0300][195736][INFO] Baseline performance score: {'c_index': (0.804181549886481, 0.007274505618188662), 'brier_score': (0.

[2022-05-16T14:19:39.964155+0300][195736][INFO] Baseline performance score: {'c_index': (0.8274323729798843, 0.012489892046532896), 'brier_score': (0.0852449287435772, 0.0001860650364092367)}


[2022-05-16T14:21:50.452501+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.8060573558472234, 0.012220898368410048), 'brier_score': (0.20452946648173673, 0.005736376795745016)}
[2022-05-16T14:24:03.417269+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.8045810580088792, 0.023651050648447178), 'brier_score': (0.2032445438634478, 0.028567029669924714)}
[2022-05-16T14:25:01.001558+0300][195736][INFO] Benchmarking plugin : tvae
[2022-05-16T14:25:01.005807+0300][195736][INFO]  Experiment repeat: 0 task type: survival_analysis Train df hash = 553634569229738029
[2022-05-16T14:25:07.047654+0300][195736][INFO]  Performance eval for df hash = 4206929094738099388 ood hash = 3884606706705070508
[2022-05-16T14:25:11.844269+0300][195736][INFO] Baseline performance score: {'c_index': (0.7992029392942062, 0.009935930922101145), 'brier_score': (0.07677033501068477, 0.002420609215927864)}
[2022-05-16T14:25:15.997279+0300][195736][INFO] Synthetic ID performance sco

[2022-05-16T14:30:01.989141+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.7444879567529515, 0.005873962677383562), 'brier_score': (0.09951833657391272, 0.0014105279753642195)}
[2022-05-16T14:31:01.519741+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.7385692997339824, 0.004718463151460451), 'brier_score': (0.1052864534179921, 0.0035576079747156237)}
[2022-05-16T14:31:53.250085+0300][195736][INFO]  Experiment repeat: 1 task type: survival_analysis Train df hash = 7689582637391920142
[2022-05-16T14:31:58.125319+0300][195736][INFO]  Performance eval for df hash = 356321018025193907 ood hash = 4357411713889416927
[2022-05-16T14:32:01.844504+0300][195736][INFO] Baseline performance score: {'c_index': (0.8037687028638295, 0.010595355446932289), 'brier_score': (0.07632056156912781, 0.0012103438398218269)}
[2022-05-16T14:32:05.719157+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.7531174513575797, 0.027304866303284974), 'brier_score

[2022-05-16T14:36:38.004971+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.7741925810168627, 0.010533499773001232), 'brier_score': (0.09593246740910162, 0.0016997441557405847)}
[2022-05-16T14:37:39.098002+0300][195736][INFO] Synthetic OOD performance score: {'c_index': (0.7395220018914554, 0.046929446363893346), 'brier_score': (0.10196459947615573, 0.0012736074459316577)}
[2022-05-16T14:38:15.219970+0300][195736][INFO]  Experiment repeat: 2 task type: survival_analysis Train df hash = 4167553424968043274
[2022-05-16T14:38:20.208261+0300][195736][INFO]  Performance eval for df hash = 7870464263584895058 ood hash = 1392968455907958676
[2022-05-16T14:38:25.103339+0300][195736][INFO] Baseline performance score: {'c_index': (0.8076501605344878, 0.0040294229513951115), 'brier_score': (0.07693109190608903, 0.0024860093224249587)}
[2022-05-16T14:38:27.980306+0300][195736][INFO] Synthetic ID performance score: {'c_index': (0.8093112478698007, 0.004284857069101776), 'brier_sc

In [None]:
survival_score = evaluate_dataset("survival", "cutract", survival_plugins)
Benchmarks.print(survival_score)

## MAGGIC dataset 

In [None]:
base_score = evaluate_dataset("baseline", "maggic", base_plugins)
Benchmarks.print(base_score)

In [None]:
survival_score = evaluate_dataset("survival", "maggic", survival_plugins)
Benchmarks.print(survival_score)

## SEER prostate 

In [None]:
base_score = evaluate_dataset("baseline", "seer", base_plugins)
Benchmarks.print(base_score)

In [None]:
survival_score = evaluate_dataset("survival", "seer", survival_plugins)
Benchmarks.print(survival_score)