# Tutorial 2: Benchmarks

In [1]:
import warnings
import sys  # NOTE(review): not referenced in any cell visible here - confirm it is still needed
# The 'ignore' action with no category/module filter suppresses every warning
# for the rest of the session, including deprecation warnings raised by the
# libraries imported below.
warnings.filterwarnings('ignore')

from sklearn.datasets import load_diabetes
from synthcity.plugins import Plugins
import synthcity.logger as log

# Fetch the diabetes data as pandas objects and append the label as a
# "target" column, so a single frame `X` holds features and target together.
diabetes_features, y = load_diabetes(as_frame=True, return_X_y=True)
X = diabetes_features.assign(target=y)

X

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.050680,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019908,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068330,-0.092204,75.0
2,0.085299,0.050680,0.044451,-0.005671,-0.045599,-0.034194,-0.032356,-0.002592,0.002864,-0.025930,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022692,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031991,-0.046641,135.0
...,...,...,...,...,...,...,...,...,...,...,...
437,0.041708,0.050680,0.019662,0.059744,-0.005697,-0.002566,-0.028674,-0.002592,0.031193,0.007207,178.0
438,-0.005515,0.050680,-0.015906,-0.067642,0.049341,0.079165,-0.028674,0.034309,-0.018118,0.044485,104.0
439,0.041708,0.050680,-0.015906,0.017282,-0.037344,-0.013840,-0.024993,-0.011080,-0.046879,0.015491,132.0
440,-0.045472,-0.044642,0.039062,0.001215,0.016318,0.015283,-0.028674,0.026560,0.044528,-0.025930,220.0


## List the available generative models

In [2]:
from synthcity.plugins import Plugins

# Ask the plugin registry for the names of the available generators.
# NOTE(review): `skip_debug=True` presumably filters out debug-only plugins -
# confirm against the synthcity `Plugins.list` documentation.
plugin_registry = Plugins()
plugins = plugin_registry.list(skip_debug=True)

plugins

['uniform_sampler',
 'nflow',
 'privbayes',
 'gaussian_copula',
 'pategan',
 'adsgan',
 'marginal_distributions',
 'tvae',
 'rtvae',
 'ctgan',
 'bayesian_network',
 'copulagan']

## Benchmark the quality of plugins

In [3]:
from synthcity.benchmark import Benchmarks

# Benchmark every plugin name returned by `Plugins().list()` on `X`,
# requesting a synthetic set the same size as `X` and 5 repeats per plugin.
# `score` is indexed by plugin name in the cells below.
benchmark_plugins = Plugins().list()
score = Benchmarks.evaluate(
    benchmark_plugins,
    X,
    sensitive_columns=["sex"],
    synthetic_size=len(X),
    repeats=5,
)


In [4]:
# Print the benchmark results held in `score`; the output that follows is a
# cross-plugin comparison table and then one statistics table per plugin.
Benchmarks.print(score)


[4m[1mComparatives[0m[0m


Unnamed: 0,uniform_sampler,nflow,privbayes,gaussian_copula,pategan,adsgan,marginal_distributions,tvae,rtvae,ctgan,bayesian_network,copulagan
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.271946,0.0
sanity.nearest_syn_neighbor_distance,0.138489,0.04652,0.102365,0.079813,0.080285,0.052031,0.183521,0.054804,0.034103,0.130247,0.041631,0.092883
sanity.inlier_probability,0.808597,0.970136,0.873303,0.846154,0.88371,0.923529,0.70362,0.962443,0.976018,0.798643,0.958824,0.879638
sanity.outlier_probability,0.008145,0.002715,0.00905,0.00362,0.008597,0.006787,0.012217,0.003167,0.00362,0.00724,0.003167,0.003167
statistical.marginal.jensenshannon_distance,0.272614,0.078018,0.194618,0.082348,0.175319,0.18637,0.272283,0.159785,0.16247,0.226017,0.04964,0.263623
statistical.marginal.chi_squared_test,0.996923,0.809211,0.687382,0.868966,0.442084,0.251475,0.997975,0.480718,0.466525,0.862118,0.815949,0.884416
statistical.joint.feature_correlation,3.874638,1.120998,3.946758,0.628132,4.363082,2.582731,3.90963,1.496298,4.567759,3.716248,1.069151,3.778173
statistical.marginal.inverse_cdf_distance,0.145705,0.181955,0.164977,0.175421,0.183422,0.182242,0.146102,0.175613,0.194439,0.176579,0.177965,0.176476
statistical.marginal.inverse_kl_divergence,0.775668,0.96056,0.826321,0.956474,0.78944,0.7029,0.776773,0.778526,0.800836,0.825172,0.977106,0.780544



[4m[1mPlugin : uniform_sampler[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.nearest_syn_neighbor_distance,0.065501,0.166576,0.138489,0.03729443,0.155801,0.017776,5,0,0.0
sanity.inlier_probability,0.737557,0.950226,0.808597,0.07565633,0.794118,0.058824,5,0,0.0
sanity.outlier_probability,0.002262,0.015837,0.008145,0.005651582,0.006787,0.011312,5,0,0.0
statistical.marginal.jensenshannon_distance,0.268026,0.278777,0.272614,0.004121257,0.271162,0.006896,5,0,0.07
statistical.marginal.chi_squared_test,0.995095,0.998367,0.996923,0.00116473,0.997132,0.00161,5,0,0.01
statistical.joint.feature_correlation,3.812352,3.9264,3.874638,0.03874212,3.882799,0.041869,5,0,0.05
statistical.marginal.inverse_cdf_distance,0.140601,0.150908,0.145705,0.003528625,0.145582,0.00427,5,0,3.45
statistical.marginal.inverse_kl_divergence,0.765822,0.78347,0.775668,0.006096036,0.776459,0.007293,5,0,0.01




[4m[1mPlugin : nflow[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.nearest_syn_neighbor_distance,0.022788,0.105837,0.04652,0.03007354,0.033758,0.00583,5,0,0.0
sanity.inlier_probability,0.893665,0.993213,0.970136,0.03835225,0.988688,0.006787,5,0,0.0
sanity.outlier_probability,0.002262,0.004525,0.002715,0.0009049774,0.002262,0.0,5,0,0.0
statistical.marginal.jensenshannon_distance,0.067483,0.089672,0.078018,0.00736034,0.078227,0.006803,5,0,0.07
statistical.marginal.chi_squared_test,0.720303,0.898734,0.809211,0.05657862,0.807483,0.01289,5,0,0.01
statistical.joint.feature_correlation,0.828657,1.457244,1.120998,0.2129417,1.048858,0.212572,5,0,0.05
statistical.marginal.inverse_cdf_distance,0.174869,0.19012,0.181955,0.005949492,0.180188,0.010652,5,0,2.98
statistical.marginal.inverse_kl_divergence,0.948428,0.972382,0.96056,0.008105616,0.959123,0.008714,5,0,0.01




[4m[1mPlugin : privbayes[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.nearest_syn_neighbor_distance,0.060629,0.155057,0.102365,0.03040036,0.099215,0.011149,5,0,0.0
sanity.inlier_probability,0.79638,0.918552,0.873303,0.04183991,0.891403,0.027149,5,0,0.0
sanity.outlier_probability,0.002262,0.029412,0.00905,0.01031833,0.004525,0.004525,5,0,0.0
statistical.marginal.jensenshannon_distance,0.175143,0.21208,0.194618,0.01263959,0.1977,0.014461,5,0,0.07
statistical.marginal.chi_squared_test,0.453553,0.898035,0.687382,0.1450319,0.726225,0.094756,5,0,0.01
statistical.joint.feature_correlation,3.853771,4.009521,3.946758,0.05290101,3.944413,0.041737,5,0,0.05
statistical.marginal.inverse_cdf_distance,0.155715,0.172886,0.164977,0.005657588,0.166597,0.004254,5,0,3.3
statistical.marginal.inverse_kl_divergence,0.807644,0.839032,0.826321,0.01125047,0.827206,0.014749,5,0,0.01




[4m[1mPlugin : gaussian_copula[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.nearest_syn_neighbor_distance,0.051276,0.101049,0.079813,0.01641491,0.08220525,0.011326,5,0,0.0
sanity.inlier_probability,0.80543,0.979638,0.846154,0.06688573,0.8144796,0.006787,5,0,0.0
sanity.outlier_probability,0.002262,0.004525,0.00362,0.001108366,0.004524887,0.002262,5,0,0.0
statistical.marginal.jensenshannon_distance,0.075364,0.089566,0.082348,0.005690854,0.08302839,0.01097,5,0,0.06
statistical.marginal.chi_squared_test,0.725295,0.906453,0.868966,0.07185177,0.9048261,0.004017,5,0,0.01
statistical.joint.feature_correlation,0.508357,0.820059,0.628132,0.1156952,0.625346,0.163287,5,0,0.05
statistical.marginal.inverse_cdf_distance,0.170763,0.182274,0.175421,0.003790937,0.1750969,0.00164,5,0,2.78
statistical.marginal.inverse_kl_divergence,0.938726,0.969072,0.956474,0.009830299,0.9582453,0.001106,5,0,0.01




[4m[1mPlugin : pategan[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.nearest_syn_neighbor_distance,0.02746,0.133669,0.080285,0.0361,0.083266,0.041483,5,0,0.0
sanity.inlier_probability,0.78733,0.970588,0.88371,0.066557,0.864253,0.095023,5,0,0.0
sanity.outlier_probability,0.002262,0.013575,0.008597,0.004614,0.00905,0.00905,5,0,0.0
statistical.marginal.jensenshannon_distance,0.164365,0.205213,0.175319,0.015269,0.170985,0.007299,5,0,0.07
statistical.marginal.chi_squared_test,0.359614,0.619717,0.442084,0.094952,0.434666,0.075093,5,0,0.01
statistical.joint.feature_correlation,3.812564,4.791306,4.363082,0.317322,4.389012,0.152999,5,0,0.05
statistical.marginal.inverse_cdf_distance,0.170714,0.197836,0.183422,0.010733,0.182998,0.020285,5,0,3.13
statistical.marginal.inverse_kl_divergence,0.744243,0.829897,0.78944,0.029274,0.795745,0.033591,5,0,0.01




[4m[1mPlugin : adsgan[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.nearest_syn_neighbor_distance,0.018359,0.16067,0.052031,0.05451904,0.025799,0.009988,5,0,0.0
sanity.inlier_probability,0.723982,0.986425,0.923529,0.1000849,0.972851,0.011312,5,0,0.0
sanity.outlier_probability,0.004525,0.011312,0.006787,0.002478383,0.006787,0.002262,5,0,0.0
statistical.marginal.jensenshannon_distance,0.159419,0.212228,0.18637,0.01735787,0.186846,0.014906,5,0,0.07
statistical.marginal.chi_squared_test,0.086613,0.544618,0.251475,0.1772426,0.176522,0.271744,5,0,0.01
statistical.joint.feature_correlation,2.179963,3.708689,2.582731,0.5686087,2.337964,0.156337,5,0,0.05
statistical.marginal.inverse_cdf_distance,0.162399,0.202891,0.182242,0.01305929,0.180765,0.007768,5,0,3.19
statistical.marginal.inverse_kl_divergence,0.643754,0.773273,0.7029,0.04481712,0.685066,0.050048,5,0,0.01




[4m[1mPlugin : marginal_distributions[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.nearest_syn_neighbor_distance,0.103423,0.234473,0.183521,0.04561654,0.189183,0.046578,5,0,0.0
sanity.inlier_probability,0.563348,0.891403,0.70362,0.1138349,0.699095,0.133484,5,0,0.0
sanity.outlier_probability,0.002262,0.022624,0.012217,0.008169896,0.011312,0.015837,5,0,0.0
statistical.marginal.jensenshannon_distance,0.263659,0.282564,0.272283,0.007785965,0.271119,0.015593,5,0,0.07
statistical.marginal.chi_squared_test,0.995617,0.999528,0.997975,0.001403323,0.998748,0.001597,5,0,0.01
statistical.joint.feature_correlation,3.78247,4.002069,3.90963,0.08987111,3.960826,0.160703,5,0,0.05
statistical.marginal.inverse_cdf_distance,0.141542,0.150039,0.146102,0.002726976,0.146545,0.001158,5,0,3.33
statistical.marginal.inverse_kl_divergence,0.762289,0.789096,0.776773,0.01000032,0.779427,0.0162,5,0,0.01




[4m[1mPlugin : tvae[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.nearest_syn_neighbor_distance,0.029725,0.086024,0.054804,0.02081629,0.0436,0.029143,5,0,0.0
sanity.inlier_probability,0.89819,0.99095,0.962443,0.0338188,0.979638,0.024887,5,0,0.0
sanity.outlier_probability,0.002262,0.006787,0.003167,0.001809955,0.002262,0.0,5,0,0.0
statistical.marginal.jensenshannon_distance,0.148019,0.170384,0.159785,0.007545479,0.16165,0.007767,5,0,0.07
statistical.marginal.chi_squared_test,0.435342,0.53747,0.480718,0.04270106,0.453217,0.076822,5,0,0.01
statistical.joint.feature_correlation,1.376539,1.638821,1.496298,0.1161339,1.419614,0.225594,5,0,0.05
statistical.marginal.inverse_cdf_distance,0.164625,0.185351,0.175613,0.006680028,0.176653,0.00359,5,0,3.14
statistical.marginal.inverse_kl_divergence,0.747084,0.794096,0.778526,0.01736054,0.785506,0.019371,5,0,0.01




[4m[1mPlugin : rtvae[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.nearest_syn_neighbor_distance,0.016063,0.071032,0.034103,0.01952105,0.026393,0.013731,5,0,0.0
sanity.inlier_probability,0.936652,0.993213,0.976018,0.02086361,0.988688,0.015837,5,0,0.0
sanity.outlier_probability,0.002262,0.006787,0.00362,0.001809955,0.002262,0.002262,5,0,0.0
statistical.marginal.jensenshannon_distance,0.140032,0.179218,0.16247,0.0148456,0.163827,0.024769,5,0,0.07
statistical.marginal.chi_squared_test,0.359606,0.634922,0.466525,0.1047146,0.453228,0.165523,5,0,0.01
statistical.joint.feature_correlation,4.21991,4.831208,4.567759,0.2396778,4.595315,0.434621,5,0,0.05
statistical.marginal.inverse_cdf_distance,0.187848,0.203239,0.194439,0.005132236,0.193118,0.004167,5,0,3.14
statistical.marginal.inverse_kl_divergence,0.724356,0.835845,0.800836,0.0427646,0.825537,0.051352,5,0,0.01




[4m[1mPlugin : ctgan[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.nearest_syn_neighbor_distance,0.075443,0.204843,0.130247,0.04685122,0.131106,0.066229,5,0,0.0
sanity.inlier_probability,0.662896,0.968326,0.798643,0.1329886,0.728507,0.266968,5,0,0.0
sanity.outlier_probability,0.002262,0.015837,0.00724,0.005429864,0.004525,0.00905,5,0,0.0
statistical.marginal.jensenshannon_distance,0.2017,0.246703,0.226017,0.01520862,0.225817,0.01612,5,0,0.07
statistical.marginal.chi_squared_test,0.797104,0.977503,0.862118,0.06986974,0.81618,0.097451,5,0,0.01
statistical.joint.feature_correlation,3.571465,3.813441,3.716248,0.09456724,3.743393,0.163465,5,0,0.05
statistical.marginal.inverse_cdf_distance,0.159875,0.189102,0.176579,0.01100868,0.174845,0.017297,5,0,3.27
statistical.marginal.inverse_kl_divergence,0.805512,0.844651,0.825172,0.01529437,0.827586,0.028118,5,0,0.01




[4m[1mPlugin : bayesian_network[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.260181,0.285068,0.271946,0.009317312,0.266968,0.013575,5,0,0.0
sanity.nearest_syn_neighbor_distance,0.020114,0.063706,0.041631,0.0143073,0.040315,0.011931,5,0,0.0
sanity.inlier_probability,0.864253,0.99095,0.958824,0.04793818,0.9819,0.020362,5,0,0.0
sanity.outlier_probability,0.002262,0.004525,0.003167,0.001108366,0.002262,0.002262,5,0,0.0
statistical.marginal.jensenshannon_distance,0.044118,0.063144,0.04964,0.007043694,0.045915,0.004955,5,0,0.06
statistical.marginal.chi_squared_test,0.725626,0.908105,0.815949,0.05771189,0.8149,0.001311,5,0,0.01
statistical.joint.feature_correlation,0.927069,1.208824,1.069151,0.09864011,1.08198,0.132172,5,0,0.05
statistical.marginal.inverse_cdf_distance,0.176074,0.183477,0.177965,0.002780988,0.176902,0.000793,5,0,3.08
statistical.marginal.inverse_kl_divergence,0.971046,0.981031,0.977106,0.003288129,0.97793,0.000883,5,0,0.01




[4m[1mPlugin : copulagan[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.nearest_syn_neighbor_distance,0.063495,0.116322,0.092883,0.018832,0.093733,0.026325,5,0,0.0
sanity.inlier_probability,0.755656,0.952489,0.879638,0.088037,0.950226,0.160633,5,0,0.0
sanity.outlier_probability,0.002262,0.006787,0.003167,0.00181,0.002262,0.0,5,0,0.0
statistical.marginal.jensenshannon_distance,0.252471,0.288157,0.263623,0.012972,0.257802,0.010257,5,0,0.07
statistical.marginal.chi_squared_test,0.84766,0.90564,0.884416,0.022411,0.897975,0.032371,5,0,0.01
statistical.joint.feature_correlation,3.704649,3.881834,3.778173,0.060201,3.767545,0.057725,5,0,0.05
statistical.marginal.inverse_cdf_distance,0.148951,0.191685,0.176476,0.014452,0.180243,0.003029,5,0,3.38
statistical.marginal.inverse_kl_divergence,0.745656,0.79461,0.780544,0.018141,0.786546,0.013002,5,0,0.01





In [5]:
import pandas as pd
import numpy as np

# Gather each plugin's per-metric "mean" column into one table with a column
# per plugin, and take the metric -> direction mapping from the first plugin
# that reports a non-empty one.
means = []
directions = None
for plugin in score:
    plugin_results = score[plugin]
    means.append(plugin_results["mean"])

    if directions is None:
        # An empty mapping is skipped so that a later plugin can still supply it.
        plugin_directions = plugin_results["direction"].to_dict()
        if len(plugin_directions) > 0:
            directions = plugin_directions

# Column labels are set through `keys=` rather than
# `set_axis(..., inplace=True)`: the `inplace` argument of `set_axis` was
# removed in pandas 2.0, so that call raises TypeError on current pandas.
out = pd.concat(means, axis=1, keys=list(score.keys()))

# CSS snippets returned by `highlights`; `default` (the empty string) leaves a
# cell unstyled.
bad_highlight = 'background-color: lightcoral;'
ok_highlight = 'background-color: green;'
default = ''


def highlights(row, metric_directions=None):
    """Return one CSS string per cell of ``row``, marking best and worst values.

    Intended for ``Styler.apply(highlights, axis=1)``, which calls this once
    per row of the table.

    Parameters
    ----------
    row : pandas.Series
        One row of the table. ``row.name`` is the metric name used to look up
        the optimisation direction.
    metric_directions : dict, optional
        Maps a metric name to its direction. ``"minimize"`` means the smallest
        value is best; any other value means the largest value is best. When
        omitted, the notebook-level ``directions`` mapping is used.

    Returns
    -------
    list of str
        ``ok_highlight`` for every cell equal to the best value,
        ``bad_highlight`` for every cell equal to the worst value and
        ``default`` otherwise. If all values are equal, every cell is marked
        as best because that comparison is made first. If no direction is
        known for the metric, the whole row is left unstyled.
    """
    if metric_directions is None:
        # Fall back to the mapping built alongside the means table.
        metric_directions = directions

    direction = metric_directions.get(row.name) if metric_directions else None
    if direction is None:
        # Unknown metric (or no directions collected at all): there is nothing
        # to rank by, so leave the row unstyled rather than fail mid-render.
        return [default] * len(row)

    # Series.min/max skip NaN, so one missing score does not turn the
    # best/worst reference values into NaN and wipe the row's highlighting.
    lowest, highest = row.min(), row.max()
    if direction == "minimize":
        best_val, worst_val = lowest, highest
    else:
        best_val, worst_val = highest, lowest

    styles = []
    for val in row.values:
        if val == best_val:
            styles.append(ok_highlight)
        elif val == worst_val:
            styles.append(bad_highlight)
        else:
            styles.append(default)

    return styles

# Call `highlights` once per row (axis=1) of the means table to colour the
# cells; as the cell's last expression, the resulting Styler is displayed.
out.style.apply(highlights, axis=1)

Unnamed: 0,uniform_sampler,nflow,privbayes,gaussian_copula,pategan,adsgan,marginal_distributions,tvae,rtvae,ctgan,bayesian_network,copulagan
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.271946,0.0
sanity.nearest_syn_neighbor_distance,0.138489,0.04652,0.102365,0.079813,0.080285,0.052031,0.183521,0.054804,0.034103,0.130247,0.041631,0.092883
sanity.inlier_probability,0.808597,0.970136,0.873303,0.846154,0.88371,0.923529,0.70362,0.962443,0.976018,0.798643,0.958824,0.879638
sanity.outlier_probability,0.008145,0.002715,0.00905,0.00362,0.008597,0.006787,0.012217,0.003167,0.00362,0.00724,0.003167,0.003167
statistical.marginal.jensenshannon_distance,0.272614,0.078018,0.194618,0.082348,0.175319,0.18637,0.272283,0.159785,0.16247,0.226017,0.04964,0.263623
statistical.marginal.chi_squared_test,0.996923,0.809211,0.687382,0.868966,0.442084,0.251475,0.997975,0.480718,0.466525,0.862118,0.815949,0.884416
statistical.joint.feature_correlation,3.874638,1.120998,3.946758,0.628132,4.363082,2.582731,3.90963,1.496298,4.567759,3.716248,1.069151,3.778173
statistical.marginal.inverse_cdf_distance,0.145705,0.181955,0.164977,0.175421,0.183422,0.182242,0.146102,0.175613,0.194439,0.176579,0.177965,0.176476
statistical.marginal.inverse_kl_divergence,0.775668,0.96056,0.826321,0.956474,0.78944,0.7029,0.776773,0.778526,0.800836,0.825172,0.977106,0.780544


# 