# Tutorial 2: Benchmarks

In [1]:
import warnings
import sys
# Ignore every Python warning raised from here on, so the cell outputs below
# stay free of warning text. Note that this also hides deprecation notices.
warnings.filterwarnings('ignore')

from sklearn.datasets import load_diabetes
from synthcity.plugins import Plugins
import synthcity.logger as log

# Load the scikit-learn diabetes dataset as pandas objects: the feature frame
# and the target series.
X, y = load_diabetes(as_frame=True, return_X_y=True)

# Carry the target along as an extra "target" column of the same frame.
X = X.assign(target=y)

# Bare last expression: the notebook renders the resulting frame.
X

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.050680,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019908,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068330,-0.092204,75.0
2,0.085299,0.050680,0.044451,-0.005671,-0.045599,-0.034194,-0.032356,-0.002592,0.002864,-0.025930,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022692,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031991,-0.046641,135.0
...,...,...,...,...,...,...,...,...,...,...,...
437,0.041708,0.050680,0.019662,0.059744,-0.005697,-0.002566,-0.028674,-0.002592,0.031193,0.007207,178.0
438,-0.005515,0.050680,-0.015906,-0.067642,0.049341,0.079165,-0.028674,0.034309,-0.018118,0.044485,104.0
439,0.041708,0.050680,-0.015906,0.017282,-0.037344,-0.013840,-0.024993,-0.011080,-0.046879,0.015491,132.0
440,-0.045472,-0.044642,0.039062,0.001215,0.016318,0.015283,-0.028674,0.026560,0.044528,-0.025930,220.0


## List the available generative models

In [2]:
from synthcity.plugins import Plugins

# Names of the generative plugins known to synthcity.
# NOTE(review): `skip_debug=True` presumably leaves out debugging-only
# plugins - confirm against the synthcity `Plugins.list` documentation.
plugins = Plugins().list(skip_debug=True)

# Bare last expression: show the list of plugin names.
plugins

['privbayes',
 'rtvae',
 'bayesian_network',
 'nflow',
 'gaussian_copula',
 'uniform_sampler',
 'pategan',
 'adsgan',
 'copulagan',
 'tvae',
 'marginal_distributions',
 'ctgan']

## Benchmark the quality of plugins

In [3]:
from synthcity.benchmark import Benchmarks

# Benchmark every plugin returned by `Plugins().list()` on the real data `X`.
# `score` is indexed by plugin name further down in this notebook.
# NOTE(review): unlike the listing cell, this call does not pass
# `skip_debug=True` - confirm both calls return the same set of plugins.
score = Benchmarks.evaluate(
    Plugins().list(),
    X,
    # Ask for as many synthetic rows as there are real rows.
    synthetic_size=len(X),
    sensitive_columns=["sex"],
    # Corresponds to the `rounds` value of 5 shown in the printed report.
    repeats=5,
)


In [4]:
# Print the benchmark report held in `score`: a "Comparatives" table of the
# per-metric values for all plugins, followed by one statistics table
# (min/max/mean/stddev/median/iqr/rounds/errors/durations) per plugin.
Benchmarks.print(score)


[4m[1mComparatives[0m[0m


Unnamed: 0,privbayes,rtvae,bayesian_network,nflow,gaussian_copula,uniform_sampler,pategan,adsgan,copulagan,tvae,marginal_distributions,ctgan
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
sanity.common_rows_proportion,0.0,0.0,0.286878,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
sanity.avg_distance_nearest_neighbor,0.09339,0.042299,0.034911,0.036685,0.087864,0.179756,0.060651,0.04419,0.100988,0.09254,0.167104,0.120215
sanity.inlier_probability,0.874208,0.957919,0.950679,0.978733,0.806787,0.719457,0.909955,0.959729,0.807692,0.827602,0.744344,0.743439
sanity.outlier_probability,0.004072,0.00543,0.004072,0.003167,0.00362,0.014027,0.004072,0.00362,0.002715,0.008145,0.004072,0.004072
statistical.inverse_kl_divergence,0.81958,0.821315,0.986395,0.956035,0.964581,0.778577,0.740388,0.762003,0.809209,0.789666,0.77559,0.835876
statistical.kolmogorov_smirnov_test,0.829206,0.833032,0.962567,0.909585,0.933731,0.765076,0.800782,0.835459,0.731222,0.833896,0.757795,0.764377
statistical.chi_squared_test,0.615097,0.504346,0.905788,0.823043,0.942214,0.997867,0.372824,0.361189,0.949391,0.539589,0.997808,0.89462
statistical.maximum_mean_discrepancy,0.014787,0.005498,0.002256,0.004638,0.005013,0.005596,0.0075,0.006336,0.007023,0.006466,0.005876,0.008604
statistical.inverse_cdf_distance,1.6761,1.905032,1.738172,1.862435,1.721277,1.462218,2.011956,1.853365,1.685857,1.808113,1.471777,1.653022



[4m[1mPlugin : privbayes[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.01
sanity.avg_distance_nearest_neighbor,0.072762,0.126178,0.09339,0.019977,0.086377,0.029406,5,0,0.01
sanity.inlier_probability,0.773756,0.934389,0.874208,0.060176,0.909502,0.079186,5,0,0.01
sanity.outlier_probability,0.002262,0.00905,0.004072,0.002638,0.002262,0.002262,5,0,0.01
statistical.inverse_kl_divergence,0.78029,0.86739,0.81958,0.030258,0.814914,0.037924,5,0,0.01
statistical.kolmogorov_smirnov_test,0.819005,0.856849,0.829206,0.01411,0.823324,0.007199,5,0,0.01
statistical.chi_squared_test,0.541483,0.724916,0.615097,0.06784,0.630578,0.088958,5,0,0.02
statistical.maximum_mean_discrepancy,0.012002,0.021369,0.014787,0.003394,0.013231,0.001778,5,0,0.02
statistical.inverse_cdf_distance,1.563666,1.882878,1.6761,0.116091,1.647806,0.134203,5,0,3.75




[4m[1mPlugin : rtvae[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.01
sanity.avg_distance_nearest_neighbor,0.024097,0.093791,0.042299,0.026068,0.029131,0.008841,5,0,0.01
sanity.inlier_probability,0.877828,0.993213,0.957919,0.041288,0.968326,0.0181,5,0,0.01
sanity.outlier_probability,0.002262,0.013575,0.00543,0.004433,0.002262,0.004525,5,0,0.01
statistical.inverse_kl_divergence,0.756518,0.862741,0.821315,0.037475,0.830838,0.042903,5,0,0.01
statistical.kolmogorov_smirnov_test,0.80687,0.868778,0.833032,0.025891,0.814891,0.045249,5,0,0.01
statistical.chi_squared_test,0.36271,0.633857,0.504346,0.094245,0.543477,0.106591,5,0,0.02
statistical.maximum_mean_discrepancy,0.004877,0.005952,0.005498,0.000359,0.005614,0.000281,5,0,0.03
statistical.inverse_cdf_distance,1.64009,2.224444,1.905032,0.188297,1.872614,0.086792,5,0,3.6




[4m[1mPlugin : bayesian_network[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.2737557,0.303167,0.286878,0.010358,0.2873303,0.013575,5,0,0.01
sanity.avg_distance_nearest_neighbor,0.02177756,0.046439,0.034911,0.010727,0.03856413,0.022563,5,0,0.01
sanity.inlier_probability,0.9004525,0.993213,0.950679,0.040608,0.9728507,0.081448,5,0,0.01
sanity.outlier_probability,0.002262443,0.006787,0.004072,0.002217,0.002262443,0.004525,5,0,0.01
statistical.inverse_kl_divergence,0.9800875,0.992214,0.986395,0.004368,0.9849591,0.005977,5,0,0.01
statistical.kolmogorov_smirnov_test,0.9549568,0.967914,0.962567,0.005511,0.9664747,0.009872,5,0,0.01
statistical.chi_squared_test,0.8145726,0.998353,0.905788,0.080438,0.9044618,0.175848,5,0,0.02
statistical.maximum_mean_discrepancy,0.002068674,0.002443,0.002256,0.00014,0.002210814,0.000224,5,0,0.03
statistical.inverse_cdf_distance,1.67566,1.772678,1.738172,0.032875,1.744971,0.008291,5,0,3.49




[4m[1mPlugin : nflow[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.01
sanity.avg_distance_nearest_neighbor,0.021752,0.058983,0.036685,0.012442,0.033375,0.008328,5,0,0.01
sanity.inlier_probability,0.963801,0.986425,0.978733,0.008044,0.9819,0.006787,5,0,0.01
sanity.outlier_probability,0.002262,0.004525,0.003167,0.001108,0.002262,0.002262,5,0,0.01
statistical.inverse_kl_divergence,0.946761,0.964514,0.956035,0.007416,0.959523,0.014232,5,0,0.01
statistical.kolmogorov_smirnov_test,0.902921,0.923077,0.909585,0.007292,0.908063,0.006787,5,0,0.01
statistical.chi_squared_test,0.787097,0.908433,0.823043,0.044101,0.810539,0.024587,5,0,0.02
statistical.maximum_mean_discrepancy,0.004184,0.004882,0.004638,0.00025,0.00466,0.000243,5,0,0.02
statistical.inverse_cdf_distance,1.762678,1.998981,1.862435,0.083921,1.843623,0.110473,5,0,3.37




[4m[1mPlugin : gaussian_copula[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.01
sanity.avg_distance_nearest_neighbor,0.051118,0.1176197,0.087864,0.021947,0.087925,0.01725,5,0,0.01
sanity.inlier_probability,0.728507,0.9683258,0.806787,0.084041,0.773756,0.038462,5,0,0.01
sanity.outlier_probability,0.002262,0.00678733,0.00362,0.00181,0.002262,0.002262,5,0,0.01
statistical.inverse_kl_divergence,0.962252,0.9688494,0.964581,0.002347,0.963954,0.002305,5,0,0.01
statistical.kolmogorov_smirnov_test,0.930687,0.9380913,0.933731,0.002473,0.933361,0.001851,5,0,0.01
statistical.chi_squared_test,0.902792,0.9973614,0.942214,0.044939,0.909091,0.092236,5,0,0.02
statistical.maximum_mean_discrepancy,0.004429,0.005732667,0.005013,0.000422,0.004997,0.000228,5,0,0.02
statistical.inverse_cdf_distance,1.683905,1.787131,1.721277,0.035137,1.712013,0.018395,5,0,3.2




[4m[1mPlugin : uniform_sampler[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.01
sanity.avg_distance_nearest_neighbor,0.103379,0.220642,0.179756,0.042478,0.186003,0.04583,5,0,0.01
sanity.inlier_probability,0.608597,0.891403,0.719457,0.101473,0.733032,0.115385,5,0,0.01
sanity.outlier_probability,0.002262,0.029412,0.014027,0.008751,0.013575,0.002262,5,0,0.01
statistical.inverse_kl_divergence,0.776423,0.780844,0.778577,0.001449,0.778732,0.001148,5,0,0.01
statistical.kolmogorov_smirnov_test,0.75977,0.774167,0.765076,0.005899,0.760798,0.009461,5,0,0.01
statistical.chi_squared_test,0.995541,0.999611,0.997867,0.001471,0.998526,0.001963,5,0,0.02
statistical.maximum_mean_discrepancy,0.004756,0.006137,0.005596,0.000474,0.005596,0.000398,5,0,0.02
statistical.inverse_cdf_distance,1.445004,1.493235,1.462218,0.018441,1.454119,0.026835,5,0,4.4




[4m[1mPlugin : pategan[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.01
sanity.avg_distance_nearest_neighbor,0.020998,0.103726,0.060651,0.030923,0.063509,0.05149,5,0,0.01
sanity.inlier_probability,0.850679,0.988688,0.909955,0.058271,0.891403,0.117647,5,0,0.01
sanity.outlier_probability,0.002262,0.006787,0.004072,0.001693,0.004525,0.002262,5,0,0.01
statistical.inverse_kl_divergence,0.653741,0.814756,0.740388,0.060019,0.726773,0.094369,5,0,0.01
statistical.kolmogorov_smirnov_test,0.749897,0.837721,0.800782,0.029506,0.799671,0.023858,5,0,0.01
statistical.chi_squared_test,0.266041,0.452543,0.372824,0.086406,0.434647,0.174048,5,0,0.02
statistical.maximum_mean_discrepancy,0.005342,0.01077,0.0075,0.002136,0.006421,0.003548,5,0,0.03
statistical.inverse_cdf_distance,1.685675,2.341537,2.011956,0.250614,2.051012,0.439387,5,0,3.1




[4m[1mPlugin : adsgan[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.01
sanity.avg_distance_nearest_neighbor,0.017023,0.104764,0.04419,0.03137,0.031373,0.01654,5,0,0.01
sanity.inlier_probability,0.877828,0.988688,0.959729,0.041874,0.9819,0.022624,5,0,0.01
sanity.outlier_probability,0.002262,0.004525,0.00362,0.001108,0.004525,0.002262,5,0,0.01
statistical.inverse_kl_divergence,0.706308,0.812376,0.762003,0.039856,0.744343,0.058774,5,0,0.01
statistical.kolmogorov_smirnov_test,0.831139,0.842452,0.835459,0.004193,0.834225,0.005965,5,0,0.01
statistical.chi_squared_test,0.09058,0.541782,0.361189,0.150849,0.36265,0.089567,5,0,0.02
statistical.maximum_mean_discrepancy,0.005445,0.006998,0.006336,0.000603,0.006417,0.001066,5,0,0.02
statistical.inverse_cdf_distance,1.694471,1.996852,1.853365,0.12335,1.885803,0.24101,5,0,3.25




[4m[1mPlugin : copulagan[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.01
sanity.avg_distance_nearest_neighbor,0.064331,0.142446,0.100988,0.026166,0.101665,0.026281,5,0,0.01
sanity.inlier_probability,0.68552,0.9819,0.807692,0.114748,0.762443,0.192308,5,0,0.01
sanity.outlier_probability,0.002262,0.004525,0.002715,0.000905,0.002262,0.0,5,0,0.01
statistical.inverse_kl_divergence,0.787136,0.850287,0.809209,0.021446,0.803739,0.002637,5,0,0.01
statistical.kolmogorov_smirnov_test,0.713904,0.776429,0.731222,0.023449,0.717195,0.015014,5,0,0.01
statistical.chi_squared_test,0.841967,0.997004,0.949391,0.054892,0.968307,0.011379,5,0,0.02
statistical.maximum_mean_discrepancy,0.006113,0.007844,0.007023,0.000654,0.00719,0.001103,5,0,0.02
statistical.inverse_cdf_distance,1.481147,1.874608,1.685857,0.126093,1.692527,0.061268,5,0,3.29




[4m[1mPlugin : tvae[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.01
sanity.avg_distance_nearest_neighbor,0.030157,0.127607,0.09254,0.033686,0.102037,0.02464,5,0,0.01
sanity.inlier_probability,0.69457,0.99095,0.827602,0.103633,0.825792,0.133484,5,0,0.01
sanity.outlier_probability,0.002262,0.029412,0.008145,0.01067,0.002262,0.002262,5,0,0.01
statistical.inverse_kl_divergence,0.754688,0.815455,0.789666,0.022579,0.791007,0.035436,5,0,0.01
statistical.kolmogorov_smirnov_test,0.819622,0.842452,0.833896,0.008352,0.833813,0.01049,5,0,0.01
statistical.chi_squared_test,0.402914,0.719909,0.539589,0.112009,0.534055,0.146814,5,0,0.02
statistical.maximum_mean_discrepancy,0.005482,0.00827,0.006466,0.001033,0.006227,0.001332,5,0,0.02
statistical.inverse_cdf_distance,1.689774,1.897131,1.808113,0.075681,1.804184,0.110831,5,0,3.37




[4m[1mPlugin : marginal_distributions[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.01
sanity.avg_distance_nearest_neighbor,0.138838,0.191833,0.167104,0.016893,0.168057,0.004936,5,0,0.01
sanity.inlier_probability,0.680995,0.809955,0.744344,0.042133,0.737557,0.031674,5,0,0.01
sanity.outlier_probability,0.002262,0.004525,0.004072,0.000905,0.004525,0.0,5,0,0.01
statistical.inverse_kl_divergence,0.769383,0.78217,0.77559,0.004702,0.77561,0.007568,5,0,0.01
statistical.kolmogorov_smirnov_test,0.750926,0.762443,0.757795,0.004284,0.75977,0.00617,5,0,0.01
statistical.chi_squared_test,0.995519,0.998882,0.997808,0.001227,0.998168,0.001134,5,0,0.02
statistical.maximum_mean_discrepancy,0.005561,0.006372,0.005876,0.000291,0.005833,0.000368,5,0,0.02
statistical.inverse_cdf_distance,1.434748,1.546308,1.471777,0.043378,1.442242,0.056183,5,0,4.07




[4m[1mPlugin : ctgan[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.01
sanity.avg_distance_nearest_neighbor,0.100141,0.149972,0.120215,0.019299,0.109685,0.030417,5,0,0.01
sanity.inlier_probability,0.669683,0.900452,0.743439,0.081518,0.726244,0.036199,5,0,0.01
sanity.outlier_probability,0.002262,0.006787,0.004072,0.002217,0.002262,0.004525,5,0,0.01
statistical.inverse_kl_divergence,0.803469,0.868282,0.835876,0.021839,0.829938,0.021514,5,0,0.01
statistical.kolmogorov_smirnov_test,0.730975,0.817565,0.764377,0.032228,0.751131,0.045866,5,0,0.01
statistical.chi_squared_test,0.80974,0.968867,0.89462,0.051177,0.904922,0.023313,5,0,0.02
statistical.maximum_mean_discrepancy,0.006724,0.013442,0.008604,0.002492,0.007372,0.001484,5,0,0.02
statistical.inverse_cdf_distance,1.430205,1.875792,1.653022,0.14091,1.653291,0.002754,5,0,3.53





In [5]:
import pandas as pd
import numpy as np

# Collect the "mean" column of every plugin's benchmark report, one Series per
# plugin, in the iteration order of `score`.
means = []
for plugin in score:
    plugin_report = score[plugin]
    means.append(plugin_report["mean"])
    # The reference scores are re-read on every iteration, so after the loop
    # these two dicts hold the values reported for the LAST plugin.
    # NOTE(review): presumably "ok_score"/"bad_score" depend only on the
    # metric and not on the plugin - confirm against synthcity.
    good_scores = plugin_report["ok_score"].to_dict()
    bad_scores = plugin_report["bad_score"].to_dict()

# Side-by-side table: one row per metric, one column per plugin.
out = pd.concat(means, axis=1)
# `DataFrame.set_axis(..., inplace=True)` was deprecated in pandas 1.5 and
# removed in pandas 2.0, where it raises a TypeError. Assigning the column
# labels directly gives the same result on every pandas version.
out.columns = list(score.keys())

# CSS snippets used by the row highlighter: best value, worst value, and
# "no styling" for everything else.
bad_highlight = 'background-color: lightcoral;'
ok_highlight = 'background-color: green;'
default = ''

def highlights(row):
    """Return one CSS style string per value of a metric row.

    The row's name is used as the metric key into the module-level
    ``good_scores`` and ``bad_scores`` dicts. When the good reference score
    is lower than the bad one, the row minimum is treated as the best value
    and the row maximum as the worst; otherwise the roles are swapped.

    Values equal to the best get ``ok_highlight``, values equal to the worst
    get ``bad_highlight``, and all others get ``default``. The best check
    runs first, so a row whose values are all equal is styled entirely with
    ``ok_highlight``.
    """
    metric = row.name
    lower_is_better = good_scores[metric] < bad_scores[metric]

    if lower_is_better:
        best_val, worst_val = np.min(row.values), np.max(row)
    else:
        best_val, worst_val = np.max(row.values), np.min(row)

    def style_for(val):
        # Best takes precedence over worst when both match the same value.
        if val == best_val:
            return ok_highlight
        if val == worst_val:
            return bad_highlight
        return default

    return [style_for(val) for val in row.values]

# Run `highlights` once per row (axis=1) of the means table. The resulting
# styled table is the cell's output.
out.style.apply(highlights, axis=1)

Unnamed: 0,privbayes,rtvae,bayesian_network,nflow,gaussian_copula,uniform_sampler,pategan,adsgan,copulagan,tvae,marginal_distributions,ctgan
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
sanity.common_rows_proportion,0.0,0.0,0.286878,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
sanity.avg_distance_nearest_neighbor,0.09339,0.042299,0.034911,0.036685,0.087864,0.179756,0.060651,0.04419,0.100988,0.09254,0.167104,0.120215
sanity.inlier_probability,0.874208,0.957919,0.950679,0.978733,0.806787,0.719457,0.909955,0.959729,0.807692,0.827602,0.744344,0.743439
sanity.outlier_probability,0.004072,0.00543,0.004072,0.003167,0.00362,0.014027,0.004072,0.00362,0.002715,0.008145,0.004072,0.004072
statistical.inverse_kl_divergence,0.81958,0.821315,0.986395,0.956035,0.964581,0.778577,0.740388,0.762003,0.809209,0.789666,0.77559,0.835876
statistical.kolmogorov_smirnov_test,0.829206,0.833032,0.962567,0.909585,0.933731,0.765076,0.800782,0.835459,0.731222,0.833896,0.757795,0.764377
statistical.chi_squared_test,0.615097,0.504346,0.905788,0.823043,0.942214,0.997867,0.372824,0.361189,0.949391,0.539589,0.997808,0.89462
statistical.maximum_mean_discrepancy,0.014787,0.005498,0.002256,0.004638,0.005013,0.005596,0.0075,0.006336,0.007023,0.006466,0.005876,0.008604
statistical.inverse_cdf_distance,1.6761,1.905032,1.738172,1.862435,1.721277,1.462218,2.011956,1.853365,1.685857,1.808113,1.471777,1.653022


# 