# Tutorial 2: Benchmarks

In [1]:
import warnings
import sys
warnings.filterwarnings('ignore')

from sklearn.datasets import load_diabetes
from synthcity.plugins import Plugins
import synthcity.logger as log

# Fetch the diabetes dataset as pandas objects (as_frame=True) and build the
# working frame X by appending the target values as an extra "target" column.
features, y = load_diabetes(return_X_y=True, as_frame=True)
X = features.assign(target=y)

# Last expression of the cell: display the frame.
X

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.050680,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019908,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068330,-0.092204,75.0
2,0.085299,0.050680,0.044451,-0.005671,-0.045599,-0.034194,-0.032356,-0.002592,0.002864,-0.025930,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022692,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031991,-0.046641,135.0
...,...,...,...,...,...,...,...,...,...,...,...
437,0.041708,0.050680,0.019662,0.059744,-0.005697,-0.002566,-0.028674,-0.002592,0.031193,0.007207,178.0
438,-0.005515,0.050680,-0.015906,-0.067642,0.049341,0.079165,-0.028674,0.034309,-0.018118,0.044485,104.0
439,0.041708,0.050680,-0.015906,0.017282,-0.037344,-0.013840,-0.024993,-0.011080,-0.046879,0.015491,132.0
440,-0.045472,-0.044642,0.039062,0.001215,0.016318,0.015283,-0.028674,0.026560,0.044528,-0.025930,220.0


## List the available generative models

In [2]:
from synthcity.plugins import Plugins

# Ask `Plugins` for the available generative models; the result is the list of
# plugin names displayed below.
Plugins().list()

['gaussian_copula',
 'uniform_sampler',
 'adsgan',
 'pategan',
 'ctgan',
 'dummy_sampler',
 'tvae',
 'copulagan',
 'marginal_distributions',
 'privbayes']

## Benchmark the quality of plugins

In [3]:
from synthcity.benchmark import Benchmarks

# Benchmark every plugin reported by `Plugins().list()` on the frame X.
# Each run generates as many synthetic rows as X has, and is repeated 5 times.
plugin_names = Plugins().list()
score = Benchmarks.evaluate(
    plugin_names,
    X,
    sensitive_columns=["sex"],
    synthetic_size=len(X),
    repeats=5,
)


In [4]:
# Render the benchmark results held in `score`: a "Comparatives" table across
# all plugins, followed by one detailed table per plugin (see the output below).
Benchmarks.print(score)


[4m[1mComparatives[0m[0m


Unnamed: 0,gaussian_copula,uniform_sampler,adsgan,pategan,ctgan,dummy_sampler,tvae,copulagan,marginal_distributions,privbayes
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.620814,0.0,0.0,0.0,0.0
sanity.avg_distance_nearest_neighbor,0.09431729,0.171701,0.117358,0.052857,0.111746,0.043744,0.06705,0.106615,0.150489,0.078724
sanity.inlier_probability,0.7900452,0.730317,0.844344,0.964253,0.822172,0.900905,0.937104,0.792308,0.776471,0.900905
sanity.outlier_probability,0.002714932,0.008145,0.007692,0.004072,0.009955,0.003167,0.005882,0.004977,0.006335,0.006787
statistical.inverse_kl_divergence,0.9541018,0.776156,0.842302,0.947653,0.840967,0.984443,0.826513,0.837532,0.775535,0.831159
statistical.kolmogorov_smirnov_test,0.9246812,0.758536,0.780296,0.883093,0.77536,0.965858,0.845002,0.767832,0.756191,0.831756
statistical.chi_squared_test,0.8651301,0.997699,0.833064,0.957089,0.879774,0.925302,0.5909,0.937246,0.996038,0.66484
statistical.maximum_mean_discrepancy,0.004960897,0.005908,0.005576,0.004736,0.006894,0.002441,0.006782,0.005457,0.005732,0.013378
statistical.inverse_cdf_distance,1.686226,1.472834,1.634876,1.733328,1.559593,1.736441,1.815174,1.719994,1.479462,1.634845



[4m[1mPlugin : gaussian_copula[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.01
sanity.avg_distance_nearest_neighbor,0.078227,0.1143565,0.09431729,0.01183,0.090802,0.007317,5,0,0.01
sanity.inlier_probability,0.733032,0.9434389,0.7900452,0.077419,0.757919,0.013575,5,0,0.01
sanity.outlier_probability,0.002262,0.004524887,0.002714932,0.000905,0.002262,0.0,5,0,0.01
statistical.inverse_kl_divergence,0.948975,0.9623781,0.9541018,0.004496,0.953048,0.002438,5,0,0.01
statistical.kolmogorov_smirnov_test,0.908474,0.9343891,0.9246812,0.00915,0.927602,0.009667,5,0,0.01
statistical.chi_squared_test,0.811883,0.9017844,0.8651301,0.042158,0.897017,0.084544,5,0,0.02
statistical.maximum_mean_discrepancy,0.004823,0.005189493,0.004960897,0.000162,0.004839,0.000299,5,0,0.02
statistical.inverse_cdf_distance,1.599641,1.735801,1.686226,0.047613,1.689405,0.039944,5,0,3.28




[4m[1mPlugin : uniform_sampler[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.01
sanity.avg_distance_nearest_neighbor,0.148075,0.222863,0.171701,0.026416,0.162947,0.010989,5,0,0.01
sanity.inlier_probability,0.61086,0.807692,0.730317,0.066508,0.748869,0.049774,5,0,0.01
sanity.outlier_probability,0.004525,0.013575,0.008145,0.003676,0.006787,0.006787,5,0,0.01
statistical.inverse_kl_divergence,0.768795,0.784707,0.776156,0.006314,0.773479,0.011292,5,0,0.01
statistical.kolmogorov_smirnov_test,0.746812,0.766763,0.758536,0.007282,0.757507,0.009872,5,0,0.01
statistical.chi_squared_test,0.995935,0.999811,0.997699,0.001399,0.997742,0.002071,5,0,0.01
statistical.maximum_mean_discrepancy,0.005496,0.006135,0.005908,0.000247,0.006015,0.000374,5,0,0.02
statistical.inverse_cdf_distance,1.431059,1.523621,1.472834,0.032032,1.460931,0.036324,5,0,4.28




[4m[1mPlugin : adsgan[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.01
sanity.avg_distance_nearest_neighbor,0.070636,0.185411,0.117358,0.046552,0.085755,0.077072,5,0,0.01
sanity.inlier_probability,0.71267,0.938914,0.844344,0.09479,0.891403,0.180995,5,0,0.01
sanity.outlier_probability,0.002262,0.020362,0.007692,0.006802,0.004525,0.006787,5,0,0.01
statistical.inverse_kl_divergence,0.813425,0.85852,0.842302,0.015928,0.847536,0.015278,5,0,0.01
statistical.kolmogorov_smirnov_test,0.744961,0.798848,0.780296,0.019465,0.785274,0.020362,5,0,0.01
statistical.chi_squared_test,0.686603,0.986828,0.833064,0.112228,0.875057,0.17264,5,0,0.02
statistical.maximum_mean_discrepancy,0.004671,0.007144,0.005576,0.000913,0.005142,0.001165,5,0,0.03
statistical.inverse_cdf_distance,1.424472,1.827762,1.634876,0.158389,1.576078,0.274195,5,0,2.92




[4m[1mPlugin : pategan[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.01
sanity.avg_distance_nearest_neighbor,0.021295,0.095035,0.052857,0.025265,0.051827,0.027185,5,0,0.01
sanity.inlier_probability,0.902715,0.986425,0.964253,0.031395,0.975113,0.015837,5,0,0.01
sanity.outlier_probability,0.002262,0.004525,0.004072,0.000905,0.004525,0.0,5,0,0.01
statistical.inverse_kl_divergence,0.927014,0.955893,0.947653,0.010782,0.954082,0.007512,5,0,0.01
statistical.kolmogorov_smirnov_test,0.854587,0.893871,0.883093,0.01479,0.891197,0.010284,5,0,0.01
statistical.chi_squared_test,0.899755,0.996063,0.957089,0.04354,0.985132,0.087628,5,0,0.01
statistical.maximum_mean_discrepancy,0.0045,0.005051,0.004736,0.000202,0.00463,0.000274,5,0,0.02
statistical.inverse_cdf_distance,1.619784,1.832577,1.733328,0.069611,1.734509,0.052886,5,0,2.81




[4m[1mPlugin : ctgan[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.01
sanity.avg_distance_nearest_neighbor,0.07332,0.184338,0.111746,0.039835,0.108854,0.037161,5,0,0.01
sanity.inlier_probability,0.70362,0.968326,0.822172,0.107351,0.771493,0.196833,5,0,0.01
sanity.outlier_probability,0.002262,0.033937,0.009955,0.012108,0.004525,0.004525,5,0,0.01
statistical.inverse_kl_divergence,0.784383,0.882742,0.840967,0.038353,0.863075,0.061521,5,0,0.01
statistical.kolmogorov_smirnov_test,0.706705,0.816742,0.77536,0.046261,0.806458,0.081242,5,0,0.01
statistical.chi_squared_test,0.740133,0.986519,0.879774,0.086223,0.88624,0.108504,5,0,0.02
statistical.maximum_mean_discrepancy,0.005613,0.008743,0.006894,0.00106,0.006767,0.000945,5,0,0.02
statistical.inverse_cdf_distance,1.502298,1.592275,1.559593,0.031933,1.56488,0.032656,5,0,3.24




[4m[1mPlugin : dummy_sampler[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.608597,0.633484,0.620814,0.008417,0.622172,0.00905,5,0,0.01
sanity.avg_distance_nearest_neighbor,0.028288,0.056317,0.043744,0.009253,0.043086,0.006989,5,0,0.01
sanity.inlier_probability,0.850679,0.968326,0.900905,0.050232,0.873303,0.097285,5,0,0.01
sanity.outlier_probability,0.002262,0.006787,0.003167,0.00181,0.002262,0.0,5,0,0.01
statistical.inverse_kl_divergence,0.967309,0.990498,0.984443,0.00865,0.987651,0.002257,5,0,0.01
statistical.kolmogorov_smirnov_test,0.960921,0.970794,0.965858,0.003236,0.965652,0.002674,5,0,0.01
statistical.chi_squared_test,0.723991,0.999342,0.925302,0.106639,0.997046,0.091242,5,0,0.02
statistical.maximum_mean_discrepancy,0.002185,0.003003,0.002441,0.00031,0.002253,0.00034,5,0,0.02
statistical.inverse_cdf_distance,1.687819,1.79454,1.736441,0.036018,1.724818,0.034911,5,0,2.98




[4m[1mPlugin : tvae[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.01
sanity.avg_distance_nearest_neighbor,0.036052,0.115205,0.06705,0.029799,0.063532,0.045928,5,0,0.01
sanity.inlier_probability,0.830317,0.993213,0.937104,0.05978,0.968326,0.065611,5,0,0.01
sanity.outlier_probability,0.002262,0.0181,0.005882,0.006171,0.002262,0.002262,5,0,0.01
statistical.inverse_kl_divergence,0.802819,0.85066,0.826513,0.017821,0.829815,0.029171,5,0,0.01
statistical.kolmogorov_smirnov_test,0.837515,0.851913,0.845002,0.004992,0.844714,0.006376,5,0,0.01
statistical.chi_squared_test,0.536459,0.633252,0.5909,0.042977,0.610156,0.090347,5,0,0.01
statistical.maximum_mean_discrepancy,0.00477,0.009352,0.006782,0.001544,0.006497,0.00151,5,0,0.02
statistical.inverse_cdf_distance,1.679885,1.963482,1.815174,0.097769,1.818049,0.121337,5,0,2.93




[4m[1mPlugin : copulagan[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.01
sanity.avg_distance_nearest_neighbor,0.0746,0.135025,0.106615,0.023274,0.096933,0.037264,5,0,0.01
sanity.inlier_probability,0.723982,0.972851,0.792308,0.092294,0.755656,0.047511,5,0,0.01
sanity.outlier_probability,0.002262,0.011312,0.004977,0.003325,0.004525,0.002262,5,0,0.01
statistical.inverse_kl_divergence,0.801166,0.869206,0.837532,0.025119,0.849944,0.03592,5,0,0.01
statistical.kolmogorov_smirnov_test,0.714315,0.809955,0.767832,0.034759,0.778898,0.050596,5,0,0.01
statistical.chi_squared_test,0.893514,0.992957,0.937246,0.044646,0.90812,0.088934,5,0,0.02
statistical.maximum_mean_discrepancy,0.005178,0.005889,0.005457,0.000261,0.005486,0.000343,5,0,0.02
statistical.inverse_cdf_distance,1.492142,1.935774,1.719994,0.160848,1.696957,0.243871,5,0,3.53




[4m[1mPlugin : marginal_distributions[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.01
sanity.avg_distance_nearest_neighbor,0.136322,0.181806,0.150489,0.016699,0.14667,0.01479,5,0,0.01
sanity.inlier_probability,0.721719,0.819005,0.776471,0.036632,0.771493,0.058824,5,0,0.01
sanity.outlier_probability,0.002262,0.015837,0.006335,0.005039,0.004525,0.004525,5,0,0.01
statistical.inverse_kl_divergence,0.768971,0.787518,0.775535,0.006492,0.773545,0.005331,5,0,0.01
statistical.kolmogorov_smirnov_test,0.754216,0.760592,0.756191,0.002394,0.754628,0.002262,5,0,0.01
statistical.chi_squared_test,0.995494,0.996848,0.996038,0.000466,0.995907,0.00047,5,0,0.02
statistical.maximum_mean_discrepancy,0.005271,0.006483,0.005732,0.000441,0.005792,0.000511,5,0,0.02
statistical.inverse_cdf_distance,1.44715,1.497032,1.479462,0.017879,1.482207,0.018219,5,0,3.66




[4m[1mPlugin : privbayes[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.01
sanity.avg_distance_nearest_neighbor,0.037689,0.10284,0.078724,0.022332,0.086372,0.015922,5,0,0.01
sanity.inlier_probability,0.839367,0.984163,0.900905,0.047054,0.88914,0.0181,5,0,0.01
sanity.outlier_probability,0.002262,0.015837,0.006787,0.005724,0.002262,0.00905,5,0,0.01
statistical.inverse_kl_divergence,0.800707,0.857391,0.831159,0.023107,0.830583,0.045658,5,0,0.01
statistical.kolmogorov_smirnov_test,0.823324,0.842657,0.831756,0.008087,0.82682,0.014809,5,0,0.01
statistical.chi_squared_test,0.444864,0.901731,0.66484,0.170038,0.623948,0.276039,5,0,0.02
statistical.maximum_mean_discrepancy,0.012168,0.016321,0.013378,0.001496,0.012818,0.000347,5,0,0.02
statistical.inverse_cdf_distance,1.580492,1.709406,1.634845,0.047112,1.621202,0.06894,5,0,3.19





In [5]:
import pandas as pd
import numpy as np

# Build the comparison table: one column per plugin, one row per metric, each
# cell holding the "mean" value taken from that plugin's results.
plugin_names = list(score.keys())
means = [score[plugin]["mean"] for plugin in plugin_names]

out = pd.concat(means, axis=1)
# `set_axis(..., inplace=True)` fails on pandas >= 2.0 (the `inplace` argument
# was removed), so rebind the relabelled frame instead of mutating in place.
out = out.set_axis(plugin_names, axis=1)

# Per-metric reference values, read from the last plugin's results (the same
# values the previous per-iteration rebinding ended up with). `highlights`
# compares them to decide whether lower or higher is better for a metric.
# NOTE(review): assumes "ok_score"/"bad_score" are identical for every plugin -- confirm.
reference = score[plugin_names[-1]]
good_scores = reference["ok_score"].to_dict()
bad_scores = reference["bad_score"].to_dict()

# CSS snippets returned by `highlights` for the best / worst / other cells.
bad_highlight = 'background-color: lightcoral;'
ok_highlight = 'background-color: green;'
default = ''

def highlights(row):
    """Return one CSS style string per cell of a metric row.

    Intended for ``Styler.apply(highlights, axis=1)``: ``row`` is a Series
    whose ``name`` is the metric and whose values are the per-plugin scores.

    The metric's direction is read from the module-level ``good_scores`` and
    ``bad_scores`` dicts: when the good reference value is smaller than the
    bad one, the lowest score is the best; otherwise the highest is.

    Cells equal to the best value get ``ok_highlight``, cells equal to the
    worst value get ``bad_highlight``, all others get ``default``. When every
    value is tied, best and worst coincide and all cells get ``ok_highlight``.

    NaN scores are ignored when determining best and worst, and NaN cells
    themselves are left unstyled.

    Raises:
        KeyError: if the metric is missing from ``good_scores``/``bad_scores``.
    """
    metric = row.name
    lower_is_better = good_scores[metric] < bad_scores[metric]

    # Series.min()/max() skip NaN. Using them for both extremes keeps best and
    # worst consistent; a NaN-propagating min/max would yield NaN, which never
    # compares equal to any cell, so that extreme would silently go unmarked.
    lowest, highest = row.min(), row.max()
    if lower_is_better:
        best_val, worst_val = lowest, highest
    else:
        best_val, worst_val = highest, lowest

    styles = []
    for val in row.values:
        # Best is checked first so that ties between best and worst show as "ok".
        if val == best_val:
            styles.append(ok_highlight)
        elif val == worst_val:
            styles.append(bad_highlight)
        else:
            styles.append(default)

    return styles

# Style the table row by row (axis=1): `highlights` receives each metric's row
# and returns the CSS for its cells. As the last expression of the cell, the
# resulting Styler is what gets displayed.
out.style.apply(highlights, axis=1)

Unnamed: 0,gaussian_copula,uniform_sampler,adsgan,pategan,ctgan,dummy_sampler,tvae,copulagan,marginal_distributions,privbayes
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.620814,0.0,0.0,0.0,0.0
sanity.avg_distance_nearest_neighbor,0.094317,0.171701,0.117358,0.052857,0.111746,0.043744,0.06705,0.106615,0.150489,0.078724
sanity.inlier_probability,0.790045,0.730317,0.844344,0.964253,0.822172,0.900905,0.937104,0.792308,0.776471,0.900905
sanity.outlier_probability,0.002715,0.008145,0.007692,0.004072,0.009955,0.003167,0.005882,0.004977,0.006335,0.006787
statistical.inverse_kl_divergence,0.954102,0.776156,0.842302,0.947653,0.840967,0.984443,0.826513,0.837532,0.775535,0.831159
statistical.kolmogorov_smirnov_test,0.924681,0.758536,0.780296,0.883093,0.77536,0.965858,0.845002,0.767832,0.756191,0.831756
statistical.chi_squared_test,0.86513,0.997699,0.833064,0.957089,0.879774,0.925302,0.5909,0.937246,0.996038,0.66484
statistical.maximum_mean_discrepancy,0.004961,0.005908,0.005576,0.004736,0.006894,0.002441,0.006782,0.005457,0.005732,0.013378
statistical.inverse_cdf_distance,1.686226,1.472834,1.634876,1.733328,1.559593,1.736441,1.815174,1.719994,1.479462,1.634845


# 