# Tutorial 2: Benchmarks

In [1]:
import warnings
import sys
warnings.filterwarnings('ignore')

from sklearn.datasets import load_diabetes
from synthcity.plugins import Plugins
import synthcity.logger as log

# Fetch the diabetes features and target as pandas objects, then build a
# single frame whose last column, "target", holds the regression target.
X, y = load_diabetes(as_frame=True, return_X_y=True)
X = X.assign(target=y)

# Bare last expression: render the combined frame as the cell output.
X

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.050680,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019908,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068330,-0.092204,75.0
2,0.085299,0.050680,0.044451,-0.005671,-0.045599,-0.034194,-0.032356,-0.002592,0.002864,-0.025930,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022692,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031991,-0.046641,135.0
...,...,...,...,...,...,...,...,...,...,...,...
437,0.041708,0.050680,0.019662,0.059744,-0.005697,-0.002566,-0.028674,-0.002592,0.031193,0.007207,178.0
438,-0.005515,0.050680,-0.015906,-0.067642,0.049341,0.079165,-0.028674,0.034309,-0.018118,0.044485,104.0
439,0.041708,0.050680,-0.015906,0.017282,-0.037344,-0.013840,-0.024993,-0.011080,-0.046879,0.015491,132.0
440,-0.045472,-0.044642,0.039062,0.001215,0.016318,0.015283,-0.028674,0.026560,0.044528,-0.025930,220.0


## List the available generative models

In [2]:
from synthcity.plugins import Plugins

# Ask the plugin registry for the names of the available generative models.
# NOTE(review): skip_debug=True presumably hides debug-only plugins; confirm
# against the synthcity documentation.
plugins = Plugins().list(skip_debug=True)

# Bare last expression: show the list of plugin names.
plugins

['privbayes',
 'ctgan',
 'pategan',
 'rtvae',
 'tvae',
 'copulagan',
 'gaussian_copula',
 'adsgan',
 'bayesian_network',
 'uniform_sampler',
 'marginal_distributions']

## Benchmark the quality of plugins

In [3]:
from synthcity.benchmark import Benchmarks

# Benchmark every plugin returned by Plugins().list() on the frame X.
# Each metric is reported over 5 rounds (see the "rounds" column printed by
# Benchmarks.print), with as many synthetic rows requested as X has rows.
# NOTE(review): the run is stochastic and no seed is set here, so re-running
# the notebook will not reproduce the stored numbers exactly.
score = Benchmarks.evaluate(
    Plugins().list(),
    X,
    sensitive_columns=["sex"],
    synthetic_size=len(X),
    repeats=5,
)


In [4]:
# Print the benchmark report held in `score`: a "Comparatives" table of mean
# scores across all plugins, followed by one detailed table per plugin
# (min, max, mean, stddev, median, iqr, rounds, errors, durations).
Benchmarks.print(score)


[4m[1mComparatives[0m[0m


Unnamed: 0,privbayes,ctgan,pategan,rtvae,tvae,copulagan,gaussian_copula,adsgan,bayesian_network,uniform_sampler,marginal_distributions
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2959276,0.0,0.0
sanity.avg_distance_nearest_neighbor,0.064546,0.12147,0.062627,0.029637,0.098334,0.087193,0.08172792,0.041681,0.03617961,0.185152,0.184771
sanity.inlier_probability,0.941629,0.828507,0.919005,0.967421,0.833484,0.868326,0.8791855,0.960181,0.9497738,0.7,0.69457
sanity.outlier_probability,0.004525,0.013122,0.004072,0.004525,0.008145,0.002262,0.002262443,0.00362,0.00361991,0.011312,0.00724
statistical.inverse_kl_divergence,0.8198,0.834569,0.792611,0.792042,0.791295,0.81835,0.9612365,0.710925,0.9846946,0.776488,0.780241
statistical.kolmogorov_smirnov_test,0.822007,0.762443,0.806952,0.824064,0.828507,0.758001,0.920074,0.788276,0.9659811,0.75652,0.761826
statistical.chi_squared_test,0.657202,0.957609,0.489821,0.479696,0.455625,0.953235,0.901903,0.341815,0.8704161,0.997949,0.9983
statistical.maximum_mean_discrepancy,0.015637,0.020362,0.005665,0.005905,0.00641,0.00644,0.005000748,0.00709,0.002202179,0.005956,0.005888
statistical.inverse_cdf_distance,1.61798,1.60757,1.802995,1.92629,1.79151,1.695818,1.704147,1.957537,1.713826,1.462954,1.480024



[4m[1mPlugin : privbayes[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.03
sanity.avg_distance_nearest_neighbor,0.035143,0.114975,0.064546,0.033119,0.042493,0.05639,5,0,0.01
sanity.inlier_probability,0.873303,0.986425,0.941629,0.052191,0.9819,0.10181,5,0,0.01
sanity.outlier_probability,0.002262,0.006787,0.004525,0.001431,0.004525,0.0,5,0,0.01
statistical.inverse_kl_divergence,0.794754,0.837684,0.8198,0.017307,0.824135,0.032487,5,0,0.01
statistical.kolmogorov_smirnov_test,0.803784,0.843069,0.822007,0.014884,0.828054,0.022624,5,0,0.02
statistical.chi_squared_test,0.533601,0.811995,0.657202,0.11329,0.635268,0.220957,5,0,0.02
statistical.maximum_mean_discrepancy,0.012555,0.019213,0.015637,0.002242,0.014838,0.002078,5,0,0.03
statistical.inverse_cdf_distance,1.516258,1.772972,1.61798,0.092582,1.601851,0.122779,5,0,3.32




[4m[1mPlugin : ctgan[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.01
sanity.avg_distance_nearest_neighbor,0.072324,0.1826,0.12147,0.042363,0.137684,0.065734,5,0,0.01
sanity.inlier_probability,0.742081,0.943439,0.828507,0.076005,0.830317,0.124434,5,0,0.01
sanity.outlier_probability,0.002262,0.036199,0.013122,0.012175,0.00905,0.00905,5,0,0.01
statistical.inverse_kl_divergence,0.804342,0.851273,0.834569,0.019029,0.84803,0.029548,5,0,0.01
statistical.kolmogorov_smirnov_test,0.72028,0.787947,0.762443,0.026087,0.777458,0.038667,5,0,0.01
statistical.chi_squared_test,0.872543,0.997348,0.957609,0.045429,0.974904,0.040294,5,0,0.02
statistical.maximum_mean_discrepancy,0.005867,0.044576,0.020362,0.01514,0.014486,0.024813,5,0,0.03
statistical.inverse_cdf_distance,1.407536,1.757937,1.60757,0.133428,1.597082,0.221236,5,0,3.97




[4m[1mPlugin : pategan[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.01
sanity.avg_distance_nearest_neighbor,0.021451,0.137096,0.062627,0.044799,0.039169,0.066255,5,0,0.01
sanity.inlier_probability,0.782805,0.9819,0.919005,0.077327,0.972851,0.09276,5,0,0.01
sanity.outlier_probability,0.002262,0.006787,0.004072,0.002217,0.002262,0.004525,5,0,0.01
statistical.inverse_kl_divergence,0.754658,0.838081,0.792611,0.029207,0.78525,0.037501,5,0,0.01
statistical.kolmogorov_smirnov_test,0.770671,0.850473,0.806952,0.029622,0.803579,0.048128,5,0,0.01
statistical.chi_squared_test,0.438619,0.607305,0.489821,0.063462,0.452483,0.061641,5,0,0.02
statistical.maximum_mean_discrepancy,0.004719,0.00634,0.005665,0.000594,0.005558,0.000813,5,0,0.03
statistical.inverse_cdf_distance,1.564553,2.020821,1.802995,0.154585,1.775642,0.162977,5,0,3.26




[4m[1mPlugin : rtvae[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.01
sanity.avg_distance_nearest_neighbor,0.0196,0.038071,0.029637,0.007647,0.029851,0.015458,5,0,0.01
sanity.inlier_probability,0.945701,0.988688,0.967421,0.018246,0.968326,0.038462,5,0,0.01
sanity.outlier_probability,0.002262,0.00905,0.004525,0.002478,0.004525,0.002262,5,0,0.01
statistical.inverse_kl_divergence,0.733561,0.866818,0.792042,0.045888,0.785972,0.053437,5,0,0.01
statistical.kolmogorov_smirnov_test,0.79494,0.866722,0.824064,0.025084,0.826409,0.026327,5,0,0.01
statistical.chi_squared_test,0.354654,0.543433,0.479696,0.073712,0.531886,0.096996,5,0,0.02
statistical.maximum_mean_discrepancy,0.00473,0.007473,0.005905,0.001088,0.005566,0.001985,5,0,0.02
statistical.inverse_cdf_distance,1.86172,1.986206,1.92629,0.045199,1.918644,0.068142,5,0,2.92




[4m[1mPlugin : tvae[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.01
sanity.avg_distance_nearest_neighbor,0.03633225,0.146059,0.098334,0.039678,0.098004,0.057322,5,0,0.01
sanity.inlier_probability,0.719457,0.970588,0.833484,0.100312,0.839367,0.190045,5,0,0.01
sanity.outlier_probability,0.002262443,0.027149,0.008145,0.009556,0.004525,0.002262,5,0,0.01
statistical.inverse_kl_divergence,0.7206115,0.846241,0.791295,0.048533,0.814766,0.079766,5,0,0.01
statistical.kolmogorov_smirnov_test,0.7895928,0.860963,0.828507,0.024198,0.827437,0.02571,5,0,0.01
statistical.chi_squared_test,0.2723867,0.701769,0.455625,0.150766,0.410186,0.185858,5,0,0.02
statistical.maximum_mean_discrepancy,0.005496081,0.007225,0.00641,0.000672,0.006485,0.001226,5,0,0.02
statistical.inverse_cdf_distance,1.623357,2.030846,1.79151,0.132942,1.779218,0.043035,5,0,3.43




[4m[1mPlugin : copulagan[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.01
sanity.avg_distance_nearest_neighbor,0.059317,0.12728,0.087193,0.024722,0.07344,0.031034,5,0,0.01
sanity.inlier_probability,0.714932,0.977376,0.868326,0.110294,0.920814,0.21267,5,0,0.01
sanity.outlier_probability,0.002262,0.002262,0.002262,0.0,0.002262,0.0,5,0,0.01
statistical.inverse_kl_divergence,0.778613,0.852594,0.81835,0.023573,0.821434,0.005379,5,0,0.01
statistical.kolmogorov_smirnov_test,0.714521,0.792884,0.758001,0.025566,0.760181,0.016454,5,0,0.01
statistical.chi_squared_test,0.8972,0.995753,0.953235,0.041836,0.980839,0.076709,5,0,0.02
statistical.maximum_mean_discrepancy,0.004722,0.008544,0.00644,0.001279,0.006283,0.001225,5,0,0.02
statistical.inverse_cdf_distance,1.452912,1.903766,1.695818,0.154744,1.687697,0.181597,5,0,3.38




[4m[1mPlugin : gaussian_copula[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.01
sanity.avg_distance_nearest_neighbor,0.040046,0.123253,0.08172792,0.027971,0.078132,0.029331,5,0,0.01
sanity.inlier_probability,0.737557,0.986425,0.8791855,0.113417,0.952489,0.230769,5,0,0.01
sanity.outlier_probability,0.002262,0.002262,0.002262443,0.0,0.002262,0.0,5,0,0.01
statistical.inverse_kl_divergence,0.959263,0.963722,0.9612365,0.001491,0.960679,0.001207,5,0,0.01
statistical.kolmogorov_smirnov_test,0.90868,0.926573,0.920074,0.006049,0.921226,0.002262,5,0,0.01
statistical.chi_squared_test,0.897705,0.90579,0.901903,0.002683,0.901448,0.00235,5,0,0.02
statistical.maximum_mean_discrepancy,0.004728,0.005444,0.005000748,0.000265,0.004846,0.000332,5,0,0.03
statistical.inverse_cdf_distance,1.646739,1.753588,1.704147,0.037271,1.717022,0.045342,5,0,3.06




[4m[1mPlugin : adsgan[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.01
sanity.avg_distance_nearest_neighbor,0.027924,0.055044,0.041681,0.010385,0.040178,0.018293,5,0,0.01
sanity.inlier_probability,0.91629,0.984163,0.960181,0.02828,0.9819,0.045249,5,0,0.01
sanity.outlier_probability,0.002262,0.004525,0.00362,0.001108,0.004525,0.002262,5,0,0.01
statistical.inverse_kl_divergence,0.648973,0.764586,0.710925,0.044113,0.733843,0.068993,5,0,0.01
statistical.kolmogorov_smirnov_test,0.74311,0.839161,0.788276,0.039041,0.79021,0.076923,5,0,0.01
statistical.chi_squared_test,0.271741,0.363307,0.341815,0.035393,0.361658,0.012994,5,0,0.02
statistical.maximum_mean_discrepancy,0.004125,0.009501,0.00709,0.002121,0.007527,0.003922,5,0,0.02
statistical.inverse_cdf_distance,1.790059,2.175761,1.957537,0.136169,1.945505,0.179694,5,0,3.38




[4m[1mPlugin : bayesian_network[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.282805,0.309955,0.2959276,0.010456,0.2986425,0.0181,5,0,0.01
sanity.avg_distance_nearest_neighbor,0.025541,0.053372,0.03617961,0.01017,0.03638334,0.013076,5,0,0.01
sanity.inlier_probability,0.868778,0.995475,0.9497738,0.050739,0.979638,0.081448,5,0,0.01
sanity.outlier_probability,0.002262,0.004525,0.00361991,0.001108,0.004524887,0.002262,5,0,0.01
statistical.inverse_kl_divergence,0.977056,0.99331,0.9846946,0.006099,0.9838756,0.010271,5,0,0.01
statistical.kolmogorov_smirnov_test,0.956602,0.973262,0.9659811,0.005716,0.9652406,0.005965,5,0,0.01
statistical.chi_squared_test,0.81455,0.994704,0.8704161,0.07175,0.817853,0.092551,5,0,0.01
statistical.maximum_mean_discrepancy,0.001862,0.002445,0.002202179,0.000235,0.002355605,0.000393,5,0,0.02
statistical.inverse_cdf_distance,1.669465,1.73289,1.713826,0.022777,1.72118,0.009747,5,0,3.09




[4m[1mPlugin : uniform_sampler[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.01
sanity.avg_distance_nearest_neighbor,0.068862,0.266653,0.185152,0.06458,0.191086,0.02172,5,0,0.01
sanity.inlier_probability,0.533937,0.968326,0.7,0.144099,0.669683,0.038462,5,0,0.01
sanity.outlier_probability,0.002262,0.027149,0.011312,0.008821,0.00905,0.00905,5,0,0.01
statistical.inverse_kl_divergence,0.76279,0.789564,0.776488,0.008725,0.776512,0.006601,5,0,0.01
statistical.kolmogorov_smirnov_test,0.745578,0.768202,0.75652,0.00871,0.760592,0.01378,5,0,0.01
statistical.chi_squared_test,0.996233,0.999789,0.997949,0.001546,0.997442,0.003254,5,0,0.02
statistical.maximum_mean_discrepancy,0.005368,0.006189,0.005956,0.000301,0.006086,0.000146,5,0,0.02
statistical.inverse_cdf_distance,1.439411,1.475909,1.462954,0.014983,1.473773,0.023803,5,0,3.92




[4m[1mPlugin : marginal_distributions[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0.01
sanity.avg_distance_nearest_neighbor,0.157474,0.209657,0.184771,0.019792,0.182888,0.034099,5,0,0.01
sanity.inlier_probability,0.644796,0.721719,0.69457,0.02851,0.710407,0.033937,5,0,0.01
sanity.outlier_probability,0.002262,0.013575,0.00724,0.004387,0.00905,0.006787,5,0,0.01
statistical.inverse_kl_divergence,0.77315,0.784511,0.780241,0.004159,0.781286,0.00553,5,0,0.01
statistical.kolmogorov_smirnov_test,0.758124,0.764912,0.761826,0.002963,0.763883,0.005553,5,0,0.01
statistical.chi_squared_test,0.996898,0.999138,0.9983,0.000814,0.998234,0.000946,5,0,0.02
statistical.maximum_mean_discrepancy,0.00545,0.006329,0.005888,0.000369,0.005774,0.000741,5,0,0.02
statistical.inverse_cdf_distance,1.466227,1.506469,1.480024,0.015026,1.474336,0.019113,5,0,3.82





In [5]:
import pandas as pd
import numpy as np

# Gather the per-metric mean score of every benchmarked plugin.
means = []
for plugin in score:
    data = score[plugin]["mean"]
    # The "ok_score" / "bad_score" columns give, per metric, the reference
    # values used by `highlights` to decide whether lower or higher is better.
    # They are re-read on every iteration, so the last plugin's values are the
    # ones kept.
    # NOTE(review): this assumes the references are identical across plugins - confirm.
    good_scores = score[plugin]["ok_score"].to_dict()
    bad_scores = score[plugin]["bad_score"].to_dict()
    means.append(data)

# One column per plugin, one row per metric. The labels are applied through the
# value returned by set_axis: the `inplace` keyword was deprecated in pandas 1.5
# and removed in pandas 2.0, where passing it raises a TypeError.
out = pd.concat(means, axis=1).set_axis(list(score.keys()), axis=1)

# CSS snippets used by `highlights` for the worst cell, the best cell and
# every other cell of a row.
bad_highlight = 'background-color: lightcoral;'
ok_highlight = 'background-color: green;'
default = ''

def highlights(row):
    """Return one CSS style string per cell of a metric row.

    Written for ``Styler.apply(highlights, axis=1)``: ``row`` is a pandas
    Series whose ``name`` is the metric and whose values are the scores of the
    individual plugins.

    The direction of the metric is read from the module-level ``good_scores``
    and ``bad_scores`` dicts (keyed by metric name): when the good reference
    value is smaller than the bad one, the lowest score in the row is the best
    and the highest the worst; otherwise the roles are swapped.

    Cells equal to the best value get ``ok_highlight``, cells equal to the
    worst value get ``bad_highlight``, and all others get ``default``. When
    best and worst coincide (all scores tied) every cell is marked as best.

    Raises:
        KeyError: if the metric is missing from ``good_scores`` or
            ``bad_scores``.
    """
    metric = row.name
    lower_is_better = good_scores[metric] < bad_scores[metric]

    # Series.min()/max() skip NaN. Both extremes are computed the same way so
    # that a single missing score cannot suppress the "best" highlight while
    # the "worst" one is still applied.
    lowest, highest = row.min(), row.max()
    if lower_is_better:
        best_val, worst_val = lowest, highest
    else:
        best_val, worst_val = highest, lowest

    styles = []
    for val in row.values:
        if val == best_val:
            styles.append(ok_highlight)
        elif val == worst_val:
            styles.append(bad_highlight)
        else:
            # Also reached for NaN cells: NaN never compares equal.
            styles.append(default)

    return styles

# Style the table row by row (axis=1) with `highlights`; as the last
# expression of the cell, the styled table is rendered as its output.
out.style.apply(highlights, axis=1)

Unnamed: 0,privbayes,ctgan,pategan,rtvae,tvae,copulagan,gaussian_copula,adsgan,bayesian_network,uniform_sampler,marginal_distributions
sanity.data_mismatch_score,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
sanity.common_rows_proportion,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.295928,0.0,0.0
sanity.avg_distance_nearest_neighbor,0.064546,0.12147,0.062627,0.029637,0.098334,0.087193,0.081728,0.041681,0.03618,0.185152,0.184771
sanity.inlier_probability,0.941629,0.828507,0.919005,0.967421,0.833484,0.868326,0.879186,0.960181,0.949774,0.7,0.69457
sanity.outlier_probability,0.004525,0.013122,0.004072,0.004525,0.008145,0.002262,0.002262,0.00362,0.00362,0.011312,0.00724
statistical.inverse_kl_divergence,0.8198,0.834569,0.792611,0.792042,0.791295,0.81835,0.961237,0.710925,0.984695,0.776488,0.780241
statistical.kolmogorov_smirnov_test,0.822007,0.762443,0.806952,0.824064,0.828507,0.758001,0.920074,0.788276,0.965981,0.75652,0.761826
statistical.chi_squared_test,0.657202,0.957609,0.489821,0.479696,0.455625,0.953235,0.901903,0.341815,0.870416,0.997949,0.9983
statistical.maximum_mean_discrepancy,0.015637,0.020362,0.005665,0.005905,0.00641,0.00644,0.005001,0.00709,0.002202,0.005956,0.005888
statistical.inverse_cdf_distance,1.61798,1.60757,1.802995,1.92629,1.79151,1.695818,1.704147,1.957537,1.713826,1.462954,1.480024


# 