# Tutorial 2: Benchmarks

In [1]:
import warnings
import sys

# Install a global "ignore" filter: every warning raised from here on is
# suppressed for the remainder of the session (presumably to keep the cell
# outputs readable -- NOTE(review): this also hides deprecation warnings).
warnings.filterwarnings("ignore")

from sklearn.datasets import load_iris
from synthcity.plugins import Plugins
import synthcity.logger as log

# Fetch the iris features and labels as pandas objects, then keep the label
# next to the features in a "target" column so the frame is self-contained.
X, y = load_iris(as_frame=True, return_X_y=True)
X = X.assign(target=y)

X

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


## List the available generative models

In [2]:
from synthcity.plugins import Plugins

# Instantiate the plugin registry and ask it for the plugin names, passing
# skip_debug=True; the resulting list is echoed as the cell output.
plugin_registry = Plugins()
plugins = plugin_registry.list(skip_debug=True)

plugins

['survival_adsgan',
 'nflow',
 'tvae',
 'bayesian_network',
 'privbayes',
 'survival_nflow',
 'marginal_distributions',
 'copulagan',
 'survival_bayesian_network',
 'gaussian_copula',
 'survival_tvae',
 'survival_ctgan',
 'adsgan',
 'rtvae',
 'ctgan',
 'survival_gan',
 'pategan']

## Benchmark the quality of plugins

In [3]:
from synthcity.benchmark import Benchmarks

# Benchmark a single plugin on the iris frame: one repeat, no sensitive
# columns, and as many synthetic rows as there are real rows.
# NOTE(review): "uniform_sampler" is not among the names printed by
# Plugins().list(skip_debug=True) above -- presumably it is a debug plugin;
# confirm.
score = Benchmarks.evaluate(
    ["uniform_sampler"],
    X,
    sensitive_columns=[],
    synthetic_size=len(X),
    repeats=1,
)

In [4]:
# Render the benchmark results held in `score`; the output below shows one
# table per plugin with min/max/mean/stddev/median/iqr columns per metric.
Benchmarks.print(score)


[4m[1mPlugin : uniform_sampler[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
sanity.data_mismatch.score,0.166667,0.166667,0.166667,0.0,0.166667,0.0,1,0,0.0
sanity.common_rows_proportion.score,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0.0
sanity.nearest_syn_neighbor_distance.mean,0.521522,0.521522,0.521522,0.0,0.521522,0.0,1,0,0.0
sanity.close_values_probability.score,0.091667,0.091667,0.091667,0.0,0.091667,0.0,1,0,0.0
sanity.distant_values_probability.score,0.041667,0.041667,0.041667,0.0,0.041667,0.0,1,0,0.0
stats.jensenshannon_dist.marginal,0.227611,0.227611,0.227611,0.0,0.227611,0.0,1,0,0.04
stats.chi_squared_test.marginal,0.999898,0.999898,0.999898,0.0,0.999898,0.0,1,0,0.0
stats.feature_corr.joint,3.217604,3.217604,3.217604,0.0,3.217604,0.0,1,0,0.02
stats.inv_kl_divergence.marginal,0.796835,0.796835,0.796835,0.0,0.796835,0.0,1,0,0.0
stats.ks_test.marginal,0.825,0.825,0.825,0.0,0.825,0.0,1,0,0.0





In [5]:
import pandas as pd
import numpy as np

# Build a (metric x plugin) table holding the mean value of every metric.
means = []
directions = None
for plugin in score:
    data = score[plugin]["mean"]

    # Remember each metric's optimisation direction, taken from the first
    # plugin that reports a non-empty "direction" column. It is consulted
    # later when deciding which cells to highlight.
    if directions is None:
        plugin_directions = score[plugin]["direction"].to_dict()
        if len(plugin_directions) > 0:
            directions = plugin_directions

    means.append(data)

# Name the columns after the plugins at concat time.
# `DataFrame.set_axis(..., inplace=True)` was deprecated in pandas 1.5 and
# removed in pandas 2.0, where it raises TypeError.
out = pd.concat(means, axis=1, keys=list(score.keys()))

# CSS snippets used to colour the worst / best cell of every metric row.
bad_highlight = "background-color: lightcoral;"
ok_highlight = "background-color: green;"
default = ""


def highlights(row):
    """Return one CSS style string per cell of a metric row.

    Intended for ``Styler.apply(..., axis=1)``: ``row`` is a Series whose
    ``name`` is the metric and whose values are the per-plugin scores.

    The best value in the row gets ``ok_highlight``, the worst gets
    ``bad_highlight`` and every other cell gets ``default``. "Best" is the
    minimum when ``directions[metric] == "minimize"`` and the maximum for any
    other direction. When best and worst coincide (single column, or all
    values equal) every matching cell is styled as best.

    Missing values are ignored when looking for the best/worst value and are
    never highlighted themselves. A row whose metric has no known direction
    (``directions`` is ``None`` or lacks the metric) is left unstyled.

    Parameters
    ----------
    row : pandas.Series
        One row of the score table.

    Returns
    -------
    list of str
        Style strings, in the same order as ``row``.
    """
    metric = row.name

    # `directions` stays None when no plugin reported a direction column;
    # fall back to "no styling" instead of failing while rendering.
    direction = (directions or {}).get(metric)
    if direction is None:
        return [default] * len(row)

    # Series.min/max skip NaN for both extremes. Previously the best value
    # came from np.min/np.max on the raw array (NaN-propagating) while the
    # worst came from the Series (NaN-skipping), so a single NaN suppressed
    # the "best" highlight only.
    if direction == "minimize":
        best_val, worst_val = row.min(), row.max()
    else:
        best_val, worst_val = row.max(), row.min()

    styles = []
    for val in row.values:
        if val == best_val:
            styles.append(ok_highlight)
        elif val == worst_val:
            styles.append(bad_highlight)
        else:
            styles.append(default)

    return styles


# Colour the score table row by row: `highlights` receives one metric row at
# a time and returns the CSS for each plugin's cell.
out.style.apply(highlights, axis="columns")

Unnamed: 0,uniform_sampler
sanity.data_mismatch.score,0.166667
sanity.common_rows_proportion.score,0.0
sanity.nearest_syn_neighbor_distance.mean,0.521522
sanity.close_values_probability.score,0.091667
sanity.distant_values_probability.score,0.041667
stats.jensenshannon_dist.marginal,0.227611
stats.chi_squared_test.marginal,0.999898
stats.feature_corr.joint,3.217604
stats.inv_kl_divergence.marginal,0.796835
stats.ks_test.marginal,0.825


# 