# Add a new plugin

By default, the library imports every file matching the pattern "plugin\_\*.py" in src/synthcity/plugins and loads all the classes which implement the [Plugin interface](src/synthcity/plugins/core/plugin.py).

Each plugin must implement the following methods:
- hyperparameter_space() - a static method that returns the hyperparameters that can be tuned during AutoML.
- type() - a static method that returns the type of the plugin. e.g., debug, generative, bayesian, etc.
- name() - a static method that returns the name of the plugin. e.g., ctgan, random_noise, etc.
- _fit() - internal method, called by `fit` on each training set.
- _generate() - internal method, called by `generate`.

## Existing plugins

In [1]:
# Plugins is the registry object used throughout this notebook to list,
# add and fetch generators.
from synthcity.plugins import Plugins

generators = Plugins()

# Names of the plugins currently known to the registry
generators.list()



['pategan',
 'adsgan',
 'nflow',
 'gaussian_copula',
 'ctgan',
 'privbayes',
 'rtvae',
 'bayesian_network',
 'tvae',
 'copulagan']

## Example plugin: Generate 0-1

In [2]:
# stdlib
from typing import Any, List

# third party
import pandas as pd
import numpy as np

# synthcity absolute
from synthcity.plugins.core.distribution import Distribution
from synthcity.plugins.core.plugin import Plugin
from synthcity.plugins.core.schema import Schema
from synthcity.plugins.core.dataloader import GenericDataLoader, DataLoader


class ZeroOnePlugin(Plugin):
    """Toy plugin that fills every feature with random 0/1 integers.

    The values are produced without looking at the training schema, so this
    plugin serves as the deliberately naive first attempt of the tutorial.
    """

    def __init__(self, **kwargs: Any) -> None:
        super().__init__(**kwargs)

    @staticmethod
    def hyperparameter_space(*args: Any, **kwargs: Any) -> List[Distribution]:
        """Return the tunable hyperparameters; this plugin has none."""
        return []

    @staticmethod
    def type() -> str:
        """Return the plugin category."""
        return "debug"

    @staticmethod
    def name() -> str:
        """Return the plugin name."""
        return "zero_one"

    def _fit(self, X: DataLoader, *args: Any, **kwargs: Any) -> "ZeroOnePlugin":
        """Remember the second dimension of ``X.shape`` for generation."""
        # Presumably the number of feature columns of the training data.
        self.features_count = X.shape[1]
        return self

    def _generate(self, count: int, syn_schema: Schema, **kwargs: Any):
        """Return ``count`` rows of random integers drawn from {0, 1}.

        ``syn_schema`` is accepted but not used.
        """
        output_shape = (count, self.features_count)
        # randint's upper bound is exclusive, so only 0 and 1 are produced.
        samples = np.random.randint(0, 2, size=output_shape)
        return GenericDataLoader(samples)

In [3]:
# Add the new plugin to the collection, under the same key that
# ZeroOnePlugin.name() returns ("zero_one")

generators.add("zero_one", ZeroOnePlugin)

<synthcity.plugins.Plugins at 0x7fc38c026a30>

In [4]:
# Check the new plugins list
# NOTE(review): the recorded output below does not show "zero_one" --
# confirm whether list() is expected to include plugins added at runtime.
generators.list()

['pategan',
 'adsgan',
 'nflow',
 'gaussian_copula',
 'ctgan',
 'privbayes',
 'rtvae',
 'bayesian_network',
 'tvae',
 'copulagan']

In [5]:
# Load reference data

from sklearn.datasets import load_breast_cancer

# return_X_y=True yields a (features, target) pair; only the features X are
# used in the cells shown here, the target y is left unused.
X, y = load_breast_cancer(return_X_y=True, as_frame=True)

# Wrap the features in the loader type that is passed to fit() below
loader = GenericDataLoader(X)

loader.dataframe()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,0.2419,0.07871,...,25.380,17.33,184.60,2019.0,0.16220,0.66560,0.7119,0.2654,0.4601,0.11890
1,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,0.1812,0.05667,...,24.990,23.41,158.80,1956.0,0.12380,0.18660,0.2416,0.1860,0.2750,0.08902
2,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,0.2069,0.05999,...,23.570,25.53,152.50,1709.0,0.14440,0.42450,0.4504,0.2430,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,0.2597,0.09744,...,14.910,26.50,98.87,567.7,0.20980,0.86630,0.6869,0.2575,0.6638,0.17300
4,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,0.1809,0.05883,...,22.540,16.67,152.20,1575.0,0.13740,0.20500,0.4000,0.1625,0.2364,0.07678
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,0.1726,0.05623,...,25.450,26.40,166.10,2027.0,0.14100,0.21130,0.4107,0.2216,0.2060,0.07115
565,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,0.1752,0.05533,...,23.690,38.25,155.00,1731.0,0.11660,0.19220,0.3215,0.1628,0.2572,0.06637
566,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,0.1590,0.05648,...,18.980,34.12,126.70,1124.0,0.11390,0.30940,0.3403,0.1418,0.2218,0.07820
567,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,0.2397,0.07016,...,25.740,39.42,184.60,1821.0,0.16500,0.86810,0.9387,0.2650,0.4087,0.12400


In [6]:
# Train the new plugin

# Fetch an instance of the plugin registered under "zero_one"
gen = generators.get("zero_one")

# fit() is the public entry point; it calls the plugin's _fit()
gen.fit(loader)

<__main__.ZeroOnePlugin at 0x7fc2b8b14ac0>

In [7]:
# Generate some new data
# Expected to raise RuntimeError (see the recorded output): the random 0/1
# values do not meet the constraints the Plugin interface checks.

gen.generate(count=10)

RuntimeError: Plugin zero_one failed to meet the synthetic constraints.

### Oops, this didn't work.

__The Plugin interface requires the newly generated data to:__
 - satisfy the same constraints as the training set,
 - or satisfy the constraints provided at inference time (if provided).
 
 
 If the generated dataframe fails to comply, an exception will be raised.

Let's try again

## A functional plugin

In [8]:
# stdlib
from typing import Any, List

# third party
import pandas as pd
import numpy as np

# synthcity absolute
from synthcity.plugins.core.distribution import Distribution
from synthcity.plugins.core.plugin import Plugin
from synthcity.plugins.core.schema import Schema


class DummyGeneratorPlugin(Plugin):
    """Minimal working plugin that draws synthetic rows from schemas.

    Nothing is learned in ``_fit``; rows are sampled from ``self.schema()``
    and from the schema supplied at generation time.

    Note: ``DataLoader`` and ``GenericDataLoader`` are not imported in this
    cell; they come from the imports of the ZeroOnePlugin cell above.
    """

    def __init__(self, **kwargs: Any) -> None:
        super().__init__(**kwargs)

    @staticmethod
    def name() -> str:
        """Return the plugin name."""
        return "dummy_generator"

    @staticmethod
    def type() -> str:
        """Return the plugin category."""
        return "debug"

    @staticmethod
    def hyperparameter_space(*args: Any, **kwargs: Any) -> List[Distribution]:
        """Return the tunable hyperparameters; this plugin has none."""
        return []

    def _fit(
        self, X: DataLoader, *args: Any, **kwargs: Any
    ) -> "DummyGeneratorPlugin":
        """Do nothing with ``X`` and return the plugin itself."""
        return self

    def _generate(self, count: int, syn_schema: Schema, **kwargs: Any) -> DataLoader:
        """Sample ``count`` rows and wrap them in a ``GenericDataLoader``.

        Args:
            count: number of rows to sample.
            syn_schema: schema whose features overwrite the initial sample.

        Returns:
            A ``GenericDataLoader`` built from the sampled rows.
        """
        # Start from a sample of the plugin's own schema (presumably the one
        # derived from the training data -- confirm against Plugin.schema()).
        result = self.schema().sample(count)
        # Overwrite the features listed by syn_schema with values drawn from
        # syn_schema itself.
        result[syn_schema.features()] = syn_schema.sample(count)

        return GenericDataLoader(result)

In [9]:
# Register the functional plugin under the key that
# DummyGeneratorPlugin.name() returns ("dummy_generator")
generators.add("dummy_generator", DummyGeneratorPlugin)

generators.list()

['pategan',
 'adsgan',
 'nflow',
 'gaussian_copula',
 'ctgan',
 'privbayes',
 'rtvae',
 'bayesian_network',
 'tvae',
 'copulagan']

In [10]:
# Train the new plugin

# Rebinds `gen`, which previously referred to the "zero_one" plugin
gen = generators.get("dummy_generator")

gen.fit(loader)

<__main__.DummyGeneratorPlugin at 0x7fc2b8966e50>

In [13]:
# Generate some new data

# generate() returns a data loader; dataframe() exposes it in tabular form
# for display
gen.generate(count=10).dataframe()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,7.128463,22.496261,64.790667,739.024762,0.082947,0.118172,0.238049,0.079475,0.22641,0.095526,...,8.63488,17.615347,220.032345,3353.467033,0.14314,1.012072,0.000998,0.06696,0.610907,0.172779
1,15.615487,26.162723,95.038115,562.507565,0.103963,0.057096,0.306654,0.027967,0.18773,0.085482,...,31.565912,22.946837,224.675484,2921.197138,0.086941,0.638025,1.17795,0.163928,0.647546,0.127228
2,26.572941,10.55678,159.947414,1548.434881,0.134323,0.181572,0.057288,0.199807,0.215096,0.085181,...,26.543878,14.284968,131.653705,2264.628687,0.072542,0.484271,0.97665,0.208954,0.592674,0.178029
3,11.120237,17.977118,95.760926,468.141797,0.160343,0.177685,0.129709,0.160119,0.154137,0.086077,...,12.455267,40.992885,236.09162,1372.741682,0.074896,0.349193,0.961462,0.102422,0.295744,0.204293
4,12.257483,26.411431,172.250592,2300.609695,0.095715,0.205637,0.07352,0.02218,0.1491,0.081624,...,15.144045,31.815087,143.679462,247.538787,0.21361,0.687972,0.136033,0.236419,0.183274,0.194366
5,8.179994,29.283652,128.530208,369.357865,0.080357,0.211705,0.11935,0.031733,0.183397,0.084654,...,9.383642,33.095478,173.112176,3263.013051,0.082014,0.791424,0.117452,0.191341,0.515229,0.168538
6,24.961455,10.135525,88.634739,1446.853612,0.088238,0.24518,0.176209,0.187079,0.294478,0.080834,...,30.843865,28.563874,188.341978,1921.533571,0.140934,0.163644,0.395787,0.180805,0.241366,0.19689
7,7.614768,31.659671,145.107033,605.498285,0.089959,0.194839,0.155746,0.146238,0.230826,0.087736,...,33.865343,45.021829,241.561708,2277.780435,0.14351,0.504,0.012355,0.118881,0.608809,0.081589
8,27.650579,29.918782,100.586228,874.935612,0.106888,0.0504,0.36613,0.072321,0.182658,0.067579,...,28.832325,40.166375,102.286774,3478.296095,0.092602,0.593531,0.046302,0.001217,0.197753,0.201258
9,16.793555,19.155174,87.424621,974.692141,0.137387,0.332894,0.338938,0.086461,0.276587,0.057577,...,20.96674,34.196445,76.848579,2631.133784,0.093827,0.553384,0.150456,0.201058,0.42909,0.084795


In [14]:
# Custom generation constraints

from synthcity.plugins.core.constraints import Constraints

# Each rule is a (feature, operator, threshold) triple
constraints = Constraints(rules=[("worst radius", ">", 15)])

# Constraints given at inference time are applied to the generated data
generated = gen.generate(count=10, constraints=constraints)

# Every generated row must satisfy the rule. Checking with .any() would pass
# as soon as a single row complied and would not detect a violated constraint.
assert (generated["worst radius"] > 15).all()

generated.dataframe()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,11.445839,17.509174,58.236262,1303.971148,0.119721,0.06948,0.397942,0.104572,0.301533,0.051719,...,26.510372,31.043753,130.604776,1468.667081,0.116849,0.707709,0.265036,0.051682,0.511875,0.087068
1,27.905451,37.350699,130.662319,1607.139869,0.135979,0.228819,0.265115,0.142118,0.160237,0.085185,...,32.941611,48.958069,56.89241,299.020281,0.203533,0.822483,0.024576,0.054654,0.171717,0.109908
2,7.130919,28.94662,106.731209,1514.649232,0.071522,0.166235,0.414975,0.125901,0.27628,0.064661,...,24.254817,35.241416,208.853006,641.206264,0.197348,0.336103,0.229234,0.2622,0.364346,0.098332
3,16.416042,33.427282,150.972758,903.139088,0.085743,0.217636,0.028634,0.166435,0.270334,0.083252,...,16.349645,45.415129,79.416512,3938.823309,0.179789,0.04232,0.641616,0.042525,0.48472,0.093335
4,11.756518,9.771626,165.647188,1971.343719,0.084776,0.228021,0.087168,0.112919,0.264667,0.073433,...,35.017834,33.436589,67.726541,1398.132958,0.154926,0.956097,0.513708,0.069,0.427976,0.205267
5,27.933158,10.300638,120.939612,2440.502385,0.057805,0.028888,0.121556,0.071728,0.147826,0.083841,...,23.021262,43.473827,97.93239,3518.893964,0.150497,0.194736,0.922176,0.211068,0.463483,0.200202
6,22.509499,29.081925,147.227112,2286.414962,0.073789,0.163535,0.089633,0.169707,0.224894,0.068433,...,22.072767,45.48017,98.010887,1598.427624,0.145471,0.824993,0.206942,0.059864,0.391499,0.189738
7,27.173775,38.26448,120.467359,712.287551,0.107257,0.144573,0.065052,0.011575,0.116116,0.096438,...,31.404669,13.698521,85.951363,1722.074691,0.091124,0.94905,0.175387,0.12462,0.173784,0.181992
8,26.818651,15.543895,112.606824,2354.16962,0.094796,0.194125,0.335433,0.191288,0.269044,0.073588,...,36.000866,29.939317,96.061594,3720.041052,0.083858,0.569278,0.447388,0.180016,0.290678,0.125951
9,15.190687,11.241542,187.620141,1220.136864,0.100703,0.125037,0.418723,0.084032,0.189164,0.078674,...,32.106528,21.548204,223.01215,1352.003526,0.110449,0.698327,0.702204,0.232255,0.630734,0.101959
