In [1]:
# --- stdlib ---
import warnings

# Silence every warning category before the heavier imports below are executed.
warnings.filterwarnings("ignore")

# --- third party ---
from sklearn.datasets import load_diabetes

# --- synthcity ---
from synthcity.plugins import Plugins
from synthcity.plugins.core.dataloader import Syn_SeqDataLoader

# Name of the plugin this notebook exercises.
eval_plugin = "syn_seq"

# Load the diabetes features/labels as pandas objects, then attach the
# labels to the feature frame under the "target" column.
X, y = load_diabetes(return_X_y=True, as_frame=True)
X["target"] = y

    The default C++ compiler could not be found on your system.
    You need to either define the CXX environment variable or a symlink to the g++ command.
    For example if g++-8 is the command you can do
      import os
      os.environ['CXX'] = 'g++-8'
    


In [2]:
# Per-column overrides; this dict is handed to Syn_SeqDataLoader via `user_custom`.
user_custom = {
    # Column ordering (the fit log reports the same sequence as "Fitting order").
    "syn_order": [
        "sex", "bmi", "age", "bp", "s1", "s2", "s3", "s4", "s5", "s6", "target",
    ],
    # Method override for a single column; other columns are left unspecified here.
    "method": {"bp": "norm"},
    # Columns to be treated as categorical rather than by their float64 dtype.
    "col_type": {"age": "category", "sex": "category"},
    # NOTE(review): presumably 'bp' values that get special handling — the recorded
    # outputs show an extra 'bp_cat' column once this is set; confirm semantics.
    "special_value": {"bp": [-0.040099, -0.005670]},
    # NOTE(review): presumably the predictor columns allowed for each listed
    # column — confirm against the encoder documentation.
    "variable_selection": {
        "s4": ["sex", "bmi", "age", "bp", "s1", "s2"],
        "target": ["sex", "bmi", "age", "bp", "s1", "s2", "s3"],
    },
}

In [3]:
# Wrap the feature frame in a Syn_SeqDataLoader, naming the label column,
# the sensitive column, and the per-column overrides defined in `user_custom`.
loader = Syn_SeqDataLoader(
    X,
    target_column="target",
    sensitive_columns=["sex"],
    user_custom=user_custom,
)


[INFO] Syn_SeqEncoder summary:
  - syn_order: ['sex', 'bmi', 'age', 'bp', 's1', 's2', 's3', 's4', 's5', 's6', 'target']
  - original_dtype => {'sex': 'float64', 'bmi': 'float64', 'age': 'float64', 'bp': 'float64', 's1': 'float64', 's2': 'float64', 's3': 'float64', 's4': 'float64', 's5': 'float64', 's6': 'float64', 'target': 'float64'}
  - converted_type => {'sex': 'category', 'bmi': 'numeric', 'age': 'category', 'bp': 'numeric', 's1': 'numeric', 's2': 'numeric', 's3': 'numeric', 's4': 'numeric', 's5': 'numeric', 's6': 'numeric', 'target': 'numeric'}
  - method => {'sex': 'swr', 'bmi': 'cart', 'age': 'cart', 'bp': 'norm', 's1': 'cart', 's2': 'cart', 's3': 'cart', 's4': 'cart', 's5': 'cart', 's6': 'cart', 'target': 'cart'}
  - special_value => {'bp': [-0.040099, -0.00567]}
  - date_mins => {}
  - variable_selection_:
        sex  bmi  age  bp  s1  s2  s3  s4  s5  s6  target
sex       0    0    0   0   0   0   0   0   0   0       0
bmi       1    0    0   0   0   0   0   0   0   0       

In [4]:
# Show the freshly built loader through its notebook repr
# (bare last expression of the cell).
loader

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.050680,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.085299,0.050680,0.044451,-0.005670,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.025930,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0
...,...,...,...,...,...,...,...,...,...,...,...
437,0.041708,0.050680,0.019662,0.059744,-0.005697,-0.002566,-0.028674,-0.002592,0.031193,0.007207,178.0
438,-0.005515,0.050680,-0.015906,-0.067642,0.049341,0.079165,-0.028674,0.034309,-0.018114,0.044485,104.0
439,0.041708,0.050680,-0.015906,0.017293,-0.037344,-0.013840,-0.024993,-0.011080,-0.046883,0.015491,132.0
440,-0.045472,-0.044642,0.039062,0.001215,0.016318,0.015283,-0.028674,0.026560,0.044529,-0.025930,220.0


In [5]:
# encode() yields a pair: the encoded loader and an accompanying encoding dict.
encoded_loader, enc_dict = loader.encode()

# Emit both objects as plain text, one after the other.
print(encoded_loader, enc_dict, sep="\n")

# NOTE(review): this relies on an underscore-prefixed (private) method —
# confirm there is no public equivalent for printing the init summary.
loader._print_init_info()

          sex       bmi       age        bp        s1        s2        s3  \
0    0.050680  0.061696  0.038076  0.021872 -0.044223 -0.034821 -0.043401   
1   -0.044642 -0.051474 -0.001882 -0.026328 -0.008449 -0.019163  0.074412   
2    0.050680  0.044451  0.085299 -0.005670 -0.045599 -0.034194 -0.032356   
3   -0.044642 -0.011595 -0.089063 -0.036656  0.012191  0.024991 -0.036038   
4   -0.044642 -0.036385  0.005383  0.021872  0.003935  0.015596  0.008142   
..        ...       ...       ...       ...       ...       ...       ...   
437  0.050680  0.019662  0.041708  0.059744 -0.005697 -0.002566 -0.028674   
438  0.050680 -0.015906 -0.005515 -0.067642  0.049341  0.079165 -0.028674   
439  0.050680 -0.015906  0.041708  0.017293 -0.037344 -0.013840 -0.024993   
440 -0.044642  0.039062 -0.045472  0.001215  0.016318  0.015283 -0.028674   
441 -0.044642 -0.073030 -0.045472 -0.081413  0.083740  0.027809  0.173816   

           s4        s5        s6  target bp_cat  
0   -0.002592  0.019907 

In [7]:
# Look up the plugin by the name configured in `eval_plugin` (set in the setup
# cell) instead of repeating the "syn_seq" literal, so the plugin name has a
# single source of truth.
syn_model = Plugins().get(eval_plugin)

[2025-01-20T13:27:19.495147+0900][18948][CRITICAL] module disabled: C:\Users\hsrhe\Desktop\synthcity\src\synthcity\plugins\generic\plugin_goggle.py


In [6]:
# Fit the plugin on the loader. The call's return value (the plugin object
# itself, per the recorded repr) is displayed as the cell output.
# NOTE(review): the recorded log warns "encode() called again" — presumably
# because encode() was already invoked on this loader in an earlier cell; confirm.
syn_model.fit(loader)

[WARN] encode() called again. We already splitted in __init__.
[INFO] model fitting
Fitting order => ['sex', 'bmi', 'age', 'bp_cat', 'bp', 's1', 's2', 's3', 's4', 's5', 's6', 'target']
Fitting first col 'sex' with method='swr' ... Done!
Fitting 'bmi' with method='cart' ... Done!
Fitting 'age' with method='cart' ... Done!
Fitting 'bp_cat' with method='cart' ... Done!
Fitting 'bp' with method='cart' ... Done!
Fitting 's1' with method='cart' ... Done!
Fitting 's2' with method='cart' ... Done!
Fitting 's3' with method='cart' ... Done!
Fitting 's4' with method='cart' ... Done!
Fitting 's5' with method='cart' ... Done!
Fitting 's6' with method='cart' ... Done!
Fitting 'target' with method='cart' ... Done!


<synthcity.plugins.generic.plugin_syn_seq.Syn_SeqPlugin at 0x310b86610>

In [7]:
# Constraints forwarded to `generate(rules=...)`, keyed by column name.
# Every entry is a (column, operator, value) triple.
# NOTE(review): in the recorded run these rules trigger
# "could not satisfy rules after 10 tries" for 'target' and every displayed
# 'target' value is empty — confirm that the 0.15 threshold on 'bmi' is intended.
rules = {
    "target": [
        ("bmi", ">", 0.15),
        ("target", ">", 0),
    ],
}

In [8]:
# Draw as many synthetic rows as the source frame has, applying `rules`,
# and display the result as a DataFrame (last expression of the cell).
syn_model.generate(
    nrows=len(X),
    rules=rules,
).dataframe()

Generating 'bmi' ... Done!
Generating 'age' ... Done!
Generating 'bp_cat' ... Done!
Generating 'bp' ... Done!
Generating 's1' ... Done!
Generating 's2' ... Done!
Generating 's3' ... Done!
Generating 's4' ... Done!
Generating 's5' ... Done!
Generating 's6' ... Done!
[WARN] target: could not satisfy rules after 10 tries => set them to NaN.
Generating 'target' ... Done!


Unnamed: 0,sex,bmi,age,bp_cat,bp,s1,s2,s3,s4,s5,s6,target
0,0.050680,-0.009439,0.052606,-777,0.049415,0.050717,-0.019163,-0.013948,0.034309,0.119340,-0.017646,
1,-0.044642,0.026128,0.016281,-777,0.058608,-0.060735,-0.044215,-0.013948,-0.033958,-0.051404,-0.025930,
2,-0.044642,-0.065486,-0.041840,-777,-0.040099,-0.005697,0.014344,-0.043401,0.034309,0.007027,-0.013504,
3,0.050680,-0.001895,0.005383,-777,0.008101,-0.004321,-0.015719,-0.002903,-0.002592,0.038394,-0.013504,
4,0.050680,0.059541,-0.030942,-777,0.001215,0.012191,0.031567,-0.043401,0.034309,0.014821,0.007207,
...,...,...,...,...,...,...,...,...,...,...,...,...
437,-0.044642,0.092953,0.027178,-777,-0.052734,0.008063,0.039709,-0.028674,0.021024,-0.048359,0.019633,
438,0.050680,-0.025607,0.063504,-777,0.011544,0.064477,0.048477,0.030232,-0.002592,0.038394,0.019633,
439,-0.044642,0.104809,0.030811,-777,0.076958,-0.011201,-0.011335,-0.058127,0.034309,0.057108,0.036201,
440,0.050680,-0.033151,-0.001882,-777,-0.018306,0.031454,0.042840,-0.013948,0.019917,0.010227,0.027917,


In [9]:
# Display the loader again after encoding/fitting. In the recorded output the
# columns follow the configured order and an extra 'bp_cat' column is present.
loader

Unnamed: 0,sex,bmi,age,bp,s1,s2,s3,s4,s5,s6,target,bp_cat
0,0.050680,0.061696,0.038076,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0,-777
1,-0.044642,-0.051474,-0.001882,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0,-777
2,0.050680,0.044451,0.085299,-0.005670,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.025930,141.0,-777
3,-0.044642,-0.011595,-0.089063,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0,-777
4,-0.044642,-0.036385,0.005383,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0,-777
...,...,...,...,...,...,...,...,...,...,...,...,...
437,0.050680,0.019662,0.041708,0.059744,-0.005697,-0.002566,-0.028674,-0.002592,0.031193,0.007207,178.0,-777
438,0.050680,-0.015906,-0.005515,-0.067642,0.049341,0.079165,-0.028674,0.034309,-0.018114,0.044485,104.0,-777
439,0.050680,-0.015906,0.041708,0.017293,-0.037344,-0.013840,-0.024993,-0.011080,-0.046883,0.015491,132.0,-777
440,-0.044642,0.039062,-0.045472,0.001215,0.016318,0.015283,-0.028674,0.026560,0.044529,-0.025930,220.0,-777


In [10]:
# Display the source DataFrame for comparison; the recorded output keeps the
# original column order and has no 'bp_cat' column.
X

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.050680,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.085299,0.050680,0.044451,-0.005670,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.025930,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0
...,...,...,...,...,...,...,...,...,...,...,...
437,0.041708,0.050680,0.019662,0.059744,-0.005697,-0.002566,-0.028674,-0.002592,0.031193,0.007207,178.0
438,-0.005515,0.050680,-0.015906,-0.067642,0.049341,0.079165,-0.028674,0.034309,-0.018114,0.044485,104.0
439,0.041708,0.050680,-0.015906,0.017293,-0.037344,-0.013840,-0.024993,-0.011080,-0.046883,0.015491,132.0
440,-0.045472,-0.044642,0.039062,0.001215,0.016318,0.015283,-0.028674,0.026560,0.044529,-0.025930,220.0
