## Some example mock APIs

Looking for some high-level feedback on these mock data APIs.

In [50]:
from mlfaker.generators import NormalGenerator, CategoricalGenerator, BaseGenerator
import pandas as pd

In [36]:
def build_df(generators, size):
    """Build a DataFrame with one column per generator.

    Args:
        generators: Iterable of objects exposing ``generate(size)``. Each
            call is assumed to return a named ``pandas.Series`` (the
            notebook output shows columns labelled with generator names).
        size: Number of rows; passed unchanged to every generator.

    Returns:
        A ``pandas.DataFrame`` holding the generated Series as columns, in
        generator order. An empty DataFrame when no generators are given.
    """
    columns = [g.generate(size) for g in generators]
    if not columns:
        # pd.concat raises on an empty list; keep returning an empty frame.
        return pd.DataFrame()
    # Concatenate column-wise rather than building rows and transposing:
    # transposing a mixed-dtype frame collapses every column to one common
    # dtype (object), so numeric columns would stop being numeric.
    return pd.concat(columns, axis=1)

In [73]:
# Seven rows from three generators: one normal column and two categorical
# columns (the second with explicit classes), both with fillrate=0.5.
build_df(
    generators=[
        NormalGenerator("foo"),
        CategoricalGenerator("bar", fillrate=0.5),
        CategoricalGenerator("fizz", classes=["a", "b", "c"], fillrate=0.5),
    ],
    size=7,
)

Unnamed: 0,foo,bar,fizz
0,1.624345,1.0,
1,-0.611756,,a
2,-0.528172,,
3,-1.072969,,
4,0.865408,,b
5,-2.301539,1.0,a
6,1.744812,1.0,


In [38]:
class DataFramer:
    """Hold a collection of generators and render them as a DataFrame."""

    def __init__(self, generators):
        """Store the generators; each one contributes a single column.

        Args:
            generators: Iterable of objects exposing ``generate(size)``.
        """
        self.generators = generators

    def generate(self, size):
        """Return a DataFrame with one column per stored generator.

        Args:
            size: Number of rows; passed unchanged to every generator.

        Returns:
            A ``pandas.DataFrame`` with the generated Series as columns, in
            generator order. An empty DataFrame when there are no generators.
            Each ``generate`` call is assumed to return a ``pandas.Series``.
        """
        columns = [g.generate(size) for g in self.generators]
        if not columns:
            # pd.concat raises on an empty list; keep returning an empty frame.
            return pd.DataFrame()
        # Column-wise concat keeps each column's own dtype; building rows and
        # transposing would collapse mixed columns to a common object dtype.
        return pd.concat(columns, axis=1)

In [46]:
# Reusable framer over four generators: two normal and two categorical,
# with per-generator options (fillrate, seed, loc, classes).
dataframer = DataFramer(
    generators=[
        NormalGenerator("mike"),
        CategoricalGenerator("jeff", fillrate=0.5, seed=10),
        NormalGenerator("tom", fillrate=0.9, loc=3),
        CategoricalGenerator("target", classes=[0, 1], fillrate=0.5),
    ],
)

In [72]:
# Draw 6 rows, one column per generator held by the framer.
dataframer.generate(6)

Unnamed: 0,mike,jeff,tom,target
0,0.900856,,3.705072,0.0
1,-0.683728,,1.320058,1.0
2,-0.12289,0.0,4.999976,0.0
3,-0.935769,1.0,,
4,-0.267888,1.0,3.193118,
5,0.530355,,1.446993,


In [70]:
def build_df_from_cols(cols_types, size=10):
    """Build a DataFrame from a ``{column name: type key}`` mapping.

    Args:
        cols_types: Mapping of column name to a type key, either
            ``"normal"`` or ``"categorical"``.
        size: Number of rows generated for every column (default 10).

    Returns:
        A ``pandas.DataFrame`` with one column per mapping entry, in the
        mapping's iteration order. An empty DataFrame for an empty mapping.

    Raises:
        KeyError: If a type key is neither ``"normal"`` nor ``"categorical"``.
    """
    lookup = {"normal": NormalGenerator, "categorical": CategoricalGenerator}
    columns = [
        lookup[kind](name).generate(size) for name, kind in cols_types.items()
    ]
    if not columns:
        # pd.concat raises on an empty list; keep returning an empty frame.
        return pd.DataFrame()
    # Column-wise concat keeps each column's own dtype; building rows and
    # transposing would collapse mixed columns to a common object dtype.
    return pd.concat(columns, axis=1)

In [71]:
# Each key is a column name; each value selects the generator type.
build_df_from_cols({"foo": "normal", "bar": "categorical"}, size=5)

Unnamed: 0,foo,bar
0,1.624345,1.0
1,-0.611756,1.0
2,-0.528172,0.0
3,-1.072969,0.0
4,0.865408,1.0


In [89]:
def build_df_from_num_cat(nums, cats, size=10):
    """Build a DataFrame of numeric columns followed by categorical columns.

    Args:
        nums: Either an int (how many ``NormalGenerator`` columns to create)
            or an iterable of column names for them.
        cats: Same as ``nums`` but for ``CategoricalGenerator`` columns.
        size: Number of rows generated for every column (default 10).

    Returns:
        A ``pandas.DataFrame`` with the numeric columns first, then the
        categorical ones. Columns requested by count are named ``col<i>``,
        where ``i`` is the column's position in the overall frame. An empty
        DataFrame when no columns are requested.
    """
    # NOTE(review): the notebook output for nums=2, cats=2 shows col0 == col1
    # and col2 == col3. Presumably generators built with default arguments
    # share a seed; confirm, and consider giving each one a distinct seed.
    gens = []
    specs = ((nums, NormalGenerator), (cats, CategoricalGenerator))
    for spec, gen_cls in specs:
        if isinstance(spec, int):
            # Auto-names continue from the number of columns created so far.
            offset = len(gens)
            names = [f"col{offset + i}" for i in range(spec)]
        else:
            names = list(spec)
        gens.extend(gen_cls(name) for name in names)

    columns = [gen.generate(size) for gen in gens]
    if not columns:
        # pd.concat raises on an empty list; keep returning an empty frame.
        return pd.DataFrame()
    # Column-wise concat keeps each column's own dtype; building rows and
    # transposing would collapse mixed columns to a common object dtype.
    return pd.concat(columns, axis=1)

In [90]:
# Integer counts: columns are auto-named col0..col3, numeric first.
build_df_from_num_cat(nums=2, cats=2, size=5)

Unnamed: 0,col0,col1,col2,col3
0,1.624345,1.624345,1.0,1.0
1,-0.611756,-0.611756,1.0,1.0
2,-0.528172,-0.528172,0.0,0.0
3,-1.072969,-1.072969,0.0,0.0
4,0.865408,0.865408,1.0,1.0


In [91]:
# Explicit names for nums, a count for cats: auto-naming continues from the
# number of columns already created, hence col1 and col2.
build_df_from_num_cat(nums=["foo"], cats=2, size=3)

Unnamed: 0,foo,col1,col2
0,1.624345,1.0,1.0
1,-0.611756,1.0,1.0
2,-0.528172,0.0,0.0
