In [1]:
from scmodels import SCM
import random, math

In [2]:
def get_distributions():
    """Return freshly randomised noise-distribution expressions as strings.

    Every call re-draws all parameters with the ``random`` module, so two
    calls generally return different parameterisations. The order of the
    families in the returned list is fixed.

    Parameters that must be strictly positive (``Geometric.p``,
    ``Poisson.lamda``, ``BetaBinomial.alpha``) are drawn from ranges that
    cannot round down to ``0.0``; a zero there would describe an invalid
    distribution.

    Returns:
        list[str]: 17 expressions such as ``"Normal(mean=3, std=4.21)"``,
        one per distribution family.
    """
    # Small offset that keeps parameters built from random.random() away from 0.
    ep = .1
    dists = [
        f"LogLogistic(alpha={random.randint(5, 20)}, beta={round(random.uniform(1,3.5),1)})",
        f"Normal(mean={random.randint(0,10)}, std={round(random.uniform(1,20),2)})",
        f"LogNormal(mean={random.randint(0,10)}, std={round(random.uniform(1,20),2)})",
        f"Benini(alpha={round(random.random() + ep, 1)}, beta={round(random.random() + ep, 1)}, sigma={round(random.random() + ep, 1)})",
        f"Beta(alpha={round(random.random() + ep, 1)}, beta={round(random.random() + ep, 1)})",
        f"Exponential(rate={round((random.random() + ep) * 10, 1)})",
        f"FDistribution(d1={random.randint(2, 4)}, d2={random.randint(5, 8)})",
        f"Gamma(k={round((random.random() + ep) * 4, 1)}, theta={round((random.random() + ep) * 8, 1)})",
        f"GammaInverse(a={round((random.random() + ep) * 4, 1)}, b={round((random.random() + ep) * 8, 1)})",
        f"Bernoulli({round(random.random(), 1)})",
        f"Binomial(n={random.randint(2, 10)}, p={round(random.random(), 1)}, succ={random.randint(3, 20)}, fail={random.randint(3, 20)})",
        # alpha must be > 0, so shift it by ep before rounding.
        f"BetaBinomial(n={random.randint(10, 100)}, alpha={round(random.random() + ep, 1)}, beta={round(random.uniform(2, 5),1)})",
        f"Die(sides={random.randint(4, 10)})",
        "DiscreteUniform(items='1 2 3')",
        # p must lie in (0, 1]: uniform(ep, 1) rounds into [0.1, 1.0].
        f"Geometric(p={round(random.uniform(ep, 1), 1)})",
        # lamda must be > 0, so shift it by ep before rounding.
        f"Poisson(lamda={round(random.random() + ep, 1)})",
        # The four key ranges are disjoint, so the outcomes never collide.
        f"FiniteRV({{{random.randint(5, 10)}: 1/2, {random.randint(0, 3)}: 1/6, {random.randint(15, 25)}: 1/6, {random.randint(30, 50)}: 1/6}})"
    ]

    return dists

In [14]:
def get_child_distribution(parents):
    """Compose a random expression string over ``parents`` and the noise ``N``.

    The first parent starts the expression; every further parent is joined
    with a randomly chosen ``+``, ``*`` or ``-``. From the third joined
    parent onwards the running expression is occasionally wrapped in
    ``sqrt(...)`` (weights 500:5 against wrapping), and after every join it
    is occasionally multiplied by ``N`` (weights 10:90). A final ``* N`` is
    always appended.

    Args:
        parents: Non-empty sequence of parent node names.

    Returns:
        str: The right-hand side of an assignment, e.g. ``"A1 + A3 * N"``.

    Raises:
        IndexError: If ``parents`` is empty.
    """
    binary_operators = ["+", "*", "-"]
    wrapper_choices = [False, "sqrt"]
    noise_choices = [True, False]

    expression = f"{parents[0]}"
    for joined, parent in enumerate(parents[1:], start=1):
        (operator,) = random.sample(binary_operators, 1)
        expression = f"{expression} {operator} {parent}"

        # Wrapping is only considered once more than two parents were joined.
        if joined > 2:
            (wrapper,) = random.choices(wrapper_choices, weights=(500, 5), k=1)
            if wrapper:
                expression = f"{wrapper}({expression})"

        (with_noise,) = random.choices(noise_choices, weights=(10, 90), k=1)
        if with_noise:
            expression += " * N"

    # Every child gets at least one multiplicative noise factor.
    return expression + " * N"

In [15]:
def get_parents(levels_and_distributions):
    """Pick a random set of parent node names from the levels built so far.

    A random depth ``d`` in ``1..len(levels_and_distributions)`` is drawn and
    levels ``0 .. d-1`` each contribute a random, non-empty sample (without
    replacement) of their node names.

    Args:
        levels_and_distributions: Mapping ``level index -> (assignments,
            names)``; only the ``names`` element (index 1) is read here.

    Returns:
        list[str]: Selected parent names, grouped by ascending level.
    """
    depth = random.randint(1, len(levels_and_distributions))
    chosen = []
    for level in range(depth):
        candidates = levels_and_distributions[level][1]
        how_many = random.randint(1, len(candidates))
        chosen += random.sample(candidates, how_many)

    return chosen

In [16]:
def get_level_n_distributions(level_data, levels_and_distributions):
    """Create the assignment strings for one non-root level.

    For each of the ``level_data['num']`` nodes a name
    ``<level_data['name']><1-based index>`` is formed, parents are drawn from
    ``levels_and_distributions``, a random expression over those parents is
    composed, and a randomly chosen noise distribution is attached.

    Args:
        level_data: Dict with keys ``'name'`` (level prefix) and ``'num'``
            (number of nodes in the level).
        levels_and_distributions: Levels built so far, passed on to
            ``get_parents``.

    Returns:
        tuple[list[str], list[str]]: ``(assignments, names)`` where each
        assignment reads ``"<name> = <expression>, N ~ <distribution>"``.
    """
    assignments, names = [], []
    for index in range(1, level_data['num'] + 1):
        node_name = level_data['name'] + str(index)
        parents = get_parents(levels_and_distributions)
        expression = get_child_distribution(parents)
        noise = random.sample(get_distributions(), 1)[0]

        assignments.append(f"{node_name} = {expression}, N ~ {noise}")
        names.append(node_name)

    return assignments, names

In [17]:
def get_level_0_distributions(level_data):
    """Create the assignment strings for the root level (nodes without parents).

    Each root node is pure noise: ``"<name> = N, N ~ <distribution>"`` with a
    randomly chosen noise distribution.

    Args:
        level_data: Dict with keys ``'name'`` (level prefix) and ``'num'``
            (number of nodes in the level).

    Returns:
        tuple[list[str], list[str]]: ``(assignments, names)`` with names
        ``<prefix>1 .. <prefix>num``.
    """
    assignments, names = [], []
    for index in range(1, level_data['num'] + 1):
        node_name = level_data['name'] + str(index)
        noise = random.sample(get_distributions(), 1)[0]
        assignments.append(f"{node_name} = N, N ~ {noise}")
        names.append(node_name)

    return assignments, names

In [18]:
def populate_level_distributions(level, level_data, levels_and_distributions):
    """Generate one level and store it in ``levels_and_distributions`` in place.

    Level ``0`` is built as a root level; any other level is built from the
    levels already present in ``levels_and_distributions``.

    Args:
        level: Key under which the generated level is stored.
        level_data: Dict with keys ``'name'`` and ``'num'`` for this level.
        levels_and_distributions: Mapping that receives the
            ``(assignments, names)`` pair for ``level``.

    Returns:
        None
    """
    generated = (
        get_level_0_distributions(level_data)
        if level == 0
        else get_level_n_distributions(level_data, levels_and_distributions)
    )
    levels_and_distributions[level] = generated

In [19]:
def get_level_names_and_num_nodes(input_nodes):
    """Randomly split a node budget into named levels.

    The number of levels is the floor of the cube root of ``input_nodes``.
    For each level a node count is drawn as ``int(exp(x))`` with ``x``
    uniform in ``[ub - 0.7, ub]`` (rounded to one decimal), where
    ``ub = round(log(remaining_nodes)) - 0.6``; the drawn count is then
    subtracted from the remaining budget. Generation stops early as soon as
    a level would receive zero nodes.

    Args:
        input_nodes: Approximate total number of nodes to distribute.

    Returns:
        dict[int, dict]: ``{level_index: {'name': <letter>, 'num': <count>}}``
        with names ``'A'``, ``'B'``, ... assigned via ``chr(65 + index)``.
    """
    nodes = input_nodes
    # Exact floor of the cube root. Truncating the float result directly is
    # off by one for perfect cubes (64 ** (1/3) == 3.9999999999999996).
    levels = round(math.pow(nodes, 1/3))
    if levels ** 3 > nodes:
        levels -= 1

    level_name_numnodes = {}
    for n in range(levels):
        ub = round(math.log(nodes)) - 0.6
        lb = ub - 0.7
        level_nodes = int(math.exp(round(random.uniform(lb, ub), 1)))
        # A negative exponent truncates to 0 nodes: the budget is exhausted.
        if level_nodes == 0:
            break

        level_name_numnodes[n] = {'name': chr(65 + n), 'num': level_nodes}
        nodes -= level_nodes

    return level_name_numnodes

In [38]:
def create_names_and_num_nodes_dict(nodes_per_level, level_names):
    """Pair level names with node counts, keyed by level index.

    Args:
        nodes_per_level: Sequence of node counts, indexed by level.
        level_names: Iterable of level names; its length determines the
            number of entries.

    Returns:
        dict[int, dict]: ``{index: {'name': level_names[index],
        'num': nodes_per_level[index]}}``.

    Raises:
        IndexError: If ``nodes_per_level`` is shorter than ``level_names``.
    """
    return {
        position: {'name': level_name, 'num': nodes_per_level[position]}
        for position, level_name in enumerate(level_names)
    }

In [39]:
def get_level_names(numnodes_per_level):
    """Return one capital-letter name per level, starting at ``'A'``.

    Args:
        numnodes_per_level: Sized collection with one entry per level; only
            its length is used.

    Returns:
        list[str]: ``['A', 'B', ...]`` with ``len(numnodes_per_level)`` items.
    """
    first_letter = ord("A")
    return [chr(first_letter + offset) for offset in range(len(numnodes_per_level))]

In [40]:
def get_numnodes_per_level(input_nodes):
    """Randomly split a node budget into per-level node counts.

    The number of levels is the floor of the cube root of ``input_nodes``.
    For each level a node count is drawn as ``int(exp(x))`` with ``x``
    uniform in ``[ub - 0.7, ub]`` (rounded to one decimal), where
    ``ub = round(log(remaining_nodes)) - 0.6``; the drawn count is then
    subtracted from the remaining budget. Generation stops early as soon as
    a level would receive zero nodes.

    Args:
        input_nodes: Approximate total number of nodes to distribute.

    Returns:
        list[int]: Node count for each generated level, in level order.
    """
    nodes = input_nodes
    # Exact floor of the cube root. Truncating the float result directly is
    # off by one for perfect cubes (64 ** (1/3) == 3.9999999999999996).
    levels = round(math.pow(nodes, 1/3))
    if levels ** 3 > nodes:
        levels -= 1

    numnodes_per_level = []
    for _ in range(levels):
        ub = round(math.log(nodes)) - 0.6
        lb = ub - 0.7
        level_nodes = int(math.exp(round(random.uniform(lb, ub), 1)))
        # A negative exponent truncates to 0 nodes: the budget is exhausted.
        if level_nodes == 0:
            break

        numnodes_per_level.append(level_nodes)
        nodes -= level_nodes

    return numnodes_per_level

In [None]:
def create_scm_from_nodes_list(nodes_list):
    """Build a random SCM whose level sizes are given explicitly.

    Levels are named ``'A'``, ``'B'``, ... in the order of ``nodes_list``.
    Assignment strings are generated level by level (each level may draw its
    parents from all earlier levels) and handed to ``SCM`` as one flat list.

    Args:
        nodes_list: Sequence of node counts, one per level, root level first.
            NOTE(review): every count is presumably expected to be >= 1,
            since later levels sample their parents from earlier ones.

    Returns:
        SCM: The model built from the generated assignment strings.
    """
    level_names = get_level_names(nodes_list)
    level_name_numnodes = create_names_and_num_nodes_dict(nodes_list, level_names)

    levels_and_distributions = {}
    for level, level_data in level_name_numnodes.items():
        populate_level_distributions(level, level_data, levels_and_distributions)

    # Flatten into a fresh list so the per-level lists are left untouched.
    scm_dists = []
    for level in range(len(levels_and_distributions)):
        scm_dists.extend(levels_and_distributions[level][0])

    return SCM(scm_dists)

In [60]:
input_nodes = 80

# Randomly split the node budget into named levels (A, B, C, ...).
# Equivalent step-by-step alternative:
#   nodes_per_level = get_numnodes_per_level(input_nodes)
#   level_names = get_level_names(nodes_per_level)
#   level_name_numnodes = create_names_and_num_nodes_dict(nodes_per_level, level_names)
level_name_numnodes = get_level_names_and_num_nodes(input_nodes)

# Generate the assignment strings level by level; each level may pick its
# parents from every level generated before it.
levels_and_distributions = {}
for level, level_data in level_name_numnodes.items():
    populate_level_distributions(level, level_data, levels_and_distributions)

# Flatten into a fresh list so the per-level lists are not mutated.
scm_dists = []
for level in range(len(levels_and_distributions)):
    scm_dists.extend(levels_and_distributions[level][0])

# `scm` is used by the following cell, so it must be defined here for the
# notebook to run top to bottom on a fresh kernel.
scm = SCM(scm_dists)
scm.plot();

{0: {'name': 'A', 'num': 14}, 1: {'name': 'B', 'num': 20}, 2: {'name': 'C', 'num': 22}, 3: {'name': 'D', 'num': 8}}


In [21]:
# Draw 100 samples from the SCM; as the last expression of the cell, the
# returned table is rendered as the cell output.
# NOTE(review): requires an `scm` object to already exist in the kernel
# namespace - confirm the cell that constructs it runs before this one.
# NOTE(review): the saved output contains NaN entries, presumably from
# sqrt(...) terms evaluated on negative values - verify.
scm.sample(100)


The numsamples parameter to sympy.stats.sample() is deprecated.
Either use a list comprehension, like

[sample(...) for i in range(100)]

or add a dimension to size, like

sample(..., size=(100,))

See https://docs.sympy.org/latest/explanation/active-deprecations.html#deprecated-sympy-stats-numsamples
for details.

This has been deprecated since SymPy version 1.9. It
will be removed in a future version of SymPy.

  list(sample(noise_gen, numsamples=n, seed=seed)), dtype=float
  return -A2*A9*N**2 + sqrt(-A10 + A13 + A19*A5*A7)
  return sqrt(-A10*A19 + A15 + A16 - A20*A6 - A3*N + A4 - A5 - A8 + A9)*N**2
  return B2*N + B3*N + sqrt(-A1 - A10*A7*N - A11*A19*A3*A4*B1*B14 + A12*A5 - A13 + A15 - A16 + A17*N + A18 + A2 + A6 - A8 + A9*N - B13 - B5 - B6)


Unnamed: 0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,...,C6,C11,C15,C17,C20,D1,D7,D8,D9,D5
0,1.134132,11.559278,0.472162,17.0,4320.251685,0.999921,1.177818,5.800939,8.0,20.0,...,5.325245e+08,481431.636157,13438.578712,1.979447e+08,-3.813358e+09,-17088.388499,4756.401587,1.319634e+06,,
1,0.834939,-8.033557,1.816628,11.0,3.959617,0.977394,1.807256,1.136018,7.0,7.0,...,7.294567e+06,-15167.286996,-36364.407054,-1.091613e+09,-2.015759e+07,-18425.574873,745.475294,-1.301521e+05,-4.286703e+06,8.466736e+08
2,1.652346,-5.407006,0.538870,9.0,243.520914,0.999905,3.176934,-3.258257,1.0,7.0,...,3.497929e+06,-27738.899154,-16740.474594,5.236033e+07,-3.398082e+08,-14930.081890,461.033553,-1.164805e+05,,
3,1.002384,-7.161546,0.523036,33.0,30.550980,0.697326,1.830672,0.919950,5.0,7.0,...,1.256849e+07,1933.628739,-30362.304482,-7.313974e+07,-7.271707e+06,-5248.508922,1303.693365,-1.602049e+05,9.974370e+05,-1.028149e+10
4,15.003724,10.378194,0.570116,1.0,1.969951,1.000000,1.476801,-1.663165,2.0,7.0,...,2.865890e+08,457368.224029,474538.732665,-3.347903e+09,-2.196105e+09,478340.530318,,4.646430e+05,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,1.186733,25.304978,0.571010,22.0,1.405134,0.998133,6.468481,-14.582337,2.0,2.0,...,-1.952790e+07,-81504.155450,-21130.797271,1.892290e+09,-3.558496e+08,-38513.068957,1357.430687,-5.954774e+04,-1.388811e+06,-1.083927e+09
96,1.531825,-19.761058,3.791624,3.0,3.076284,0.994717,1.516217,1.743619,3.0,42.0,...,-3.095512e+05,-4975.832101,-11243.786174,-1.047632e+08,-3.155179e+08,6768.227958,,-1.586569e+04,,
97,0.707064,-12.249919,1.679235,2.0,1.604053,0.989174,1.390194,8.708652,5.0,2.0,...,-5.016677e+09,189651.259488,100915.663818,-1.872704e+08,1.175651e+11,105497.477180,,3.126873e+04,,
98,4.291372,-6.906716,0.436171,10.0,408.306083,1.000000,5.200185,8.808545,1.0,20.0,...,1.776432e+07,109093.341897,-12699.636327,-2.084645e+08,-5.583944e+08,-22686.267604,3011.703601,-8.956698e+04,,
