# Purpose: demonstrate approximate inference

## Simple example with known parameters and structure
This example creates two variables with defined parameters (means and covariances) and structure.

In [1]:
import numpy as np
from pybbn.lg.graph import Dag, Parameters, Bbn

# create the directed acylic graph
dag = Dag()
dag.add_node(0)
dag.add_node(1)
dag.add_edge(0, 1)

# create parameters
means = np.array([0, 25])
cov = np.array([
    [1.09, 1.95],
    [1.95, 4.52]
])
params = Parameters(means, cov)

# create the bayesian belief network
bbn = Bbn(dag, params, max_samples=2000, max_iters=10)

# do the inference
s = bbn.do_inference()
print(s)

# set the evidence on node 0 to a value of 1
bbn.set_evidence(0, 1)
s = bbn.do_inference()
print(s)
bbn.clear_evidences()

# set the evidence on node 1 to a value of 20
bbn.set_evidence(1, 20)
s = bbn.do_inference()
print(s)
bbn.clear_evidences()

[ 0.0332189  25.26909024]
[ 1.         27.01627047]
[-2.16060132 20.        ]


## Example with learned parameters and known structure
This example simulates data and then computes the means and covariances to be used as the parameter input. The structure is given according to the equations (relationships below).

In [2]:
import numpy as np
import pandas as pd

In [3]:
np.random.seed(37)
num_samples = 1000
x1 = np.random.normal(0, 1, num_samples)
x2 = np.random.normal(2, 2, num_samples)
x3 = 1.0 + 0.8 * x1 + 0.2 * x2 + np.random.normal(0, 1, num_samples)
x4 = 0.9 * x3 + np.random.normal(0, 1, num_samples)
df = pd.DataFrame({
    'x1': x1,
    'x2': x2,
    'x3': x3,
    'x4': x4
})

In [4]:
dag = Dag()
dag.add_node(0)
dag.add_node(1)
dag.add_node(2)
dag.add_node(3)
dag.add_edge(0, 2)
dag.add_edge(1, 2)
dag.add_edge(2, 3)

# create parameters
means = np.array(list(df.mean()))
cov = df.cov().values
params = Parameters(means, cov)

print('means: {}'.format(means))
print('cov: {}'.format(cov))

# create the bayesian belief network
bbn = Bbn(dag, params, max_samples=2000, max_iters=1)

# do the inference
print('inferences')
print(bbn.do_inference())

bbn.set_evidence(2, 1.0)
print(bbn.do_inference())
bbn.clear_evidences()

bbn.set_evidence(3, 0.5)
print(bbn.do_inference())
bbn.clear_evidences()

bbn.set_evidence(0, 0)
bbn.set_evidence(2, 0.5)
print(bbn.do_inference())
bbn.clear_evidences()

means: [0.01277839 1.96783059 1.39369055 1.25516026]
cov: [[ 0.9634615  -0.00742708  0.75799913  0.60274112]
 [-0.00742708  3.91462611  0.67076612  0.52561797]
 [ 0.75799913  0.67076612  1.6807885   1.39650737]
 [ 0.60274112  0.52561797  1.39650737  2.15634038]]
inferences
[0.42325496 0.59982193 1.60298985 1.44507415]
[ 0.05810657 -0.31001876  1.          1.62701067]
[-0.14791773  1.23639539  1.03236433  0.5       ]
[0.         2.28747974 0.5        0.99549776]


## Example with learned parameters and learned structure
This is a simple example of learning the parameters (as before above) and structure using a simple maximum-weight spanning tree algorithm. Note that arc-orientation is simply the order given to it.

```
True model

x1 --> x3 <-- x2
        --> x4

Learned model (arc commission error with x1 --> x2, arc omission error with x2 --> x3)
x1 --> x3 --> x4
--> X2
```

In [5]:
def to_edge_list(df):
    data = []
    corr_matrix = df.corr().values
    num_vars = corr_matrix.shape[0]
    for i in range(num_vars):
        for j in range(num_vars):
            if j > i:
                t = (i, j, abs(corr_matrix[i, j]))
                data.append(t)
    return data

def learn_structure(num_vars, edge_list):
    # add all nodes
    dag = Dag()
    for i in range(num_vars):
        dag.add_node(i)
    
    # add edges using maximum weight spanning tree algorithm
    for edge in edge_list:
        try:
            dag.add_edge(edge[0], edge[1])
        except ValueError:
            pass
        if len(dag.g.edges()) == num_vars - 1:
            break
    
    return dag

In [6]:
edge_list = sorted(to_edge_list(df), key=lambda tup: tup[2], reverse=True)
print(edge_list)

[(2, 3, 0.7335471611852428), (0, 2, 0.5956552726580833), (0, 3, 0.4181720299771823), (1, 2, 0.2614986442653267), (1, 3, 0.18091159589992045), (0, 1, 0.003824331204180586)]


In [7]:
dag = learn_structure(df.shape[1], edge_list)

In [8]:
dag.g.nodes()

NodeView((0, 1, 2, 3))

In [9]:
dag.g.edges()

OutEdgeView([(0, 2), (0, 3), (2, 3)])

In [10]:
# create the bayesian belief network
bbn = Bbn(dag, params, max_samples=1000, max_iters=10)

# do the inference
print('inferences')
print(bbn.do_inference())
print(bbn.do_inference())

bbn.set_evidence(2, 1.0)
print(bbn.do_inference())
print(bbn.do_inference())
bbn.clear_evidences()

bbn.set_evidence(3, 0.5)
print(bbn.do_inference())
print(bbn.do_inference())
bbn.clear_evidences()

bbn.set_evidence(0, 0)
bbn.set_evidence(2, 0.5)
print(bbn.do_inference())
print(bbn.do_inference())
bbn.clear_evidences()

inferences
[0.15692099 2.02323104 1.20253379 0.68633235]
[0.16455135 2.04344398 1.58716291 1.54695888]
[-0.40148508  1.7410963   1.          0.89846984]
[-0.13812888  1.82528758  1.          1.04290391]
[-0.05246594  1.91217844  0.94784875  0.5       ]
[-0.08299282  1.15588197  0.97860861  0.5       ]
[0.         0.67440068 0.5        0.505206  ]
[0.         1.56564444 0.5        0.50222223]
