# Random Bayesian Network Generator

Author: [GA WU](mailto:wuga@mie.utoronto.ca), D3M Lab, MIE, University of Toronto

A Bayesian network, Bayes network, belief network, Bayes(ian) model or probabilistic directed acyclic graphical model is a probabilistic graphical model (a type of statistical model) that represents a set of random variables and their conditional dependencies via a directed acyclic graph (DAG).[Cite](https://en.wikipedia.org/wiki/Bayesian_network)

This code generates a random Bayesian network in [JSON format](http://www.json.org/), which corresponds to the import format of the Python package [libpgm](http://pythonhosted.org/libpgm/).

## Limitation
The current code only supports Bayesian networks with the following properties:
1. Binary Variables(*)
2. Multiple Parents nodes
3. Single Child node(*)
4. Arbitrary number of nodes
Note: the starred items are significant limitations; please check whether the generator fits your requirements.

## Instruction
Calling this module is very simple. You only need to instantiate one instance of BayesNet class and provide number of variables of the target Bayesian Network.

In [1]:
# bn = BayesNet(20): will generate a random bayes network with 20 variables.
# print bn.stream(): will print the json format on console screen
# bn.writeStream('data/bn/new_bn.txt'): will write json file to the target directory

## Packages
The following list contains more packages than are strictly necessary. We list them here to remind ourselves of possible directions for extension. Feel free to remove unnecessary packages to lighten your work.

In [2]:
import os
import json
import sys
import numpy as np
import pandas as pd
import string
import unicodedata
from numpy import genfromtxt
from tqdm import tqdm
import random 
import copy
import itertools

#Load PGM library
from libpgm.nodedata import NodeData
from libpgm.graphskeleton import GraphSkeleton
from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork
from libpgm.lgbayesiannetwork import LGBayesianNetwork
from libpgm.hybayesiannetwork import HyBayesianNetwork
from libpgm.dyndiscbayesiannetwork import DynDiscBayesianNetwork
from libpgm.tablecpdfactorization import TableCPDFactorization
from libpgm.sampleaggregator import SampleAggregator
from libpgm.pgmlearner import PGMLearner


## Node 
One unit of the Bayesian network. This class represents the properties of a single node:
1. Parents list
2. Children list
3. Conditional probabilities

In [3]:
class Node(object):
    """A single variable of the Bayesian network.

    Stores the node's name, its parent list, its child list and a table of
    randomly drawn conditional probabilities (``conds``).  ``conds`` maps a
    parent-state key to a two-element list ``[p, 1 - p]``.
    """

    def __init__(self,name):
        """Create a node called ``name`` with no parents, children or CPT."""
        self.name = name
        # Start with empty relations so that the getters and setRandConds()
        # are usable even before setParents()/setChild() have been called.
        self.parents = []
        self.child = []
        self.conds = {}

    def setParents(self, parents):
        """Record the list of parent names."""
        self.parents = parents

    def setChild(self,child):
        """Record the list of child names."""
        self.child = child

    @staticmethod
    def _randomDistribution():
        """Return ``[p, 1 - p]`` with p uniform in [0.1, 0.9], 5 decimals."""
        p = round(random.uniform(0.1, 0.9), 5)
        return [p, round(1 - p, 5)]

    def setRandConds(self):
        """(Re)generate the random conditional probability table.

        Without parents the table has the single key ``"No_Parents"``.
        With k parents it has one key per string over ``"01"`` of length k
        (``"00"``, ``"01"``, ...); character j of a key refers to
        ``self.parents[j]``.

        The table is rebuilt from scratch on every call, so entries from a
        previous parent list never linger.
        """
        if not self.parents:
            keys = ["No_Parents"]
        else:
            keys = ["".join(bits)
                    for bits in itertools.product("01", repeat=len(self.parents))]
        fresh = {}
        for key in keys:
            fresh[key] = self._randomDistribution()
        self.conds = fresh

    def getParents(self):
        """Return the list of parent names."""
        return self.parents

    def getChild(self):
        """Return the list of child names."""
        return self.child

    def getConds(self):
        """Return the conditional probability table (a dict)."""
        return self.conds

## Bayesian Network
Please do not modify the output format produced by the following code. libpgm is very picky: even a small change in the generated text can cause errors on the libpgm side.

In [4]:
class BayesNet(object):
    """Random Bayesian network over binary variables, rendered as JSON text.

    Construction happens in three steps:

    1. ``_createVariables`` names the variables ``Binary_Variable_000``, ...
    2. ``_createConnections`` links them into a random undirected tree.
    3. ``_createVdata`` orients the links by processing variables in a
       random order: neighbours already processed become parents, and a
       variable is only processed while at most one neighbour is still
       pending, so every node ends up with at most one child.

    Attributes:
        V: variable names in processing order.
        E: directed edges as ``[parent, child]`` pairs.
        Vdata: node objects (name, parents, child, conds) in the order of V.
        neighbors: dict mapping each name to its undirected neighbour list.
    """

    def __init__(self, num):
        """Build a random network with ``num`` variables."""
        self.Vdata = []
        self.V = []
        self.E = []
        variables = self._createVariables(num)
        self._createConnections(variables)
        self._createVdata(variables)

    def _createVariables(self,num):
        """Create ``num`` variable names and an empty neighbour list each."""
        names = ['Binary_Variable_'+format(i,'03d') for i in range(num)]
        self.neighbors = {}
        for name in names:
            self.neighbors[name] = []
        return names

    def _createConnections(self,vars):
        """Connect the variables into a random undirected tree.

        Two randomly sampled variables form the first link; every other
        variable is then attached to a randomly chosen variable that is
        already part of the tree.  With fewer than two variables there is
        nothing to connect, so the method returns without adding links.
        """
        if len(vars) < 2:
            # random.sample(vars, 2) would raise ValueError here.
            return
        first, second = random.sample(vars, 2)
        self.neighbors[first].append(second)
        self.neighbors[second].append(first)
        connected = [first, second]
        # Set mirror of `connected` for O(1) membership tests.
        linked = set(connected)
        for v in vars:
            if v in linked:
                continue
            u = random.choice(connected)
            connected.append(v)
            linked.add(v)
            self.neighbors[v].append(u)
            self.neighbors[u].append(v)

    def _createVdata(self,vars):
        """Orient the tree and create one node object per variable.

        Repeatedly picks a random pending variable.  Its already-processed
        neighbours become its parents and the remaining neighbours its
        children.  The pick is rejected (and retried) while more than one
        neighbour is still pending, which enforces the single-child limit.
        """
        todo = list(vars)
        done = set()
        while todo:
            v = random.choice(todo)
            adjacent = set(self.neighbors[v])
            parents = list(done & adjacent)
            child = list(adjacent.difference(parents))
            if len(child) > 1:
                # Too many pending neighbours; try another variable.
                continue
            node = Node(v)
            node.setParents(parents)
            node.setChild(child)
            node.setRandConds()
            todo.remove(v)
            done.add(v)
            self.V.append(v)
            self.Vdata.append(node)
            for p in parents:
                self.E.append([p,v])

    def printNodes(self):
        """Print the name and probability table of every node."""
        for v in self.Vdata:
            # Single-argument parenthesised print behaves the same on
            # Python 2 and Python 3.
            print(v.name)
            print(v.conds)

    def _vertexStream(self):
        """Return the ``"V"`` entry of the JSON text."""
        # The list repr uses single quotes; JSON needs double quotes.
        return '\t"V": {0}'.format(self.V).replace("'",'"')

    def _edgeStream(self):
        """Return the ``"E"`` entry of the JSON text, one edge per line."""
        return '\t"E": {0}'.format(self.E).replace("'",'"').replace("],","],\n\t\t")

    def _nodeStream(self):
        """Return the ``"Vdata"`` entry of the JSON text.

        Each node contributes ord, numoutcomes, vals, parents, children and
        cprob.  For nodes with parents, cprob is an object whose keys look
        like ``"['Neg_A', 'Pos_B']"``: one outcome name per parent, in the
        order of the node's parent list.
        """
        NUMEOUTCOMES = 2 #Binary ONLY
        vnodes = []
        for i,v in enumerate(self.Vdata):
            order = '"ord":{0}'.format(i)
            numoutcomes = '"numoutcomes": {0}'.format(NUMEOUTCOMES)
            outcome = '"vals": ["Neg_{0}", "Pos_{0}"]'.format(v.name)
            if not v.parents:
                parents = '"parents": null'
                cprob = '"cprob":{0}'.format(v.conds.get("No_Parents"))
            else:
                parents = '"parents": '+str(v.parents).replace("'",'"')
                combs = []
                for key in v.conds:
                    comb_list = []
                    # Character j of the key is the state of parent j.
                    for j,b in enumerate(key):
                        if b == '0':
                            comb_list.append("'Neg_{0}'".format(v.parents[j]))
                        else:
                            comb_list.append("'Pos_{0}'".format(v.parents[j]))
                    comb_name = ', '.join(comb_list)
                    combs.append('"[{0}]":{1}'.format(comb_name,v.conds.get(key)))
                cprob = '"cprob":{{\n\t\t\t\t{0}\n\t\t\t\t}}'.format(',\n\t\t\t\t'.join(combs))

            if not v.child:
                children = '"children": null'
            else:
                children = '"children": '+str(v.child).replace("'",'"')

            components = [order, numoutcomes, outcome, parents, children, cprob]
            vnodes.append('"{0}":{{\n\t\t\t{1}\n\t\t\t}}'.format(v.name, ",\n\t\t\t".join(components)))
        vdata = '\t"Vdata":{{\n\t\t{0}\n\t\t}}'.format(',\n\t\t'.join(vnodes))
        return vdata

    def stream(self):
        """Return the whole network (V, E and Vdata) as one JSON string."""
        components = [self._vertexStream(), self._edgeStream(), self._nodeStream()]
        return '{{\n{0}\n}}'.format(',\n'.join(components))

    def writeStream(self,path):
        """Write the JSON text to ``path`` and print ``Done!``."""
        with open(path, "w") as text_file:
            text_file.write(self.stream())
        print("Done!")

In [5]:
# Build a random Bayesian network with 100 binary variables.
bn = BayesNet(100)

In [6]:
# Show the generated JSON text; the parenthesised single-argument form
# prints the same on Python 2 and Python 3.
print(bn.stream())

{
	"V": ["Binary_Variable_077", "Binary_Variable_070", "Binary_Variable_083", "Binary_Variable_095", "Binary_Variable_020", "Binary_Variable_097", "Binary_Variable_032", "Binary_Variable_075", "Binary_Variable_033", "Binary_Variable_071", "Binary_Variable_088", "Binary_Variable_090", "Binary_Variable_073", "Binary_Variable_086", "Binary_Variable_022", "Binary_Variable_092", "Binary_Variable_054", "Binary_Variable_053", "Binary_Variable_061", "Binary_Variable_087", "Binary_Variable_078", "Binary_Variable_093", "Binary_Variable_081", "Binary_Variable_001", "Binary_Variable_059", "Binary_Variable_030", "Binary_Variable_043", "Binary_Variable_048", "Binary_Variable_096", "Binary_Variable_049", "Binary_Variable_068", "Binary_Variable_072", "Binary_Variable_056", "Binary_Variable_062", "Binary_Variable_042", "Binary_Variable_098", "Binary_Variable_028", "Binary_Variable_046", "Binary_Variable_057", "Binary_Variable_040", "Binary_Variable_076", "Binary_Variable_069", "Binary_Variable_029", "B

In [7]:
# Save the JSON text to a file. writeStream() opens the path with
# open(path, "w"), which does not create missing directories, so
# data/bn/ must already exist.
bn.writeStream('data/bn/new_bn.txt')

Done!
