# Samples to Arithmetic Circuits

Author: [GA WU](mailto:wuga@mie.utoronto.ca), D3M Lab, MIE, University of Toronto
    
This code addresses the problem of generating Arithmetic Circuits (ACs) from sampled data. It has the following components:
1. Reveal the conditional probabilities of data through Deep Learning
2. Transfer learned network to valid AC format
3. Experiments with batch AC conversion

## Related toolbox
[Random Bayes Network Generator](https://github.com/wuga214/TOOLBOX-Random-Bayes-Net-Generator)

## Package

In [1]:
import os
import json
import sys
import numpy as np
import pandas as pd
import string
import unicodedata
from numpy import genfromtxt
from tqdm import tqdm
import random 

#Load PGM library
from libpgm.nodedata import NodeData
from libpgm.graphskeleton import GraphSkeleton
from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork
from libpgm.lgbayesiannetwork import LGBayesianNetwork
from libpgm.hybayesiannetwork import HyBayesianNetwork
from libpgm.dyndiscbayesiannetwork import DynDiscBayesianNetwork
from libpgm.tablecpdfactorization import TableCPDFactorization
from libpgm.sampleaggregator import SampleAggregator
from libpgm.pgmlearner import PGMLearner

#Keras
from keras.layers import Dense, Activation,Dropout
from keras.models import Sequential
from keras.regularizers import l1,l2
from keras.models import load_model

Using TensorFlow backend.


In [2]:
def PathFinder(path):
    """
    Build the path used to open a data file.

    The base directory is taken from the *string literal* '__file__'
    (not the module attribute), whose dirname is always ''. Joining ''
    with `path` therefore returns `path` unchanged, so relative paths
    end up being resolved against the current working directory.
    """
    base_dir = os.path.dirname('__file__')
    return os.path.join(base_dir, path)

def ReadData(path,pandas=False):
    """
    Load a comma-separated file.

    path   -- file location, passed through PathFinder first
    pandas -- when truthy, parse with pd.read_csv and return its result;
              otherwise parse with numpy's genfromtxt (delimiter ',')
              and return that result
    """
    resolved = PathFinder(path)
    if pandas:
        return pd.read_csv(resolved)
    return genfromtxt(resolved, delimiter=',')

In [3]:
class FullyConnectedNetwork(object):
    """
        Fully connected feed-forward network.

        The model is a Keras Sequential stack of `hidden_layer` blocks of
        Dense(hidden_shape) + relu, followed by Dense(output_shape) + linear,
        compiled with the 'rmsprop' optimizer and 'binary_crossentropy' loss.
    """

    def __init__(self,input_shape,hidden_layer,hidden_shape,output_shape):
        """
        input_shape  -- number of input features
        hidden_layer -- number of hidden Dense+relu blocks (may be 0)
        hidden_shape -- width of every hidden layer
        output_shape -- width of the output layer
        """
        self.network = Sequential()
        # Width of whatever feeds the next Dense layer; starts at the raw input.
        layer_input_dim = input_shape
        for _ in range(hidden_layer):
            self.network.add(Dense(hidden_shape, input_dim=layer_input_dim))
            self.network.add(Activation('relu'))
            layer_input_dim = hidden_shape
        # Use the tracked width rather than hidden_shape: when hidden_layer is 0
        # the output layer is the first layer and must be sized by input_shape.
        self.network.add(Dense(output_shape, input_dim=layer_input_dim))
        self.network.add(Activation('linear'))
        self.network.compile(optimizer='rmsprop', loss="binary_crossentropy")

    def train(self,parents,child, epoch=100):
        """Fit the network on inputs `parents` and targets `child` for `epoch` epochs, silently."""
        # NOTE(review): `nb_epoch` is kept as in the original; newer Keras
        # releases spell this argument `epochs` -- confirm against the installed version.
        self.network.fit(parents, child, nb_epoch=epoch,verbose=0)

    def test(self,parents):
        """Return the network's predictions for the inputs `parents`."""
        return self.network.predict(parents, verbose=0)

    def loadModel(self,modelpath):
        """Replace the current network with the model stored at `modelpath`."""
        self.network=load_model(modelpath)

    def saveModel(self,modelpath):
        """Save the current network to `modelpath`."""
        self.network.save(modelpath)

    def getLayers(self):
        """Return the list of layers of the underlying Sequential model."""
        return self.network.layers

In [4]:
class NetToAC():
    """
    Translate a stack of network layers into an arithmetic circuit (AC).

    Every Dense layer is emitted as max(w.x + b, zero), i.e. all nonlinear
    activations are assumed to be relu, including the one on the last layer.

    Attributes:
        ac        -- dict: node name -> expression whose operands are node
                     names (e.g. '* parent W_000_000_000') or, for 'n'
                     constant nodes, a literal value (e.g. 'n 0.5')
        nodeindex -- dict: node name -> integer index of the node
        varindex  -- variable labels used in the 'v_<label>' nodes; entry i
                     belongs to parent i and the last entry to the child
    """
    def __init__(self, parents_names, child_name, varindex, layers, offset = 0):
        """
        parents_names -- names of the input variables, in network input order
        child_name    -- name of the predicted variable
        varindex      -- variable labels (see class docstring)
        layers        -- layers of the trained network; only Dense layers are converted
        offset        -- index given to the first node of this circuit
        """
        self._layers = layers
        self._num_layers = len(layers)
        self._input_dim=layers[0].get_config().get('input_dim')
        self.ac={}
        self.nodeindex={}
        self.varindex = varindex
        self._ac_index = offset
        self._createAC(parents_names,child_name)

    def _getACIndex(self):
        """Return the next free node index and advance the counter."""
        output = self._ac_index
        self._ac_index = self._ac_index+1
        return output

    def _addNode(self, name, expression):
        """Register `expression` under `name`, assign it the next index, return `name`."""
        self.ac[name] = expression
        self.nodeindex[name] = self._getACIndex()
        return name

    def _createAC(self,parents_names,child_name):
        """Build the whole circuit: constants, inputs, Dense layers, output combination."""
        #Binary AC: the two constants every variable node refers to
        self._addNode('zero', 'n 0')
        self._addNode('one', 'n 1')
        for i,p in enumerate(parents_names):
            self._addNode(p, 'v_'+self.varindex[i]+' one zero')

        intern_names = parents_names

        #Layerwise Conversion (non-Dense layers, e.g. Activation, are skipped)
        for layer_index,layer in enumerate(self._layers):
            if isinstance(layer, Dense):
                intern_names = self._layerwiseTransform(layer_index,intern_names,layer)

        #Adding Negative branch of output node
        assert len(intern_names) == 1, 'Single binary output only!'
        output_name = str(intern_names[0])
        neg_output_name = self._addNode('Nega_output', '- one '+output_name)
        child_pos = self._addNode(child_name+'_Pos', 'v_'+self.varindex[-1]+' one zero')
        child_neg = self._addNode(child_name+'_Neg', 'v_'+self.varindex[-1]+' zero one')
        apply_trick_pos = self._addNode('Apply_trick_pos', '* '+intern_names[0]+' '+child_pos)
        apply_trick_neg = self._addNode('Apply_trick_neg', '* '+neg_output_name+' '+child_neg)
        self._addNode('final', '+ '+apply_trick_pos+' '+apply_trick_neg)

    def _layerwiseTransform(self,layer_index,parents_names,layer):
        """
        Convert one Dense layer and return the names of its output nodes.

        Output unit j of layer `layer_index` is named 'H_<layer>_<j>' (3-digit,
        zero padded) and is built from column j of the weight matrix and
        entry j of the bias returned by layer.get_weights().
        """
        weights_bias=layer.get_weights()
        weights=weights_bias[0]
        bias=weights_bias[1]
        _,output_dim=weights.shape

        #Initialize layer output variables as a list of names
        layer_tag = format(layer_index,'03d')
        children_names = ['H_'+layer_tag+'_'+format(output_index,'03d')
                          for output_index in range(output_dim)]

        #Outputwise Conversion
        for output_index, output_name in enumerate(children_names):
            self._outputwiseTransform(layer_index,output_index,parents_names,
                                      output_name,weights[:,output_index],bias[output_index])

        return children_names

    def _outputwiseTransform(self,layer_index,output_index,parents_names,\
                             output_name,weights_vector,bias_scalar):
        """
        Emit the nodes computing one unit: max(sum_i(parent_i * w_i) + bias, zero).

        Nodes are registered in this order: all weight constants, the bias
        constant, all products, the chain of pairwise sums, the bias sum and
        finally the relu node named `output_name`.

        Raises ValueError if `weights_vector` is empty, because the sum chain
        would otherwise reference a product node that was never created.
        """
        # len() instead of truthiness: weights_vector may be a numpy array
        if len(weights_vector) == 0:
            raise ValueError('Cannot convert a unit without input weights')

        layer_tag = format(layer_index,'03d')
        output_tag = format(output_index,'03d')
        input_tags = [format(input_index,'03d') for input_index in range(len(weights_vector))]

        #Create weight node(Constant)
        weight_names = [self._addNode('W_'+layer_tag+'_'+input_tag+'_'+output_tag,
                                      'n '+str(weights_vector[input_index]))
                        for input_index,input_tag in enumerate(input_tags)]

        #Create bias(Constant)
        bias_name = self._addNode('B_'+layer_tag+'_'+output_tag, 'n '+str(bias_scalar))

        #Create multiplication node(Operation)
        mul_names = [self._addNode('M_'+layer_tag+'_'+input_tag+'_'+output_tag,
                                   '* '+parents_names[input_index]+' '+weight_names[input_index]) #w*x
                     for input_index,input_tag in enumerate(input_tags)]

        #Pairwise summation node(Operation): fold the products left to right
        running_sum = mul_names[0]
        for sum_index, mul_name in enumerate(mul_names[1:]):
            sum_name = 'S_'+layer_tag+'_'+output_tag+'_'+format(sum_index,'03d')
            running_sum = self._addNode(sum_name, '+ '+running_sum+' '+mul_name)

        sum_bias_name = self._addNode('S_'+layer_tag+'_'+output_tag+'_Bias',
                                      '+ '+running_sum+' '+bias_name)

        #Nonlinear Activation node is the output node name.
        #Refer to the 'zero' constant by name so that it resolves to the right
        #index when the circuit starts at a non-zero offset.
        self._addNode(output_name, 'max '+sum_bias_name+' zero')

    def _convertACtoString(self):
        """Return all nodes as '<name>:<expression>' lines (dict order), names unresolved."""
        return ''.join(key+':'+str(value)+'\n' for key,value in self.ac.items())

    def getACStream(self):
        """
        Serialise the circuit with node names replaced by node indices.

        Returns (ac_stream, next_index): `ac_stream` has one expression per
        line, ordered by node index and newline terminated; `next_index` is
        the first index not used by this circuit. Calling this method does
        not consume an index.
        """
        indexed_lines = []
        for name,expression in self.ac.items():
            tokens = expression.split(' ')
            operator, operands = tokens[0], tokens[1:]
            # Replace whole operand tokens only; 'n' nodes carry a literal
            # value, not node references, so they are left untouched.
            if operator != 'n':
                operands = [str(self.nodeindex.get(token, token)) for token in operands]
            indexed_lines.append((self.nodeindex[name], ' '.join([operator]+operands)))
        indexed_lines.sort(key=lambda pair: pair[0])
        ac_stream = ''.join(line+'\n' for _,line in indexed_lines)
        return ac_stream,self._ac_index

In [5]:
# Load the samples file as a pandas DataFrame (pandas=True selects pd.read_csv)
samplepath = 'data/bn/samples.txt'
df = ReadData(samplepath,pandas = True)
# Preview the first rows of the loaded table
df.head()

Unnamed: 0,Binary_Variable_088,Binary_Variable_089,Binary_Variable_084,Binary_Variable_085,Binary_Variable_086,Binary_Variable_087,Binary_Variable_080,Binary_Variable_081,Binary_Variable_082,Binary_Variable_083,...,Binary_Variable_062,Binary_Variable_063,Binary_Variable_060,Binary_Variable_061,Binary_Variable_066,Binary_Variable_067,Binary_Variable_064,Binary_Variable_065,Binary_Variable_068,Binary_Variable_069
0,0,1,0,1,0,1,0,1,1,0,...,1,1,1,1,1,1,1,1,1,1
1,0,1,1,1,1,0,0,1,1,1,...,1,1,0,0,1,0,1,1,1,1
2,0,0,0,1,1,0,0,1,1,1,...,0,1,1,0,1,1,0,1,1,1
3,1,0,1,1,0,0,1,1,0,0,...,0,1,0,0,1,0,0,0,0,1
4,1,1,1,0,0,1,1,1,0,1,...,0,1,0,0,0,1,0,0,1,0


In [6]:
# sample_path = 'data/bn/samples.txt'
# bn_path = 'data/bn/new_bn.txt'
# conv = EntireSamplestoAC(sample_path,bn_path)
# conv.experiment()

In [8]:
# Learn one child variable from its two parents, then print the resulting AC
parents_names = ['Binary_Variable_088','Binary_Variable_089']
child_name = 'Binary_Variable_084'
# Labels for the 'v_<label>' nodes: one per parent (same order), child last
varindex =['088','089','084']

#Train Network
# `.values` is used instead of DataFrame.as_matrix(), which newer pandas
# releases no longer provide; it is available in older releases as well.
parents = df[parents_names].values
child = df[[child_name]].values
# Arguments: input width, number of hidden layers, hidden width, output width
dnn = FullyConnectedNetwork(len(parents_names),2,4,1)
dnn.train(parents,child)
layers = dnn.getLayers()

#Convert
conv = NetToAC(parents_names,child_name,varindex,layers)
ac_stream,_ = conv.getACStream()
# Function-call form prints the same text under Python 2 and Python 3
print(ac_stream)

n 0
n 1
v_088 1 0
v_089 1 0
n 0.447128
n -0.0490709
n -0.00602626
* 2 4
* 3 5
+ 7 8
+ 9 6
max 10 0
n -0.761312
n 0.506863
n -0.00033015
* 2 12
* 3 13
+ 15 16
+ 17 14
max 18 0
n 0.516567
n -0.0524704
n 0.0205706
* 2 20
* 3 21
+ 23 24
+ 25 22
max 26 0
n -0.80021
n 0.177098
n -0.0135724
* 2 28
* 3 29
+ 31 32
+ 33 30
max 34 0
n -0.0948203
n -0.0165872
n -0.794004
n 0.135369
n -0.0173507
* 11 36
* 19 37
* 27 38
* 35 39
+ 41 42
+ 45 43
+ 46 44
+ 47 40
max 48 0
n -0.045701
n -0.748889
n 0.822276
n -0.832085
n 0.0183679
* 11 50
* 19 51
* 27 52
* 35 53
+ 55 56
+ 59 57
+ 60 58
+ 61 54
max 62 0
n 0.109205
n -0.693105
n 0.239726
n 0.0751531
n 0.0247009
* 11 64
* 19 65
* 27 66
* 35 67
+ 69 70
+ 73 71
+ 74 72
+ 75 68
max 76 0
n 0.297561
n -0.542606
n 0.603823
n -0.457084
n 0.0282269
* 11 78
* 19 79
* 27 80
* 35 81
+ 83 84
+ 87 85
+ 88 86
+ 89 82
max 90 0
n -0.637008
n -0.920457
n 0.422779
n 0.681015
n 0.288339
* 49 92
* 63 93
* 77 94
* 91 95
+ 97 98
+ 101 99
+ 102 100
+ 103 96
max 104 0
- 1 105
v_08