# Minimum probability flow

In [1]:
import numpy as np
import theano.tensor as T

## Preparation
To get started we first have to prepare the data to work with. The steps to synthesis the data are as follows:
1. Form a $n \times n$ matrix $W$ that is symmetric with diagonal entries set to zeros.
2. The bias vector is $n \times 1$ that is either set to zero or takes binary inputs.
3. Initialise a vector of size $n$ that has binary entries, $x^{(1)}$, which is used to generate the subsequent sample values.
4. Given $x^{(i)}$, for each row $j$
\begin{align*}
p_{j}^{(i+1)} &= \sigma\left(\sum_{k=1}^{n}w_{jk}x_{k}^{(i)}\right)\\
%x_{j}^{(i+1)}&=\sigma(\tilde{x}_{j}^{(i+1)})
\end{align*}
where $\sigma$ is the sigmoid function, thus produces a row of values between 0 and 1. Each entry of $x_j^{(i+1)}$ is Bernoulli distributed with parameter $p_{j}^{(i+1)}$.
The requirement of being symmetric with diagonal entries set to zero for $W$ is necessary for the data to be 'good'. Shall elaborate on the good later.

In [2]:
# Set dimension of the data vector
n = 16

# Initialize weight matrix that is symmetric and has zero diagonal entries
W = np.triu(np.random.normal(0,1,(n,n)))*(1 - np.eye(n))
# W = np.triu(np.random.rand(n,n))*(1 - np.eye(n))
W = W + np.transpose(W)
# W = 2*W - 1

# Ask Gary if the initialization of the W matrix is between 0 and 1 as I will get a all ones dataset

# To test if W is symmetric
# print (W)
# print ((W == np.transpose(W)).all())

# Bias vector with binary inputs
#b = np.random.randint(2, size = n).reshape(n,-1)
b = np.zeros((1,n)).reshape(n,-1)

# Seed data vector to generate data
x = np.random.randint(2, size = n).reshape(n,-1)

print ('x:',np.transpose(x))
print ('W:',W)
print ('b:',np.transpose(b))

x: [[1 1 0 1 0 1 0 0 1 1 1 0 1 0 1 0]]
W: [[ -0.00000000e+00  -4.88921061e-01  -4.98583517e-01   1.72484638e+00
   -1.21939916e-01   2.76529305e-01   6.50219444e-01  -5.24777873e-01
    6.29830406e-01  -2.02219663e+00   3.40368156e-01   4.91941414e-01
    6.33251718e-01   1.56830887e+00   3.49886099e-01  -1.25343035e+00]
 [ -4.88921061e-01  -0.00000000e+00  -6.11256797e-01   1.24735168e-01
    1.64861297e+00   2.80298761e-01   8.15908746e-01   9.18864243e-01
   -4.13948583e-01   1.60707995e+00  -9.05613893e-01   1.57818182e+00
    1.18801773e+00  -4.62862668e-01   5.02958049e-02  -4.09916520e-01]
 [ -4.98583517e-01  -6.11256797e-01  -0.00000000e+00  -8.35651964e-01
   -8.34370377e-01  -1.76868270e+00   6.12945751e-01   2.82117426e-01
   -8.90130522e-01   6.49506915e-01   9.71738200e-01   1.68027566e+00
   -1.12637783e+00  -6.68212525e-01  -1.46631479e+00   1.35724178e-01]
 [  1.72484638e+00   1.24735168e-01  -8.35651964e-01  -0.00000000e+00
    4.75819061e-01   4.57343774e-01  -1.64343

In [3]:
def sigmoid(x):
    return 1/ (1 + np.exp(-x))

In [4]:
def gen_singledata(x, W, b):
#     print (W.dot(x) + b)
#     print (sigmoid(W.dot(x) + b))
    return np.random.binomial(1,sigmoid(W.dot(x)+b))    

To be done: while looping concantenate the data for every thousand.

In [5]:
def gen_data(x, W, b, n, m):
    """
    Generates n*m data and selects one data for every m data generated and 
    returns a matrix where each row is a dataset.
    Inputs:
    - x: seed data of shape (n,1) (to generate more data).
    - W: a matrix of shape (n,n).
    - b: a bias of shape (n,1).
    - n: (int) number of data samples generated.
    - m: (int) period of each data collected.
    """
    data = np.zeros((n * m + 1,x.shape[0]))
    for i in np.arange(n * m + 1):
        x = gen_singledata(x, W, b)
        data[i,:] = np.transpose(x)
        
    print (data)
    return data[0:n * m + 1:m,:]        

In [6]:
gen_data(x, W, b, 2, 5)

[[ 1.  1.  0.  0.  1.  1.  0.  0.  1.  1.  1.  1.  1.  0.  1.  1.]
 [ 0.  1.  0.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  0.  1.  0.]
 [ 1.  1.  0.  0.  1.  1.  0.  0.  0.  1.  0.  1.  1.  0.  1.  1.]
 [ 0.  1.  0.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  0.  1.  0.]
 [ 1.  1.  0.  0.  1.  1.  1.  0.  0.  1.  0.  1.  1.  0.  1.  1.]
 [ 0.  1.  0.  1.  1.  1.  1.  1.  1.  1.  0.  1.  1.  0.  1.  1.]
 [ 0.  1.  0.  1.  1.  1.  1.  1.  1.  1.  0.  1.  1.  0.  1.  1.]
 [ 0.  1.  0.  0.  1.  1.  0.  1.  1.  0.  0.  1.  1.  0.  1.  1.]
 [ 1.  1.  0.  1.  1.  1.  1.  1.  1.  1.  0.  1.  1.  0.  1.  0.]
 [ 1.  1.  0.  1.  1.  1.  1.  0.  1.  1.  0.  1.  1.  0.  1.  1.]
 [ 0.  1.  0.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  0.  1.  0.]]


array([[ 1.,  1.,  0.,  0.,  1.,  1.,  0.,  0.,  1.,  1.,  1.,  1.,  1.,
         0.,  1.,  1.],
       [ 0.,  1.,  0.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  0.,  1.,  1.,
         0.,  1.,  1.],
       [ 0.,  1.,  0.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
         0.,  1.,  0.]])