# Minimum probability flow

In [1]:
import numpy as np
import theano.tensor as T

## Preparation
To get started we first have to prepare the data to work with. The steps to synthesis the data are as follows:
1. Form a $n \times n$ matrix $W$ that is symmetric with diagonal entries set to zeros.
2. The bias vector is $n \times 1$ that is either set to zero or takes binary inputs.
3. Initialise a vector of size $n$ that has binary entries, $x^{(1)}$, which is used to generate the subsequent sample values.
4. Given $x^{(i)}$, for each row $j$
\begin{align*}
p_{j}^{(i+1)} &= \sigma\left(\sum_{k=1}^{n}w_{jk}x_{k}^{(i)}\right)\\
%x_{j}^{(i+1)}&=\sigma(\tilde{x}_{j}^{(i+1)})
\end{align*}
where $\sigma$ is the sigmoid function, thus produces a row of values between 0 and 1. Each entry of $x_j^{(i+1)}$ is Bernoulli distributed with parameter $p_{j}^{(i+1)}$.
The requirement of being symmetric with diagonal entries set to zero for $W$ is necessary for the data to be 'good'. Shall elaborate on the good later.

In [2]:
# Set dimension of the data vector
n = 16

# Initialize weight matrix that is symmetric and has zero diagonal entries
W = np.triu(np.random.normal(0,1,(n,n)))*(1 - np.eye(n))
# W = np.triu(np.random.rand(n,n))*(1 - np.eye(n))
W = W + np.transpose(W)
# W = 2*W - 1

# Ask Gary if the initialization of the W matrix is between 0 and 1 as I will get a all ones dataset

# To test if W is symmetric
# print (W)
# print ((W == np.transpose(W)).all())

# Bias vector with binary inputs
#b = np.random.randint(2, size = n).reshape(n,-1)
b = np.zeros((1,n)).reshape(n,-1)

# Seed data vector to generate data
x = np.random.randint(2, size = n).reshape(n,-1)

print ('x:',np.transpose(x))
print ('W:',W)
print ('b:',np.transpose(b))

x: [[1 0 0 0 0 0 1 0 1 0 1 0 1 0 1 0]]
W: [[ 0.          0.08627778 -0.36675979  0.50092889 -0.26561384 -0.14560876
  -0.93630841 -0.51907027  1.09377534  0.01521072 -0.06133995  0.50529862
   1.71433541 -0.02057106 -1.28082203 -0.78621307]
 [ 0.08627778  0.          0.15024511 -0.42847049 -1.20601409 -0.69655557
   1.42907881 -0.07268021  1.90756479 -0.10215597 -0.93103294  0.34496104
   0.23342034 -2.21605038  0.20507378 -0.25891111]
 [-0.36675979  0.15024511  0.          0.96332637  0.18515433 -0.25989519
   0.64681005  1.13210109  0.25326655 -2.23926725 -0.38651376  0.86622138
   1.64960995 -0.764915    0.03145557 -1.22298646]
 [ 0.50092889 -0.42847049  0.96332637  0.          1.63987737 -2.20182153
  -0.13619547 -1.40377921 -1.22714478 -0.87737019 -1.21929998  0.4143886
   0.49816448 -0.9360532   0.53298873 -0.28391474]
 [-0.26561384 -1.20601409  0.18515433  1.63987737 -0.         -0.75869165
   1.27392342  1.27430104 -1.15168162  0.99722844 -0.11249396  1.13080055
  -0.51888175  

In [3]:
def sigmoid(x):
    return 1/ (1 + np.exp(-x))

In [4]:
def gen_singledata(x, W, b):
#     print (W.dot(x) + b)
#     print (sigmoid(W.dot(x) + b))
    return np.random.binomial(1,sigmoid(W.dot(x)+b))    

To be done: while looping concantenate the data for every thousand.

In [19]:
def gen_data(x, W, b, n, m):
    """
    Generates n*m data and selects one data for every m data generated and 
    returns a matrix where each row is a dataset.
    Inputs:
    - x: seed data of shape (n,1) (to generate more data).
    - W: a matrix of shape (n,n).
    - b: a bias of shape (n,1).
    - n: (int) number of data samples generated.
    - m: (int) period of each data collected.
    """
    data = np.zeros((n * m,x.shape[0]))
    for i in np.arange(n * m):
        x = gen_singledata(x, W, b)
        data[i,:] = np.transpose(x)
        
    print (data.shape)
    return data[0:n * m :m,:]        

In [32]:
data = gen_data(x, W, b, 3, 50)
print (data[-1])

(150, 16)
[ 1.  1.  1.  0.  0.  0.  1.  1.  0.  1.  0.  0.  1.  0.  0.  1.]


In [None]:
data[:]

In [23]:
data.shape

(3, 16)

In [16]:
for i in np.arange(10):
    print (data[i,:])

[ 1.  1.  0.  1.  0.  0.  0.  1.  1.  1.  0.  0.  0.  1.  0.  1.]
[ 1.  1.  1.  1.  1.  0.  0.  0.  0.  0.  0.  1.  1.  0.  0.  1.]
[ 1.  1.  1.  0.  1.  0.  0.  1.  0.  1.  1.  1.  1.  0.  0.  1.]
[ 0.  1.  1.  0.  1.  0.  0.  1.  1.  1.  0.  0.  1.  0.  0.  0.]
[ 1.  1.  1.  1.  1.  0.  1.  1.  1.  1.  1.  1.  1.  0.  0.  0.]
[ 1.  1.  1.  0.  0.  0.  0.  1.  1.  1.  0.  0.  1.  0.  0.  1.]
[ 1.  1.  1.  1.  1.  0.  0.  1.  0.  1.  1.  1.  1.  0.  0.  1.]
[ 1.  0.  0.  1.  1.  0.  0.  1.  1.  0.  0.  1.  1.  0.  0.  0.]
[ 1.  1.  1.  0.  1.  0.  0.  1.  0.  1.  0.  0.  1.  0.  0.  1.]
[ 0.  0.  0.  0.  1.  1.  0.  1.  1.  1.  1.  0.  1.  1.  0.  0.]


In [26]:
t = np.arange(16).reshape(4,4)

In [30]:
t

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [31]:
t[-1]

array([12, 13, 14, 15])