## MPF training

Below is my test code for training a fully connected Boltzmann machine with 32 units using minimum probability flow (MPF). GPU execution is not taken into account.

In [1]:
import numpy as np
import theano
import theano.tensor as T
import matplotlib.pyplot as plt

import os
import timeit

In [2]:
def load_data(dataset = '32-50K.npy', borrow = True):
    """
    Read a ``.npy`` file and expose its contents as a Theano shared variable.

    Parameters
    ----------
    dataset : path handed to ``np.load``.
    borrow : forwarded to ``theano.shared``.

    Returns
    -------
    Theano shared variable holding the loaded array cast to
    ``theano.config.floatX``.
    """

    # Cast once to floatX so the shared variable uses Theano's configured float type.
    samples = np.asarray(np.load(dataset), dtype = theano.config.floatX)

    return theano.shared(samples, borrow = borrow)

In [3]:
class mpf(object):
    """
    Minimum probability flow (MPF) for a fully-connected Boltzmann machine.

    Stores the coupling matrix ``W`` and bias vector ``b`` as Theano shared
    variables and builds the symbolic MPF cost together with plain
    gradient-descent updates for both parameters.
    """

    def __init__(self, input = None, n = 16, temperature = 1,  W = None, b = None):
        """
        Parameters
        ----------
        input : symbolic Theano matrix of data, or None to create a fresh one.
        n : number of units.
        temperature : divisor applied to the exponent of the cost.
        W : optional shared variable for the couplings. When omitted, a random
            symmetric ``n x n`` matrix with zero diagonal is created.
        b : optional shared variable for the biases. When omitted, a random
            vector of length ``n`` is created.
        """

        self.n = n
        self.temperature = temperature

        # Test against None explicitly: `not W` is ambiguous for numpy arrays
        # and unsupported for Theano variables, so it breaks as soon as a
        # caller actually supplies W or b.
        if W is None:
            U = np.random.rand(self.n, self.n)
            R = 0.5 * (U + U.T)      # symmetrise
            np.fill_diagonal(R, 0)   # no self-coupling
            initial_W = np.asarray(R, dtype = theano.config.floatX)
            W = theano.shared(initial_W, name = 'W', borrow = True)

        if b is None:
            initial_b = np.asarray(np.random.rand(n), dtype = theano.config.floatX)
            b = theano.shared(initial_b, name = 'b', borrow = True)

        self.W = W
        self.b = b

        if input is None:
            # NOTE(review): dmatrix is always float64 while W and b use floatX;
            # confirm this is intended when floatX is float32.
            self.x = T.dmatrix(name = 'input')
        else:
            self.x = input

        self.params = [self.W, self.b]

    # TODO: decide whether to expose an explicit energy() method.

    def cost(self, lr = 1e-2, epsilon = 1):
        """
        Returns cost when using vanilla SGD.

        Parameters
        ----------
        lr : learning rate used in the parameter updates.
        epsilon : multiplicative factor applied to the cost.

        Returns
        -------
        cost : scalar Theano expression, ``epsilon`` times the mean of
            ``exp((0.5 - x) * (x . W + b) / temperature)`` over all entries,
            with the diagonal of ``W`` replaced by zeros.
        updates : list of ``(param, param - lr * grad)`` pairs for ``W`` and ``b``.
        """

        print (50 * '=')
        print (20 * '#' + 'MPF' + 20 * '#')
        print (50 * '=')
        print ('Input size: {0}'.format(self.n))
        print ('Learning temperature: {0}'.format(self.temperature))
        print ('Learning rate: {0}'.format(lr))

        # Zero the diagonal so a unit never couples to itself.
        W_offdiag = T.fill_diagonal(self.W, 0)
        # Presumably half the energy change from flipping each unit; this
        # reading assumes 0/1-valued data and a symmetric W -- TODO confirm.
        flip_term = (0.5 - self.x) * (T.dot(self.x, W_offdiag) + self.b)
        # T.grad needs a scalar cost, so the per-sample, per-unit terms are
        # averaged before differentiating.
        cost = epsilon * T.mean(T.exp(flip_term / self.temperature))
        gparams = T.grad(cost, self.params)

        updates = [(param, param - lr * gparam)
                   for param, gparam in zip(self.params, gparams)]

        return cost, updates

In [None]:
def trainmpf(units = 32, lr = 1e-3, epsilon = 1, n_epochs = 1000,
             batchsize = 16, temperature = 1, sample = '32-50.npy'):
    """
    Trains parameters using MPF.

    Parameters
    ----------
    units : number of units passed to ``mpf``.
    lr : learning rate passed to ``mpf.cost``.
    epsilon : cost scaling factor passed to ``mpf.cost``.
    n_epochs : number of passes over the dataset.
    batchsize : number of rows per minibatch.
    temperature : temperature passed to ``mpf``.
    sample : path of the ``.npy`` dataset passed to ``load_data``.
    """
    # NOTE(review): the default '32-50.npy' differs from load_data's default
    # '32-50K.npy' -- confirm which file name is intended.

    index = T.lscalar()
    x = T.matrix('x')
    flow = mpf(input = x, n = units, temperature = temperature)

    dataset = load_data(sample)
    print ('Sample used: {0}'.format(sample))
    print ('=' * 50)

    # Build the symbolic cost and SGD updates for the compiled function below.
    cost, updates = flow.cost(lr = lr, epsilon = epsilon)

    # Whole minibatches only; a trailing partial batch is not visited.
    n_samples = dataset.get_value(borrow = True).shape[0]
    n_dataset_batches = n_samples // batchsize

    # Each call evaluates the cost on one minibatch and applies the updates.
    mpf_cost = theano.function(inputs = [index], outputs = cost, updates = updates,
                               givens = {x: dataset[index * batchsize: (index + 1) * batchsize]})

    for epoch in range(n_epochs):
        c = []
        current_time = timeit.default_timer()
        for batch_index in range(n_dataset_batches):
            c.append(mpf_cost(batch_index))
