In [1]:
import numpy as np
import theano
import theano.tensor as T

import timeit

In [2]:
def load_data(dataset = 'gibbs-sample.dat.npy', borrow = True):
    """
    Loads the dataset.
    """
    data = np.load(dataset)
    dataset = theano.shared(np.asarray(data, dtype = theano.config.floatX), borrow = borrow)
        
    return dataset   

In [46]:
class mpf(object):
    """
    Minimum probability flow
    """
    def __init__(self, input, n, epsilon = 0.01):
        """
        Initialize the parameters of the MPF network
        """
        
        self.W = theano.shared(value = np.zeros((n, n), dtype = theano.config.floatX), name = 'W', borrow = True)
        self.b = theano.shared(value = np.zeros((1, n), dtype = theano.config.floatX), name = 'b', borrow = True)
        self.z = T.dot(input, self.W) + self.b
        
        self.params = [self.W, self.b]
        
        self.input = input
       
    
    def Kcost(self, epsilon = 0.01):
        """
        Returns the cost.
        Inputs:
        - data: numpy array of data that contains the different states of the network of size n
        - W: (n, n) numpy array of the weight matrix
        - b: (1, n) numpy array of biases
        - espilon: parameter for the cost
        """
#         return T.mean(T.exp((0.5 - self.input) * self.z)) * epsilon
        return (0.5 - self.input) * self.z
        
# PROBLEM IS THE ABOVE RETURN LINE!!!!!!
    

def sgd_opt(learning_rate = 0.01, n_epochs = 50, dataset = 'gibbs-sample.dat.npy', batch_size = 500):
    """
    Perform stochastic gradient descent on MPF.

    """
    print ('Loading the file', dataset+'...')
    
    dataset = load_data()

    n_dataset_batches = dataset.get_value(borrow = True).shape[0] // batch_size

    print ('Building the model...')

    index = T.lscalar()

    x = T.matrix('x')

    energy = mpf(input = x, n = 16)

    cost = energy.Kcost()

    grad_W = T.grad(cost = cost, wrt = energy.W)
    grad_b = T.grad(cost = cost, wrt = energy.b)

    updates = [(energy.W, energy.W - learning_rate * grad_W), (energy.b, energy.b - learning_rate * grad_b)]

    train_model = theano.function(inputs = [index],
                                outputs = cost,
                                updates = updates,
                                givens = {x: dataset[index * batch_size: (index + 1) * batch_size, :]} )

    print ('Training the model...')

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_dataset_batches):
            minibatch_avg_cost = train_model(minibatch_index)
            print (minibatch_avg_cost)
#                 iter = (epoch - 1) * n_dataset_batches + minibatch_index
                
#                 if (iter + 1) % validation_frequency == 0:
        
        
#         patience = 5000
#         patience_increase = 2
#         improvement_threshold = 0.995
#         validation_frequency = min(n_dataset_batches, patience // 2)
        
#         best_validation_lost = np.inf
#         test_score = 0.
#         start_time = timeit.default_timer()
        
#         done_looping = False
#         epoch = 0
#         while (epoch < n_epoch) and (not done_looping):
#             epoch = epoch + 1
#             for minibatch_index in range(n_dataset_batches):
#                 minibatch_avg_cost = train_model(minibatch_index)
#                 iter = (epoch - 1) * n_dataset_batches + minibatch_index
                
#                 if (iter + 1) % validation_frequency == 0:
                    
        
        
        
        
        
        
        

In [10]:
dataset = load_data()

In [47]:
index = T.lscalar()

x = T.matrix('x')

learning_rate = 0.01
batch_size = 500

energy = mpf(input = x, n = 16)
cost = energy.Kcost()

grad_W = T.grad(cost = cost, wrt = energy.W)
grad_b = T.grad(cost = cost, wrt = energy.b)

updates = [(energy.W, energy.W - learning_rate * grad_W), (energy.b, energy.b - learning_rate * grad_b)]

train_model = theano.function(inputs = [index],
                            outputs = cost,
                            updates = updates,
                            givens = {x: dataset[index * batch_size: (index + 1) * batch_size, :]} )

n_dataset_batches = dataset.get_value(borrow = True).shape[0] // batch_size


for minibatch_index in range(n_dataset_batches):
            minibatch_avg_cost = train_model(minibatch_index)
            print (minibatch_avg_cost)


TypeError: cost must be a scalar.

In [24]:
a = np.arange(4)
print (a)

[0 1 2 3]


In [25]:
b = np.arange(16).reshape(4,4)
print (b)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]


In [26]:
np.dot(a,b)

array([56, 62, 68, 74])

In [27]:
a * b

array([[ 0,  1,  4,  9],
       [ 0,  5, 12, 21],
       [ 0,  9, 20, 33],
       [ 0, 13, 28, 45]])

In [39]:
class test(object):
    def __init__(self, a, b):
        self.a = np.arange(a**2).reshape(a,a)
        self.b = np.arange(b)
    
    def cost(self):
        return self.a * self.b

In [40]:
t = test(3,3)

In [42]:
t.a

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [43]:
t.b

array([0, 1, 2])

In [41]:
t.cost()

array([[ 0,  1,  4],
       [ 0,  4, 10],
       [ 0,  7, 16]])