# Feed-forward neural network with one hidden layer

In [1]:
import os
import sys
import timeit

import numpy

import theano
import theano.tensor as T
from theano.tensor.signal import pool
from theano.tensor.nnet import conv2d
from logistic_sgd import LogisticRegression, load_data
#from mlp import HiddenLayer

Using gpu device 0: TITAN X (Pascal) (CNMeM is disabled, cuDNN 5105)


In [2]:
class HiddenLayer(object):
    """Bias-free dense layer whose weight matrix is sampled from N(mu, sigma_sqr).

    The layer computes ``activation(dot(input, W))``.  ``mu`` and ``sigma_sqr``
    are scalar Theano shared variables (initialised to 0.0 and 1.0) that
    describe the normal distribution every entry of ``W`` is drawn from;
    ``W`` itself is a shared variable of shape ``(n_in, n_out)``.
    """

    def __init__(self, rng, input, n_in, n_out, activation=T.nnet.relu):
        """Build the symbolic layer output and draw the initial weights.

        :param rng: random generator exposing ``normal(loc, scale, size=...)``
                    (e.g. a ``numpy.random.RandomState``)
        :param input: symbolic input that is multiplied by ``W``
                      (assumes its last dimension is ``n_in`` -- TODO confirm at call sites)
        :param n_in: number of rows of ``W``
        :param n_out: number of columns of ``W``
        :param activation: callable applied to ``dot(input, W)``
        """
        self.input = input
        self.n_in = n_in
        self.n_out = n_out
        # Parameters of the weight distribution; kept as shared variables so
        # that a compiled Theano function can update them.
        self.mu = theano.shared(value = 0.0, name='mu')
        self.sigma_sqr = theano.shared(value = 1.0, name='sigma_sqr')
        self.W = theano.shared(value=self._sample_W(rng), name='W')
        self.output = activation(T.dot(input, self.W))

    def _sample_W(self, rng):
        """Return a fresh (n_in, n_out) floatX array drawn from N(mu, sigma_sqr)."""
        mean = float(self.mu.get_value())
        # rng.normal expects a standard deviation, while the layer stores the variance.
        std = numpy.sqrt(float(self.sigma_sqr.get_value()))
        draws = rng.normal(mean, std, size=(self.n_in, self.n_out))
        return numpy.asarray(draws, dtype=theano.config.floatX)

    def generate_W(self, rng):
        """Overwrite ``W`` with a new sample using the current ``mu`` / ``sigma_sqr``."""
        self.W.set_value(self._sample_W(rng))

In [3]:
batch_size = 1000
# Fixed seed so the sampled weights are repeatable between runs.
rng = numpy.random.RandomState(2345)

# Each entry of `datasets` unpacks into an (inputs, labels) pair.
datasets = load_data('mnist.pkl.gz')
train_set_x, train_set_y = datasets[0]
valid_set_x, valid_set_y = datasets[1]
test_set_x, test_set_y = datasets[2]

# Number of whole minibatches in each split (a trailing partial batch is dropped).
n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

# Symbolic placeholders shared by every compiled function below.
index = T.lscalar()  # minibatch number
x = T.matrix('x')    # rasterized images, one example per row
y = T.ivector('y')   # labels, as a 1D vector of ints
print('... building the model')

... loading data
... building the model


In [4]:
# Training hyper-parameters.
learning_rate = 0.01
n_epochs = 200

# 784 inputs -> 500 hidden units (HiddenLayer's default activation) -> 10 outputs.
layer0_input = x.reshape((batch_size, 784))
layer0 = HiddenLayer(rng, input=layer0_input, n_in=784, n_out=500)
layer1 = LogisticRegression(input=layer0.output, n_in=500, n_out=10)

# Symbolic objective minimised during training.
cost = layer1.negative_log_likelihood(y)

# Train the full parameter set

In [35]:
# Symbolic row range of minibatch number `index`; identical to writing
# [index * batch_size: (index + 1) * batch_size] at each use.
batch_rows = slice(index * batch_size, (index + 1) * batch_size)

# Evaluation functions: layer1.errors(y) on one minibatch of the given split.
test_model = theano.function(
    inputs=[index],
    outputs=layer1.errors(y),
    givens={x: test_set_x[batch_rows], y: test_set_y[batch_rows]},
)
validate_model = theano.function(
    inputs=[index],
    outputs=layer1.errors(y),
    givens={x: valid_set_x[batch_rows], y: valid_set_y[batch_rows]},
)

# Plain SGD on the output layer's parameters plus the hidden weight matrix.
params = layer1.params + [layer0.W]
grads = T.grad(cost, params)
updates = [(p, p - learning_rate * g) for p, g in zip(params, grads)]

# One call = one SGD step on minibatch `index`; returns that minibatch's cost.
train_model = theano.function(
    inputs=[index],
    outputs=cost,
    updates=updates,
    givens={x: train_set_x[batch_rows], y: train_set_y[batch_rows]},
)

In [42]:
# Run SGD for n_epochs, validating once per epoch and re-scoring the test set
# whenever the validation error improves.
validation_frequency = n_train_batches
best_validation_loss = numpy.inf
best_iter = 0
test_score = 0.
start_time = timeit.default_timer()

# A plain for-loop replaces the old `while ... and not done_looping`: the flag
# was never set, so the loop always ran for exactly n_epochs epochs.
for epoch in range(1, n_epochs + 1):
    for minibatch_index in range(n_train_batches):
        # Global step counter. Deliberately NOT called `iter`, which would
        # shadow the built-in iter() for every later cell in this kernel.
        iteration = (epoch - 1) * n_train_batches + minibatch_index

        if iteration % 100 == 0:
            print('training @ iter = ', iteration)
        cost_ij = train_model(minibatch_index)

        if (iteration + 1) % validation_frequency == 0:

            # mean of layer1.errors over all validation minibatches
            validation_losses = [validate_model(i) for i
                                 in range(n_valid_batches)]
            this_validation_loss = numpy.mean(validation_losses)
            print('epoch %i, minibatch %i/%i, validation error %f %%' %
                  (epoch, minibatch_index + 1, n_train_batches,
                   this_validation_loss * 100.))

            # if we got the best validation score until now
            if this_validation_loss < best_validation_loss:
                # save best validation score and iteration number
                best_validation_loss = this_validation_loss
                best_iter = iteration
                # test it on the test set
                test_losses = [
                    test_model(i)
                    for i in range(n_test_batches)
                ]
                test_score = numpy.mean(test_losses)
                print(('     epoch %i, minibatch %i/%i, test error of '
                       'best model %f %%') %
                      (epoch, minibatch_index + 1, n_train_batches,
                       test_score * 100.))
end_time = timeit.default_timer()
print('Optimization complete.')
print('Best validation score of %f %% obtained at iteration %i, '
      'with test performance %f %%' %
      (best_validation_loss * 100., best_iter + 1, test_score * 100.))
# The timers were previously captured but never reported.
print('Training ran for %.2fm' % ((end_time - start_time) / 60.))

training @ iter =  0
epoch 1, minibatch 50/50, validation error 6.870000 %
     epoch 1, minibatch 50/50, test error of best model 7.060000 %
epoch 2, minibatch 50/50, validation error 6.870000 %
training @ iter =  100
epoch 3, minibatch 50/50, validation error 6.870000 %
epoch 4, minibatch 50/50, validation error 6.840000 %
     epoch 4, minibatch 50/50, test error of best model 7.000000 %
training @ iter =  200
epoch 5, minibatch 50/50, validation error 6.810000 %
     epoch 5, minibatch 50/50, test error of best model 6.990000 %
epoch 6, minibatch 50/50, validation error 6.810000 %
training @ iter =  300
epoch 7, minibatch 50/50, validation error 6.780000 %
     epoch 7, minibatch 50/50, test error of best model 6.980000 %
epoch 8, minibatch 50/50, validation error 6.750000 %
     epoch 8, minibatch 50/50, test error of best model 6.960000 %
training @ iter =  400
epoch 9, minibatch 50/50, validation error 6.750000 %
epoch 10, minibatch 50/50, validation error 6.740000 %
     epoch 

# Train the distribution of the hidden-layer weights
## Distribution parameter updates
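$$\mu_{\text{new}} = \mu_{\text{old}} - lr \cdot \operatorname{mean}\!\left(\frac{\partial\,\text{cost}}{\partial W}\right)$$

$$\sigma^2_{\text{new}} = \sigma^2_{\text{old}} - lr \cdot \left(\operatorname{mean}\!\left(\frac{\partial\,\text{cost}}{\partial (W^2)}\right) - \frac{1}{2\mu}\,\operatorname{mean}\!\left(\frac{\partial\,\text{cost}}{\partial W}\right)\right)$$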

In [5]:
# Hyper-parameters for the distribution-training experiment.
learning_rate = 0.01
n_epochs = 200

# Rebuild the network so this experiment starts from freshly sampled weights.
layer0_input = x.reshape((batch_size, 784))
layer0 = HiddenLayer(rng, input=layer0_input, n_in=784, n_out=500)
layer1 = LogisticRegression(input=layer0.output, n_in=500, n_out=10)
cost = layer1.negative_log_likelihood(y)

# Symbolic row range of minibatch number `index`.
batch_rows = slice(index * batch_size, (index + 1) * batch_size)

test_model = theano.function(
    inputs=[index],
    outputs=layer1.errors(y),
    givens={x: test_set_x[batch_rows], y: test_set_y[batch_rows]},
)
validate_model = theano.function(
    inputs=[index],
    outputs=layer1.errors(y),
    givens={x: valid_set_x[batch_rows], y: valid_set_y[batch_rows]},
)

# Only the output layer is trained by plain SGD here; layer0.W is not in `params`.
params = layer1.params
grads = T.grad(cost, params)
updates = [(p, p - learning_rate * g) for p, g in zip(params, grads)]

# The hidden layer is trained through its distribution parameters instead:
# mu follows the mean gradient of the cost with respect to W.
grads_hidden_w = T.grad(cost, layer0.W)
mu_step = learning_rate * T.mean(grads_hidden_w)
updates.append((layer0.mu, layer0.mu - mu_step))

# method of moments estimate of sigma_sqr:
# sigma_sqr_hat = T.mean(T.sqr(layer0.W)) - T.sqr(T.mean(layer0.W))
# The gradient of the cost with respect to sigma_sqr_hat is written out by hand.
# NOTE(review): both terms divide by quantities that can be close to zero
# (individual entries of W, and mean(W) while mu starts at 0.0) -- confirm
# this is numerically acceptable.
mean_grad_wrt_w_sqr = T.mean(grads_hidden_w/layer0.W/2)
scaled_mean_grad = T.mean(grads_hidden_w)/T.mean(layer0.W)/2
grad_sigma_sqr_hat = mean_grad_wrt_w_sqr - scaled_mean_grad

# The maximum() keeps the new value from falling below half the current one.
sigma_sqr_floor = layer0.sigma_sqr/2
sigma_sqr_candidate = layer0.sigma_sqr - learning_rate * grad_sigma_sqr_hat
updates.append((layer0.sigma_sqr, T.maximum(sigma_sqr_candidate, sigma_sqr_floor)))

# One call = one update of layer1's parameters, mu and sigma_sqr on minibatch `index`.
train_model = theano.function(
    inputs=[index],
    outputs=cost,
    updates=updates,
    givens={x: train_set_x[batch_rows], y: train_set_y[batch_rows]},
)

In [None]:
# Train the weight distribution for n_epochs, validating once per epoch and
# re-scoring the test set whenever the validation error improves.
validation_frequency = n_train_batches
best_validation_loss = numpy.inf
best_iter = 0
test_score = 0.
start_time = timeit.default_timer()

# A plain for-loop replaces the old `while ... and not done_looping`: the flag
# was never set, so the loop always ran for exactly n_epochs epochs.
for epoch in range(1, n_epochs + 1):
    for minibatch_index in range(n_train_batches):
        # Global step counter. Deliberately NOT called `iter`, which would
        # shadow the built-in iter() for every later cell in this kernel.
        iteration = (epoch - 1) * n_train_batches + minibatch_index
        cost_ij = train_model(minibatch_index)
        # Re-draw W from the just-updated (mu, sigma_sqr); the validation and
        # test passes below therefore run on this fresh sample.
        layer0.generate_W(rng)

        if (iteration + 1) % validation_frequency == 0:

            # mean of layer1.errors over all validation minibatches
            validation_losses = [validate_model(i) for i
                                 in range(n_valid_batches)]
            this_validation_loss = numpy.mean(validation_losses)
            print('epoch %i, minibatch %i/%i, validation error %f %%' %
                  (epoch, minibatch_index + 1, n_train_batches,
                   this_validation_loss * 100.))

            # if we got the best validation score until now
            if this_validation_loss < best_validation_loss:
                # save best validation score and iteration number
                best_validation_loss = this_validation_loss
                best_iter = iteration
                # test it on the test set
                test_losses = [
                    test_model(i)
                    for i in range(n_test_batches)
                ]
                test_score = numpy.mean(test_losses)
                print(('     epoch %i, minibatch %i/%i, test error of '
                       'best model %f %%') %
                      (epoch, minibatch_index + 1, n_train_batches,
                       test_score * 100.))
end_time = timeit.default_timer()
print('Optimization complete.')
print('Best validation score of %f %% obtained at iteration %i, '
      'with test performance %f %%' %
      (best_validation_loss * 100., best_iter + 1, test_score * 100.))
# The timers were previously captured but never reported.
print('Training ran for %.2fm' % ((end_time - start_time) / 60.))

epoch 1, minibatch 50/50, validation error 89.300000 %
     epoch 1, minibatch 50/50, test error of best model 89.740000 %
epoch 2, minibatch 50/50, validation error 87.660000 %
     epoch 2, minibatch 50/50, test error of best model 88.950000 %
epoch 3, minibatch 50/50, validation error 93.670000 %
epoch 4, minibatch 50/50, validation error 91.080000 %
epoch 5, minibatch 50/50, validation error 93.320000 %
epoch 6, minibatch 50/50, validation error 93.890000 %
epoch 7, minibatch 50/50, validation error 90.990000 %
epoch 8, minibatch 50/50, validation error 89.320000 %
epoch 9, minibatch 50/50, validation error 86.800000 %
     epoch 9, minibatch 50/50, test error of best model 88.620000 %
epoch 10, minibatch 50/50, validation error 87.690000 %
epoch 11, minibatch 50/50, validation error 90.180000 %
epoch 12, minibatch 50/50, validation error 87.330000 %
epoch 13, minibatch 50/50, validation error 88.460000 %
epoch 14, minibatch 50/50, validation error 88.820000 %
epoch 15, minibatch 5

In [12]:
# Two small float vectors for the division experiment in the following cells;
# the first entry of b_value is tiny on purpose so the quotient becomes large.
a_value = numpy.array([2., 3., 4.])
b_value = numpy.array([0.0001, 9, 0.8])

In [13]:
# Wrap both arrays as Theano shared variables and build their symbolic
# element-wise quotient (nothing is computed until it is evaluated).
a = theano.shared(a_value)
b = theano.shared(b_value)
t = a/b


In [14]:
# Evaluate the symbolic quotient a/b to a concrete array.
t.eval()

array([  2.00000000e+04,   3.33333333e-01,   5.00000000e+00])

In [101]:
# Transpose attribute of the shared variable; the recorded output reports a
# symbolic TensorVariable rather than a concrete array.
a.T

theano.tensor.var.TensorVariable

In [78]:
# Inspect the current value of the hidden layer's mean parameter mu.
layer0.mu.get_value()

array(-0.00040656287296597475)