### 1. Import.

Note that the current version of the RBM code works with neon version 1.1.3.


In [None]:
import sys
sys.path.append('../dbn/')


from rbm_layer import ConvolutionalRBMLayer, RBMLayerWithLabels, RBMLayer
from rbm import RBM
from rbm_optimizer import GradientDescentMomentumRBM


from neon.backends import gen_backend
from neon.optimizers import MultiOptimizer
from neon.initializers import GlorotUniform
from neon.data import DataIterator
from neon.data.datasets import fetch_dataset
from neon.data.datasets import dataset_meta
from neon.callbacks.callbacks import Callbacks


import numpy as np
from matplotlib import pyplot
import os
import gzip
import cPickle

### 2.  Function to load MNIST dataset

In [None]:
def load_mnist(path=".", normalize=True):
    """
    Fetch the MNIST dataset and load it into memory.

    The archive described by ``dataset_meta['mnist']`` is looked up inside
    ``path``; when it is missing, ``fetch_dataset`` is called to obtain it.
    Every image is then flattened into a row of 784 values.

    Args:
        path (str, optional): Local directory in which to cache the raw
                              dataset.  Defaults to current directory.
        normalize (bool, optional): whether to scale values between 0 and 1.
                                    Defaults to True.

    Returns:
        tuple: ``((X_train, y_train), (X_test, y_test), nclass)`` -- the
               training set, the test set and the number of classes, which
               is always 10.
    """
    # Keep the metadata dict under its own name so that the file handle
    # opened below does not shadow it.
    meta = dataset_meta['mnist']

    filepath = os.path.join(path, meta['file'])
    if not os.path.exists(filepath):
        fetch_dataset(meta['url'], meta['file'], filepath, meta['size'])

    # NOTE: unpickling can execute arbitrary code; only load an archive that
    # comes from the trusted location listed in dataset_meta.
    with gzip.open(filepath, 'rb') as archive:
        (X_train, y_train), (X_test, y_test) = cPickle.load(archive)

    # Flatten every sample into a single row of 784 values.
    X_train = X_train.reshape(-1, 784)
    X_test = X_test.reshape(-1, 784)

    if normalize:
        # Assumes raw pixel values lie in 0..255, so dividing by 255 maps
        # them into [0, 1].
        X_train = X_train / 255.
        X_test = X_test / 255.

    return (X_train, y_train), (X_test, y_test), 10

Setup backend

In [None]:
# Backend configuration: the values a reader may want to tune.
backend = 'cpu'
rng_seed = 0
device_id = 0
datatype = np.float32
batch_size = 128

# setup backend
# `datatype` is passed explicitly so that the setting above actually takes
# effect instead of being defined and then silently ignored.
be = gen_backend(backend=backend,
                 batch_size=batch_size,
                 rng_seed=rng_seed,
                 device_id=device_id,
                 datatype=datatype)

Load dataset.

  `data_dir` is the path to the directory containing the MNIST dataset (or the directory where the dataset will be saved if you do not have it yet).

In [None]:
# Directory that holds the MNIST archive (load_mnist fetches it there if absent).
data_dir = './'
(X_train, y_train), (X_test, y_test), nclass = load_mnist(path=data_dir)

# Iterator over the training samples.
# lshape is presumably (channels, height, width) of one sample -- TODO confirm.
train_set = DataIterator(X_train,
                         y=y_train,
                         nclass=nclass,
                         lshape=(1, 28, 28))

# Iterator over the held-out samples, built with the same settings.
valid_set = DataIterator(X_test,
                         y=y_test,
                         nclass=nclass,
                         lshape=(1, 28, 28))

### 3. Create neural network with 3 layers.
The first layer is a **convolutional layer** with 2 filters of shape 1 x 5 x 5 (depth x height x width).
The parameters of this layer are:

* `fshape` --- tuple of shape [depth x height x width x n_filters]
* `init` --- initialization class object
* `strides` --- int or dict with keys 'str_d', 'str_h', 'str_w'
* `padding` --- int or dict with keys 'pad_d', 'pad_h', 'pad_w'
    
The second layer is a **fully connected** layer with 100 hidden units.
The parameters of this layer are:

* `n_hidden` --- number of hidden units
* `init` --- initialization class object. It must have fill(parameter) method which assigns initial value to the parameter.

The third layer is fully connected, with additional input units --- the labels of the input images.
The additional parameters of this layer are:

* `n_classes` --- number of classes
* `use_fast_weights` --- whether to use fast weights during training.

Each layer also supports sparse training. The idea is to obtain a sparse representation, i.e.
only a small fraction of the hidden units is activated rather than all of them. Sparsity can be controlled by the following parameters:
    
* `sparse_target` --- target activation ratio of hidden units
* `sparse_cost` --- penalty for activation ratio not being close to the target
* `sparse_damping` --- sparsity damping parameter. Defines how much activation ratio in previous iteration affects activation ratio in current iteration.

Each layer also has the following parameters for controlling training:

* `persistant` --- boolean, whether to use persistent Contrastive Divergence
* `kPCD` --- int, how many iterations to use to sample "fantasy particles" (in the negative phase)

In [None]:
# Number of hidden units of the fully connected layer ('layer1').
n_hidden = 100

# setup weight initialization function
init_norm = GlorotUniform()

# setup model layers
n_filters = 2
fshape = 5
layers = [ConvolutionalRBMLayer(fshape=(1, fshape, fshape, n_filters),
                                init=init_norm, strides={'str_d': 1, 'str_h': 2, 'str_w': 2},
                                sparse_cost=0.0, sparse_damping=0.0, sparse_target=0.0,
                                name='layer0'),
          # Use the configured n_hidden rather than repeating the literal 100,
          # so that changing the setting above actually changes the model.
          RBMLayer(n_hidden=n_hidden, init=init_norm,
                   sparse_cost=0.0, sparse_damping=0.0, sparse_target=0.0,
                   name='layer1'),
          RBMLayerWithLabels(n_hidden=50, init=init_norm, n_classes=nclass, use_fast_weights=True, name='layer2')]

### 4. Setup optimizer

A separate optimizer should be created for each layer.
The only optimization algorithm currently implemented is Gradient Descent with Momentum.
Parameters are:

* `learning_rate` --- learning rate
* `momentum` --- momentum coefficient
* `wdecay` --- weight decay. It is a list of 3 numbers - weight decays for weights W and biases of hidden and visible units
* `name` --- name of optimizer

In [None]:
# setup optimizer
# NOTE(review): `learning_rate` and `momentum` are not referenced by the
# constructor calls in this cell, which pass literal values instead.
learning_rate = 0.1
weight_decay=[0, 0, 0]
momentum = 0

# One optimizer per layer, all sharing the same weight decay list.
# The two positional arguments are presumably the learning rate and the
# momentum coefficient -- TODO confirm against rbm_optimizer.
optimizer_l0 = GradientDescentMomentumRBM(0.1, 0.1, wdecay=weight_decay, name='layer0_optimizer')
optimizer_l1 = GradientDescentMomentumRBM(0.05, 0.1, wdecay=weight_decay, name='layer1_optimizer')
optimizer_l2 = GradientDescentMomentumRBM(0.1, 0, wdecay=weight_decay, name='layer2_optimizer')

After optimizers for each layer are configured, they must be combined into one optimizer using MultiOptimizer class

In [None]:
# Optimizer lookup keyed by layer name; the 'default' entry reuses the
# optimizer configured for layer0.
per_layer_optimizers = {
    'layer0': optimizer_l0,
    'layer1': optimizer_l1,
    'layer2': optimizer_l2,
    'default': optimizer_l0,
}
optimizer = MultiOptimizer(per_layer_optimizers, name='MultiLayer_optimizer')

Now let's create the RBM model and configure the callbacks. We simply use the standard callbacks from the neon package.

In [None]:
# initialize model object
rbm = RBM(layers=layers)

# setup standard callbacks
callbacks = Callbacks(rbm, train_set, output_file='tmp_file')

So, now we are ready to fit RBM model

In [None]:
# Number of epochs handed to rbm.fit.
num_epochs = 1

rbm.fit(train_set,
        optimizer=optimizer,
        num_epochs=num_epochs,
        callbacks=callbacks)

To evaluate the model output, the `fprop` method can be used.

In [None]:
# Forward-propagate only the first minibatch of the validation set: the loop
# is left right after the first fprop call. The minibatch index is not
# needed, so the iterator is consumed directly instead of through enumerate.
for x_val, y_val in valid_set:
    hidden = rbm.fprop(x_val)
    break

The model can be saved:

In [None]:
serialized_model = rbm.serialize()

# cPickle is already imported in the notebook's import cell, so it is used
# directly here instead of being re-imported under an alias mid-notebook.
with open('rbm.pkl', 'wb') as save_file:
    cPickle.dump(serialized_model, save_file)

And loaded back:

In [None]:
# NOTE(review): `layers` is the same list that was passed to `rbm`, so unless
# RBM copies its layers, `new_rbm` shares layer objects with the trained
# model -- confirm that this is intended.
new_rbm = RBM(layers=layers)
# Restore the weights from 'rbm.pkl', the file written when the model was saved.
new_rbm.load_weights('rbm.pkl')