# convolutional autoencoder with MNIST
###  inspired by the [Swarbrick's blog](https://swarbrickjones.wordpress.com/2015/04/29/convolutional-autoencoders-in-pythontheanolasagne/) and Professor [G.E.Hinton's paper](http://science.sciencemag.org/content/313/5786/504)

In [1]:
import os, sys, urllib, gzip
sys.path.append('/home/rui/pylearn2')
from __future__ import print_function
try:
    import cPickle as pickle
except:
    import pickle
sys.setrecursionlimit(10000)

import numpy as np
from lasagne.layers import get_output, InputLayer, DenseLayer, Upscale2DLayer, ReshapeLayer
from lasagne.nonlinearities import rectify, leaky_rectify, tanh
from lasagne.updates import nesterov_momentum
from lasagne.objectives import categorical_crossentropy
import pylearn2
from lasagne.layers.cuda_convnet import Conv2DCCLayer as Conv2DLayerFast
from lasagne.regularization import regularize_layer_params, l2, l1
import theano
import theano.tensor as T
import time
import lasagne
from lasagne.layers import Conv2DLayer as Conv2DLayerSlow
from lasagne.layers import MaxPool2DLayer as MaxPool2DLayerSlow
try:
    from lasagne.layers.cuda_convnet import Conv2DCCLayer as Conv2DLayerFast
    from lasagne.layers.cuda_convnet import MaxPool2DCCLayer as MaxPool2DLayerFast
    print('Using cuda_convnet (faster)')
except ImportError:
    from lasagne.layers import Conv2DLayer as Conv2DLayerFast
    from lasagne.layers import MaxPool2DLayer as MaxPool2DLayerFast
    print('Using lasagne.layers (slower)')

Using cuda_convnet (faster)


Using gpu device 0: GeForce GTX 750 Ti (CNMeM is disabled, CuDNN 3007)
  "downsample module has been moved to the theano.tensor.signal.pool module.")


In [2]:
f = gzip.open('/home/rui/Downloads/mnist.pkl.gz', 'rb')
try:
    train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
except:
    train_set, valid_set, test_set = pickle.load(f)
f.close()
X, y = train_set
X = np.reshape(X, (-1, 1, 28, 28))
X_out = X.reshape((X.shape[0], -1))

In [3]:
conv_num_filters = 16
filter_size = 3
pool_size = 2
encode_size = 16
dense_mid_size = 128
pad_in = 'valid'    
pad_out = 'full'

In [5]:
def build_cnn(input_var=None):
    
    network = InputLayer(shape=(None,  X.shape[1], X.shape[2], X.shape[3]),input_var=input_var)
    
    network = Conv2DLayerFast(network, num_filters=conv_num_filters, W=lasagne.init.Orthogonal(1.0),\
                              nonlinearity=lasagne.nonlinearities.tanh, filter_size=filter_size, pad=pad_in)

    network = Conv2DLayerFast(network, num_filters=conv_num_filters, W=lasagne.init.Orthogonal(1.0),\
                              nonlinearity=lasagne.nonlinearities.tanh, filter_size=filter_size, pad=pad_in)

    network = MaxPool2DLayerFast(network, pool_size=pool_size)

    network = Conv2DLayerFast(network, num_filters=2*conv_num_filters, W=lasagne.init.Orthogonal(1.0),\
                              nonlinearity=lasagne.nonlinearities.tanh, filter_size=filter_size, pad=pad_in)

    network = MaxPool2DLayerFast(network, pool_size=pool_size)
    
    network = ReshapeLayer(network, shape =(([0], -1)))
    
    network = DenseLayer(network, num_units= dense_mid_size, W=lasagne.init.Orthogonal(1.0),\
                              nonlinearity=lasagne.nonlinearities.tanh)
    
    encode_layer = DenseLayer(network, name= 'encode', num_units= encode_size, W=lasagne.init.Orthogonal(1.0),\
                              nonlinearity=lasagne.nonlinearities.tanh)

    network = DenseLayer(encode_layer, num_units= dense_mid_size, W=lasagne.init.Orthogonal(1.0),\
                              nonlinearity=lasagne.nonlinearities.tanh)
    
    network = DenseLayer(network, num_units= 800, W=lasagne.init.Orthogonal(1.0),\
                              nonlinearity=lasagne.nonlinearities.tanh)
    
    network = ReshapeLayer(network, shape =(([0], 2*conv_num_filters, 5, 5)))
    
    network = Upscale2DLayer(network, scale_factor = pool_size)
    
    network = Conv2DLayerFast(network, num_filters=conv_num_filters, W=lasagne.init.Orthogonal(1.0),\
                              nonlinearity=lasagne.nonlinearities.tanh, filter_size=filter_size, pad=pad_out)

    network = Upscale2DLayer(network, scale_factor = pool_size)
    
    network = Conv2DLayerSlow(network, num_filters=conv_num_filters, W=lasagne.init.Orthogonal(1.0),\
                              nonlinearity=lasagne.nonlinearities.tanh, filter_size=filter_size, pad=pad_out)

    network = Conv2DLayerSlow(network, num_filters=1, W=lasagne.init.Orthogonal(1.0),\
                              nonlinearity=lasagne.nonlinearities.sigmoid, filter_size=filter_size, pad=pad_out)
    
    network = ReshapeLayer(network, shape =(([0], -1)))

    return network, encode_layer

def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt]


Trainning part

In [6]:
num_epochs = 30
input_var = T.tensor4('inputs')
target_var = T.matrix('targets')
learnrate=0.2
# Create neural network model (depending on first command line parameter)
print("Building model and compiling functions...")
network, encode_layer = build_cnn(input_var)
l2_penalty = regularize_layer_params(network, l2)
l1_penalty = regularize_layer_params(network, l1)
reconstructed = lasagne.layers.get_output(network)
loss = lasagne.objectives.squared_error(reconstructed, target_var)
loss = loss.mean()
params = lasagne.layers.get_all_params(network, trainable=True)
print("Starting training...")

for epoch in range(num_epochs):
    train_err = 0
    train_batches = 0
    start_time = time.time()
    if epoch % 5 == 0:
        learnrate = learnrate * 0.8
        updates = lasagne.updates.nesterov_momentum(
            loss, params, learning_rate=learnrate, momentum=0.9)
        train_fn = theano.function([input_var, target_var], loss, updates=updates,on_unused_input='warn')
        
    for batch in iterate_minibatches(X, X_out, 500, shuffle=False):
        inputs, targets = batch
        train_err += train_fn(inputs, targets)
        train_batches += 1

        # Then we print the results for this epoch:
    print("Epoch {} of {} took {:.3f}s".format(
        epoch + 1, num_epochs, time.time() - start_time))
    print("  training loss:\t\t{:.6f}".format(train_err / train_batches))



    # Optionally, you could now dump the network weights to a file like this:
np.savez('CAE_MNIST.npz', *lasagne.layers.get_all_param_values(network))


Building model and compiling functions...
Starting training...
Epoch 1 of 30 took 24.963s
  training loss:		0.086922
Epoch 2 of 30 took 13.797s
  training loss:		0.041286
Epoch 3 of 30 took 13.801s
  training loss:		0.028758
Epoch 4 of 30 took 13.826s
  training loss:		0.025219
Epoch 5 of 30 took 13.810s
  training loss:		0.023327
Epoch 6 of 30 took 17.382s
  training loss:		0.022095
Epoch 7 of 30 took 13.828s
  training loss:		0.021484
Epoch 8 of 30 took 13.839s
  training loss:		0.021024
Epoch 9 of 30 took 13.835s
  training loss:		0.020552
Epoch 10 of 30 took 13.832s
  training loss:		0.020142
Epoch 11 of 30 took 17.407s
  training loss:		0.019707
Epoch 12 of 30 took 13.848s
  training loss:		0.019458
Epoch 13 of 30 took 13.843s
  training loss:		0.019248
Epoch 14 of 30 took 13.847s
  training loss:		0.019057
Epoch 15 of 30 took 13.843s
  training loss:		0.018876
Epoch 16 of 30 took 17.495s
  training loss:		0.018654
Epoch 17 of 30 took 13.846s
  training loss:		0.018521
Epoch 18 of

In [7]:
CAE_predict = theano.function([input_var], [lasagne.layers.get_output(network),lasagne.layers.get_output(encode_layer)])
X_pred = np.zeros((50000,784))
X_encode = np.zeros((50000,encode_size))
i = 0
for batch in iterate_minibatches(X, X_out, 1000, shuffle=False):
    inputs, targets = batch
    a, b = CAE_predict(inputs)
    X_pred[1000*i:1000*(i+1)] = a
    X_encode[1000*i:1000*(i+1)] = b
    i+=1



In [8]:
np.savez('CAE_MNIST_learned_feature.npz', X_pred,X_encode)

In [9]:
sys.getsizeof(X_pred)

313600112