# convolutional autoencoder with MNIST
### Inspired by [Swarbrick's blog](https://swarbrickjones.wordpress.com/2015/04/29/convolutional-autoencoders-in-pythontheanolasagne/) and Professor [G. E. Hinton's paper](http://science.sciencemag.org/content/313/5786/504)

In [1]:
import os, sys, urllib, gzip
sys.path.append('/home/rui/pylearn2')
from __future__ import print_function
try:
    import cPickle as pickle
except:
    import pickle
sys.setrecursionlimit(10000)

import numpy as np
from lasagne.layers import get_output, InputLayer, DenseLayer, Upscale2DLayer, ReshapeLayer
from lasagne.nonlinearities import rectify, leaky_rectify, tanh
from lasagne.updates import nesterov_momentum
from lasagne.objectives import categorical_crossentropy
import pylearn2
from lasagne.layers.cuda_convnet import Conv2DCCLayer as Conv2DLayerFast
from lasagne.regularization import regularize_layer_params, l2, l1
import theano
import theano.tensor as T
import time
import lasagne
from lasagne.layers import Conv2DLayer as Conv2DLayerSlow
from lasagne.layers import MaxPool2DLayer as MaxPool2DLayerSlow
try:
    from lasagne.layers.cuda_convnet import Conv2DCCLayer as Conv2DLayerFast
    from lasagne.layers.cuda_convnet import MaxPool2DCCLayer as MaxPool2DLayerFast
    print('Using cuda_convnet (faster)')
except ImportError:
    from lasagne.layers import Conv2DLayer as Conv2DLayerFast
    from lasagne.layers import MaxPool2DLayer as MaxPool2DLayerFast
    print('Using lasagne.layers (slower)')

Using cuda_convnet (faster)


Using gpu device 0: GeForce GTX 750 Ti (CNMeM is disabled, CuDNN 3007)
  "downsample module has been moved to the theano.tensor.signal.pool module.")


In [2]:
# Load the pickled MNIST splits and prepare the autoencoder inputs/targets.
# NOTE: unpickling can execute arbitrary code; only load a trusted archive.
with gzip.open('/home/rui/Downloads/mnist.pkl.gz', 'rb') as f:
    try:
        # Python 3's pickle.load accepts `encoding`.
        train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
    except TypeError:
        # Python 2's (c)pickle.load has no `encoding` keyword; the TypeError is
        # raised before anything is read, so the stream is still at its start.
        train_set, valid_set, test_set = pickle.load(f)
X, y = train_set
# Network input: (samples, 1 channel, 28, 28).
X = np.reshape(X, (-1, 1, 28, 28))
# Reconstruction target: the same images flattened to one row per sample.
X_out = X.reshape((X.shape[0], -1))

In [3]:
# Convolution / pooling settings shared by the network builders.
conv_num_filters, filter_size, pool_size = 16, 3, 2
# Width of the 'encode' bottleneck and of the dense layer that feeds it.
encode_size, dense_mid_size = 16, 128
# Padding used by the encoder-side and decoder-side convolutions respectively.
pad_in, pad_out = 'valid', 'full'

In [4]:
def build_cnn(input_var=None):
    """Build the full convolutional autoencoder and return its last layer.

    The input layer takes its channel/height/width from the module-level
    array ``X``.  The encoder stacks two tanh convolutions, a max-pool, a
    wider tanh convolution and a second max-pool, followed by dense layers
    of ``dense_mid_size`` and ``encode_size`` units (the latter is named
    'encode').  A dense layer named 'action' with ``encode_size`` units sits
    between the code and the decoder.  The decoder expands to 800 units,
    reshapes them to ``(2 * conv_num_filters, 5, 5)`` feature maps and
    alternates upscaling with ``pad_out`` convolutions, ending in a
    single-filter sigmoid convolution that is flattened per sample.

    Args:
        input_var: optional Theano variable bound to the input layer.

    Returns:
        The final ReshapeLayer of the autoencoder.
    """
    def conv_tanh(incoming, n_filters, padding):
        # tanh convolution with its own orthogonal initialiser
        return Conv2DLayerFast(incoming, num_filters=n_filters,
                               W=lasagne.init.Orthogonal(1.0),
                               nonlinearity=lasagne.nonlinearities.tanh,
                               filter_size=filter_size, pad=padding)

    def dense_tanh(incoming, n_units, name=None):
        # tanh dense layer with its own orthogonal initialiser
        return DenseLayer(incoming, name=name, num_units=n_units,
                          W=lasagne.init.Orthogonal(1.0),
                          nonlinearity=lasagne.nonlinearities.tanh)

    # ---- encoder ----
    net = InputLayer(shape=(None, X.shape[1], X.shape[2], X.shape[3]),
                     input_var=input_var)
    net = conv_tanh(net, conv_num_filters, pad_in)
    net = conv_tanh(net, conv_num_filters, pad_in)
    net = MaxPool2DLayerFast(net, pool_size=pool_size)
    net = conv_tanh(net, 2 * conv_num_filters, pad_in)
    net = MaxPool2DLayerFast(net, pool_size=pool_size)
    net = ReshapeLayer(net, shape=([0], -1))
    net = dense_tanh(net, dense_mid_size)
    encode_layer = dense_tanh(net, encode_size, name='encode')

    # ---- transformation applied to the code ----
    action_layer = dense_tanh(encode_layer, encode_size, name='action')

    # ---- decoder ----
    net = dense_tanh(action_layer, 800)
    net = ReshapeLayer(net, shape=([0], 2 * conv_num_filters, 5, 5))
    net = Upscale2DLayer(net, scale_factor=pool_size)
    net = conv_tanh(net, conv_num_filters, pad_out)
    net = Upscale2DLayer(net, scale_factor=pool_size)
    net = conv_tanh(net, conv_num_filters, pad_out)
    # The single-filter output convolution uses the plain Lasagne layer class.
    net = Conv2DLayerSlow(net, num_filters=1,
                          W=lasagne.init.Orthogonal(1.0),
                          nonlinearity=lasagne.nonlinearities.sigmoid,
                          filter_size=filter_size, pad=pad_out)
    return ReshapeLayer(net, shape=([0], -1))

def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    """Yield successive ``(inputs, targets)`` mini-batches of ``batchsize`` rows.

    Only full batches are produced: trailing samples that do not fill a
    complete batch are skipped.  With ``shuffle=True`` the rows are picked
    through a random permutation drawn from NumPy's global RNG; otherwise
    consecutive slices are used.
    """
    n_samples = len(inputs)
    assert n_samples == len(targets)
    order = None
    if shuffle:
        order = np.arange(n_samples)
        np.random.shuffle(order)
    for lo in range(0, n_samples - batchsize + 1, batchsize):
        hi = lo + batchsize
        picked = order[lo:hi] if shuffle else slice(lo, hi)
        yield inputs[picked], targets[picked]


In [5]:
# Symbolic placeholders: a 4-D image batch and a matrix of flattened targets.
input_var, target_var = T.tensor4('inputs'), T.matrix('targets')
learnrate = 0.01

# Build the autoencoder on the symbolic input and restore previously saved weights.
network = build_cnn(input_var)
with np.load('CAE_MNIST2.npz') as f:
    # The archive holds positional arrays under the keys arr_0, arr_1, ...
    param_values = [f['arr_%d' % i] for i in range(len(f.files))]
lasagne.layers.set_all_param_values(network, param_values)

# Mean squared reconstruction error and the network's trainable parameters.
reconstructed = lasagne.layers.get_output(network)
loss = lasagne.objectives.squared_error(reconstructed, target_var).mean()
params = lasagne.layers.get_all_params(network, trainable=True)

Building model and compiling functions...


Training part

In [12]:
# num_epochs = 30
# input_var = T.tensor4('inputs')
# target_var = T.matrix('targets')
# learnrate=0.01
# # Create neural network model (depending on first command line parameter)
# print("Building model and compiling functions...")
# network, encode_layer = build_cnn(input_var)
# l2_penalty = regularize_layer_params(network, l2)
# l1_penalty = regularize_layer_params(network, l1)
# reconstructed = lasagne.layers.get_output(network)
# loss = lasagne.objectives.squared_error(reconstructed, target_var)
# loss = loss.mean()
# params = lasagne.layers.get_all_params(network, trainable=True)
# updates = lasagne.updates.nesterov_momentum(
#     loss, params, learning_rate=learnrate, momentum=0.975)
# train_fn = theano.function([input_var, target_var], loss, updates=updates,on_unused_input='warn')
# print("Starting training...")

# for epoch in range(num_epochs):
#     train_err = 0
#     train_batches = 0
#     start_time = time.time()
#     for batch in iterate_minibatches(X, X_out, 500, shuffle=False):
#         inputs, targets = batch
#         train_err += train_fn(inputs, targets)
#         train_batches += 1

#         # Then we print the results for this epoch:
#     print("Epoch {} of {} took {:.3f}s".format(
#         epoch + 1, num_epochs, time.time() - start_time))
#     print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
#     # Optionally, you could now dump the network weights to a file like this:
# np.savez('CAE_MNIST2.npz', *lasagne.layers.get_all_param_values(network))


Building model and compiling functions...
Starting training...
Epoch 1 of 30 took 15.382s
  training loss:		0.128702
Epoch 2 of 30 took 15.357s
  training loss:		0.101073
Epoch 3 of 30 took 15.367s
  training loss:		0.091356
Epoch 4 of 30 took 15.369s
  training loss:		0.071927
Epoch 5 of 30 took 15.387s
  training loss:		0.061472
Epoch 6 of 30 took 15.395s
  training loss:		0.049544
Epoch 7 of 30 took 15.378s
  training loss:		0.039866
Epoch 8 of 30 took 15.408s
  training loss:		0.034431
Epoch 9 of 30 took 15.377s
  training loss:		0.031566
Epoch 10 of 30 took 15.383s
  training loss:		0.029721
Epoch 11 of 30 took 15.384s
  training loss:		0.028369
Epoch 12 of 30 took 15.538s
  training loss:		0.027291
Epoch 13 of 30 took 15.483s
  training loss:		0.026373
Epoch 14 of 30 took 15.508s
  training loss:		0.025548
Epoch 15 of 30 took 15.545s
  training loss:		0.024828
Epoch 16 of 30 took 15.418s
  training loss:		0.024241
Epoch 17 of 30 took 15.876s
  training loss:		0.023776
Epoch 18 of

In [19]:
def train_model(num_epochs=10, learnrate=0.01):
    """Train the module-level ``network`` as an autoencoder and save its weights.

    Runs Nesterov-momentum SGD (momentum 0.975) on the mean squared
    reconstruction error, iterating over ``X`` / ``X_out`` in unshuffled
    mini-batches of 500, and finally writes all parameter values to
    'CAE_MNIST2.npz'.

    Args:
        num_epochs: number of passes over the training data.
        learnrate: learning rate handed to the optimiser.
    """
    # The network's output graph hangs off the variable stored on its input
    # layer, so the compiled function has to take exactly that variable; a
    # freshly created tensor would be disconnected from the loss.
    input_var = lasagne.layers.get_all_layers(network)[0].input_var
    target_var = T.matrix('targets')

    print("Building model and compiling functions...")
    reconstructed = lasagne.layers.get_output(network)
    loss = lasagne.objectives.squared_error(reconstructed, target_var)
    loss = loss.mean()
    params = lasagne.layers.get_all_params(network, trainable=True)
    # `learnrate` is the caller's value; it is no longer overwritten with 0.01.
    updates = lasagne.updates.nesterov_momentum(
        loss, params, learning_rate=learnrate, momentum=0.975)
    train_fn = theano.function([input_var, target_var], loss,
                               updates=updates, on_unused_input='warn')

    print("Starting training...")
    for epoch in range(num_epochs):
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for inputs, targets in iterate_minibatches(X, X_out, 500, shuffle=False):
            train_err += train_fn(inputs, targets)
            train_batches += 1

        # Report the wall-clock time and the mean batch loss of this epoch.
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print("  training loss:\t\t{:.6f}".format(train_err / train_batches))

    # Persist the trained weights once all epochs are done.
    np.savez('CAE_MNIST2.npz', *lasagne.layers.get_all_param_values(network))

In [6]:
# Collect every layer of the autoencoder so individual layers can be picked by index.
all_layers = lasagne.layers.get_all_layers(network)

In [7]:
# Index 9 is the dense layer that build_cnn names 'action'
# (input, conv, conv, pool, conv, pool, reshape, dense, 'encode', 'action').
action_layer = all_layers[9]
# Display the name to confirm the right layer was picked.
action_layer.name

'action'

In [8]:
# Snapshot the 'action' layer's current weights and biases so they can be restored later.
original_W = action_layer.W.get_value()
original_b = action_layer.b.get_value()


In [9]:
# Inspect the shape of the saved bias vector.
original_b.shape

(16,)

In [10]:
# Four independent standard-normal initialisations (float32) for the 'action'
# layer, each shaped like the saved weight matrix and bias vector.
w_shape = (original_W.shape[0], original_W.shape[1])
b_shape = (original_b.shape[0],)

action1_W = np.random.randn(*w_shape).astype(np.float32)
action1_b = np.random.randn(*b_shape).astype(np.float32)

action2_W = np.random.randn(*w_shape).astype(np.float32)
action2_b = np.random.randn(*b_shape).astype(np.float32)

action3_W = np.random.randn(*w_shape).astype(np.float32)
action3_b = np.random.randn(*b_shape).astype(np.float32)

action4_W = np.random.randn(*w_shape).astype(np.float32)
action4_b = np.random.randn(*b_shape).astype(np.float32)

In [11]:
# Overwrite the 'action' layer's parameters with the first random initialisation.
action_layer.W.set_value(action1_W)
action_layer.b.set_value(action1_b)

In [23]:
# Inspect the trainable parameters at indices 10 and 11 (recorded output: action.W, action.b).
params[10:12]

[action.W, action.b]

In [22]:
num_epochs = 10
# Only params[10:12] receive Nesterov-momentum updates, so every other
# parameter of the network stays fixed while this loop runs.
action_params = params[10:12]
updates = lasagne.updates.nesterov_momentum(
    loss, action_params, learning_rate=learnrate, momentum=0.975)
# Alternative optimiser (disabled):
# updates = lasagne.updates.rmsprop(
#     loss, params[10:12], learning_rate=0.01)
train_fn = theano.function([input_var, target_var], loss,
                           updates=updates, on_unused_input='warn')
print("Starting training...")
for epoch in range(num_epochs):
    train_err, train_batches = 0, 0
    start_time = time.time()
    for batch in iterate_minibatches(X, X_out, 500, shuffle=False):
        inputs, targets = batch
        train_err += train_fn(inputs, targets)
        train_batches += 1

    # Report the wall-clock time and the mean batch loss of this epoch.
    print("Epoch {} of {} took {:.3f}s".format(
        epoch + 1, num_epochs, time.time() - start_time))
    print("  training loss:\t\t{:.6f}".format(train_err / train_batches))



Starting training...
Epoch 1 of 10 took 15.385s
  training loss:		0.027733
Epoch 2 of 10 took 15.375s
  training loss:		0.022417
Epoch 3 of 10 took 15.370s
  training loss:		0.021625
Epoch 4 of 10 took 15.409s
  training loss:		0.021240
Epoch 5 of 10 took 15.410s
  training loss:		0.020966
Epoch 6 of 10 took 15.384s
  training loss:		0.020743
Epoch 7 of 10 took 15.374s
  training loss:		0.020559
Epoch 8 of 10 took 15.403s
  training loss:		0.020415
Epoch 9 of 10 took 15.463s
  training loss:		0.020268
Epoch 10 of 10 took 15.538s
  training loss:		0.020132


In [24]:
# Read back the 'action' layer's current parameters, replacing the random
# values previously held in action1_W / action1_b.
action1_W  = action_layer.W.get_value()
action1_b = action_layer.b.get_value()

In [25]:
# Put the 'action' layer back to the parameters saved in original_W / original_b.
action_layer.W.set_value(original_W)
action_layer.b.set_value(original_b)

In [26]:
num_epochs = 1
# Only params[10:12] receive Nesterov-momentum updates; a fresh update
# dictionary and compiled function are created for this run.
action_params = params[10:12]
updates = lasagne.updates.nesterov_momentum(
    loss, action_params, learning_rate=learnrate, momentum=0.975)
train_fn = theano.function([input_var, target_var], loss,
                           updates=updates, on_unused_input='warn')
print("Starting training...")
for epoch in range(num_epochs):
    train_err, train_batches = 0, 0
    start_time = time.time()
    for batch in iterate_minibatches(X, X_out, 500, shuffle=False):
        inputs, targets = batch
        train_err += train_fn(inputs, targets)
        train_batches += 1

    # Report the wall-clock time and the mean batch loss of this epoch.
    print("Epoch {} of {} took {:.3f}s".format(
        epoch + 1, num_epochs, time.time() - start_time))
    print("  training loss:\t\t{:.6f}".format(train_err / train_batches))



Starting training...
Epoch 1 of 1 took 9.679s
  training loss:		0.029062


In [39]:
# Check which shared-variable type holds the first trainable parameter.
type(params[0])

theano.sandbox.cuda.var.CudaNdarraySharedVariable

In [20]:
# Snapshot the current values of every parameter of the full network, in layer order.
all_param = lasagne.layers.get_all_param_values(network)

In [14]:
# Number of parameter arrays in the snapshot.
len(all_param)

18

In [16]:
def build_encode_net(input_var=None):
    """Build the encoder half of the autoencoder and return its 'encode' layer.

    The layer stack is the same as the first part of ``build_cnn``: two tanh
    convolutions, max-pool, a wider tanh convolution, max-pool, flatten, a
    dense layer of ``dense_mid_size`` units and the ``encode_size``-unit
    dense layer named 'encode'.

    Args:
        input_var: optional Theano variable bound to the input layer.

    Returns:
        The DenseLayer named 'encode'.
    """
    def conv_tanh(incoming, n_filters):
        # tanh convolution with pad_in padding and its own orthogonal initialiser
        return Conv2DLayerFast(incoming, num_filters=n_filters,
                               W=lasagne.init.Orthogonal(1.0),
                               nonlinearity=lasagne.nonlinearities.tanh,
                               filter_size=filter_size, pad=pad_in)

    def dense_tanh(incoming, n_units, name=None):
        # tanh dense layer with its own orthogonal initialiser
        return DenseLayer(incoming, name=name, num_units=n_units,
                          W=lasagne.init.Orthogonal(1.0),
                          nonlinearity=lasagne.nonlinearities.tanh)

    net = InputLayer(shape=(None, X.shape[1], X.shape[2], X.shape[3]),
                     input_var=input_var)
    net = conv_tanh(net, conv_num_filters)
    net = conv_tanh(net, conv_num_filters)
    net = MaxPool2DLayerFast(net, pool_size=pool_size)
    net = conv_tanh(net, 2 * conv_num_filters)
    net = MaxPool2DLayerFast(net, pool_size=pool_size)
    net = ReshapeLayer(net, shape=([0], -1))
    net = dense_tanh(net, dense_mid_size)
    return dense_tanh(net, encode_size, name='encode')

In [26]:
# Build a stand-alone encoder on the same symbolic input and copy in the first
# ten parameter arrays of the full network (W and b of its three convolutions
# and of the two dense layers up to 'encode').
encode_network = build_encode_net(input_var)
lasagne.layers.set_all_param_values(encode_network, all_param[0:10])

In [19]:
def build_decode_net(input_var=None):
    """Build a stand-alone decoder matching the decoder half of ``build_cnn``.

    Takes ``encode_size``-dimensional codes, expands them to 800 units,
    reshapes to ``(2 * conv_num_filters, 5, 5)`` feature maps and alternates
    upscaling with ``pad_out`` convolutions, ending in a single-filter
    sigmoid convolution that is flattened per sample.

    Every convolution uses the same layer class as its counterpart in
    ``build_cnn``.  ``Conv2DCCLayer`` and ``Conv2DLayer`` have different
    ``flip_filters`` defaults, so weights copied from the full network only
    reproduce its output when the classes match.

    Args:
        input_var: optional Theano matrix bound to the input layer.

    Returns:
        The final ReshapeLayer of the decoder.
    """
    network = InputLayer(shape=(None, encode_size), input_var=input_var)

    network = DenseLayer(network, num_units=800, W=lasagne.init.Orthogonal(1.0),
                         nonlinearity=lasagne.nonlinearities.tanh)

    network = ReshapeLayer(network, shape=([0], 2 * conv_num_filters, 5, 5))

    network = Upscale2DLayer(network, scale_factor=pool_size)

    network = Conv2DLayerFast(network, num_filters=conv_num_filters,
                              W=lasagne.init.Orthogonal(1.0),
                              nonlinearity=lasagne.nonlinearities.tanh,
                              filter_size=filter_size, pad=pad_out)

    network = Upscale2DLayer(network, scale_factor=pool_size)

    # Was Conv2DLayerSlow; build_cnn uses Conv2DLayerFast for this layer.
    network = Conv2DLayerFast(network, num_filters=conv_num_filters,
                              W=lasagne.init.Orthogonal(1.0),
                              nonlinearity=lasagne.nonlinearities.tanh,
                              filter_size=filter_size, pad=pad_out)

    # The single-filter output convolution is Conv2DLayerSlow in build_cnn too.
    network = Conv2DLayerSlow(network, num_filters=1,
                              W=lasagne.init.Orthogonal(1.0),
                              nonlinearity=lasagne.nonlinearities.sigmoid,
                              filter_size=filter_size, pad=pad_out)

    network = ReshapeLayer(network, shape=([0], -1))

    return network

In [54]:
decode_input_var = T.matrix('inputs')
decode_network = build_decode_net(decode_input_var)
# The decoder reproduces the tail of the full autoencoder (it has no 'action'
# layer), so its weights are the trailing entries of all_param.  Slicing by the
# decoder's own parameter count avoids the count mismatch that all_param[10:]
# causes when the 'action' layer's W and b occupy indices 10-11.
n_decode_params = len(lasagne.layers.get_all_params(decode_network))
lasagne.layers.set_all_param_values(decode_network, all_param[-n_decode_params:])

In [32]:
# NOTE(review): CAE_encode is only defined in a later cell of this file, and
# `inputs` is left over from an earlier mini-batch loop, so this line fails when
# the file is run top to bottom.
temp = CAE_encode(inputs)

In [52]:
# NOTE(review): X_encode is first created in a later cell of this file; this
# cast to float32 only works once that cell has run.
X_encode = X_encode.astype(np.float32)

In [39]:
# Compile the encoder; the function returns a one-element list holding the codes.
CAE_encode = theano.function([input_var], [lasagne.layers.get_output(encode_network)])
batch_size = 1000
# Size the buffers from the data instead of hard-coding 50000 x 784.
X_pred = np.zeros(X_out.shape)
# Allocated as float32, matching the cast this notebook applies to X_encode
# before the codes are passed to the decoder.
X_encode = np.zeros((X.shape[0], encode_size), dtype=np.float32)
i = 0
for batch in iterate_minibatches(X, X_out, batch_size, shuffle=False):
    inputs, targets = batch
    # Take the array out of the returned list explicitly, as the decoding loop
    # does, rather than relying on NumPy broadcasting away the list dimension.
    X_encode[batch_size * i:batch_size * (i + 1)] = CAE_encode(inputs)[0]
    i += 1


In [55]:
# Compile the decoder (returns a one-element list) and reconstruct the images
# from the stored codes, 1000 rows at a time.
decoded = lasagne.layers.get_output(decode_network)
CAE_decode = theano.function([decode_input_var], [decoded])
i = 0
for batch in iterate_minibatches(X_encode, X_out, 1000, shuffle=False):
    inputs, targets = batch
    row_start = 1000 * i
    X_pred[row_start:row_start + 1000] = CAE_decode(inputs)[0]
    i += 1

In [56]:
# Save the reconstructions and the codes; positional arrays are stored as
# arr_0 (X_pred) and arr_1 (X_encode).
np.savez('CAE_MNIST_learned_feature2.npz', X_pred,X_encode)

In [9]:
# Size in bytes of the X_pred array object as reported by sys.getsizeof.
sys.getsizeof(X_pred)

313600112