In [1]:
import numpy as np
import tensorflow as tf

Reference: [Variational Autoencoders Explained](http://kvfrans.com/variational-autoencoders-explained/)

### implementation notes

* `for last_dim, next_dim in zip(encode_dim, encode_dim[1:]):` is a neat way to iterate over consecutive pairs, giving each layer's input and output width at the same time.

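  `[(last_dim, next_dim) for last_dim, next_dim in zip(reversed(encode_dim), reversed(encode_dim[:-1]))]` yields the pairs in decoder order, e.g. `(200, 400), (400, 784)` for `encode_dim = [784, 400, 200]`.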
  
  

* Open question: should the encoder and decoder share their bias terms?
    - No: the biases have different dimensions.
    - Yes: the decoder $f^{-1}$ should mirror the encoder $f$ exactly.

In [2]:
# MNIST loader shipped with the TensorFlow 1.x tutorials package.
from tensorflow.examples.tutorials.mnist import input_data

# Number of training steps; each step consumes one mini-batch.
num_steps = 30000

# Directory the loader reads the MNIST archives from; labels are requested
# one-hot encoded (the training loop only uses the images).
mnist_data_dir = "/tmp/data/"
mnist = input_data.read_data_sets(mnist_data_dir, one_hot=True)

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [4]:
# --- Model and training configuration ---------------------------------
# Step size passed to the optimizer.
learning_rate = 0.001
# Mini-batch size: rows of the input placeholder and of each training batch.
num = 64
# Width of the latent code (size of the mean and variance heads).
latent_dim = 2
# Layer widths, input first and then each encoder layer; the decoder walks
# the same list in reverse.
encode_dim = [784, 512]
# Width of one input row, taken from the first entry of encode_dim.
input_dim = encode_dim[0]
def init(shape,rtype='uniform'):
    """Return a random tensor to use as the initial value of a weight or bias.

    Args:
        shape: sequence of ints describing the tensor. A 2-entry shape is
            treated as a weight matrix (both entries enter the uniform
            limit); for any other length only ``shape[0]`` is used.
        rtype: ``'uniform'`` (default) samples from ``U(-limit, limit)`` with
            ``limit = sqrt(6 / (shape[0] + shape[1]))`` for 2-entry shapes and
            ``sqrt(6 / shape[0])`` otherwise. ``'normal'`` samples with
            ``stddev = 1 / sqrt(shape[0] / 2)`` (mean is left at
            ``tf.random_normal``'s default).

    Returns:
        A TensorFlow random tensor of the requested shape.

    Raises:
        ValueError: if ``rtype`` is neither ``'normal'`` nor ``'uniform'``.
    """
    # Fail fast: previously an unknown rtype fell through and returned None,
    # which only surfaced later as an obscure error inside tf.Variable.
    if rtype not in ('normal', 'uniform'):
        raise ValueError("rtype must be 'normal' or 'uniform', got %r" % (rtype,))

    if rtype == 'normal':
        return tf.random_normal(shape=shape, stddev=1. / tf.sqrt(shape[0] / 2.))

    # Uniform case: the two original branches differed only in the denominator.
    if len(shape) == 2:
        fan = shape[0] + shape[1]
    else:
        fan = shape[0]
    init_max_value = np.sqrt(6. / fan)
    return tf.random_uniform(shape,np.negative(init_max_value),init_max_value)
# Build a VAE whose decoder reuses the transposed encoder weights, inside a
# fresh graph, and train it in a session that is closed when the block exits.
with tf.Graph().as_default():
    with tf.Session() as sess:
        # One mini-batch of inputs: `num` rows of `input_dim` values.
        input_x = tf.placeholder(dtype=tf.float32,shape=[num,input_dim])
        weight_list = []
        encode_bias_list = []
        decode_bias_list = []
        # init encoding weight,bias: one pair per consecutive (in, out) width
        for last_dim, next_dim in zip(encode_dim, encode_dim[1:]):
            weight_list.append(tf.Variable(init([last_dim,next_dim])))
            encode_bias_list.append(tf.Variable(init([next_dim])))
        # init decoding bias: the decoder shares the encoder weights (transposed
        # below) but gets its own bias per layer, sized to that layer's output.
        for last_dim, next_dim in zip(reversed(encode_dim), reversed(encode_dim[:-1])):
            decode_bias_list.append(tf.Variable(init([next_dim])))
        # Heads mapping the last encoder layer to the latent mean and to
        # `variance`, which the code below uses as a log-variance.
        hidden_mean_weight = tf.Variable(init([encode_dim[-1],latent_dim]))
        hidden_mean_bias = tf.Variable(init([latent_dim]))
        hidden_variance_weight = tf.Variable(init([encode_dim[-1],latent_dim]))
        hidden_variance_bias = tf.Variable(init([latent_dim]))
        # Projection from the latent sample back up to the last encoder width.
        decode_weight = tf.Variable(init([latent_dim,encode_dim[-1]]))
        decode_bias = tf.Variable(init([encode_dim[-1]]))
        # Building Graph
        # encoder
        hidden_layer = input_x
        for weight,bias in zip(weight_list,encode_bias_list):
            hidden_layer = tf.nn.sigmoid(tf.matmul(hidden_layer,weight) + bias)
        # reparameterization trick: z = mean + std * eps with eps ~ N(0, 1),
        # where std = exp(variance / 2), i.e. `variance` holds log(sigma^2).
        mean = tf.matmul(hidden_layer, hidden_mean_weight) + hidden_mean_bias
        variance = tf.matmul(hidden_layer, hidden_variance_weight) + hidden_variance_bias
        # sample from N(0,1)
        samples = tf.random_normal(tf.shape(variance),dtype=tf.float32,mean=0.0,stddev=1.0,name='samples')
        hidden_layer = mean + tf.exp(variance / 2) * samples
        hidden_layer = tf.nn.sigmoid(tf.matmul(hidden_layer,decode_weight) + decode_bias)
        # decoder: walk the encoder weights back to front, transposed
        for weight,bias in zip(reversed(weight_list),decode_bias_list):
            hidden_layer = tf.nn.sigmoid(tf.matmul(hidden_layer, tf.transpose(weight)) + bias)
        recon = hidden_layer
        # compute loss
        ## reconstruction loss: element-wise binary cross-entropy, summed per row.
        # The epsilon has to be added AFTER `1 - recon` is formed. Written as
        # `1e-9 + 1 - recon`, Python folds `1e-9 + 1` first and that constant
        # rounds to exactly 1.0 in float32, so the guard vanished and log(0)
        # was reachable whenever the output sigmoid saturated at 1.0.
        recon_loss = (input_x * tf.log(1e-9 + recon)) + ((1- input_x) * tf.log(1e-9 + (1 - recon)))
        recon_loss = - tf.reduce_sum( recon_loss, 1)
        ## close to gaussian loss: KL divergence between N(mean, exp(variance))
        ## and the unit Gaussian, summed over latent dimensions.
        gaussian_loss = -0.5 * tf.reduce_sum((1 + variance - tf.square(mean) - tf.exp(variance)),1)
        loss = tf.reduce_mean(recon_loss + gaussian_loss)
        
        optimizer = tf.train.RMSPropOptimizer(learning_rate = learning_rate)
        train_op = optimizer.minimize(loss)
        
        # Named `init_op` (not `init`) so the notebook-level `init` helper
        # function is not overwritten by the initializer op.
        init_op = tf.global_variables_initializer()
        
        sess.run(init_op)
        # Training
        for i in range(1, num_steps+1):
            # Prepare Data
            # Get the next batch of MNIST data (only images are needed, not labels)
            batch_x, _ = mnist.train.next_batch(num)

            # Train: one optimizer step; also fetch the batch loss for logging
            feed_dict = {input_x: batch_x}
            _, l = sess.run([train_op, loss], feed_dict=feed_dict)
            # Log the first step and then every 1000th step.
            if i % 1000 == 0 or i == 1:
                print('Step %i, Loss: %f' % (i, l))

Step 1, Loss: 567.372131
Step 1000, Loss: 171.603958
Step 2000, Loss: 186.927261
Step 3000, Loss: 178.683792
Step 4000, Loss: 164.049332
Step 5000, Loss: 173.377045
Step 6000, Loss: 167.649780
Step 7000, Loss: 169.515900
Step 8000, Loss: 163.574280
Step 9000, Loss: 170.399658
Step 10000, Loss: 169.687378
Step 11000, Loss: 153.051865
Step 12000, Loss: 160.581680
Step 13000, Loss: 147.483719
Step 14000, Loss: 162.355103
Step 15000, Loss: 165.988983
Step 16000, Loss: 152.213760
Step 17000, Loss: 161.139526
Step 18000, Loss: 153.681381
Step 19000, Loss: 151.893921
Step 20000, Loss: 156.812286
Step 21000, Loss: 153.698975
Step 22000, Loss: 150.289948
Step 23000, Loss: 150.592926
Step 24000, Loss: 155.173920
Step 25000, Loss: 162.995850
Step 26000, Loss: 160.207031
Step 27000, Loss: 160.512283
Step 28000, Loss: 160.839035
Step 29000, Loss: 162.958496
Step 30000, Loss: 156.991272


In [None]:
# Demo of the pairwise-iteration idiom on a three-layer width list.
# NOTE: this rebinds the notebook-level `encode_dim`.
# print() is called with a single pre-formatted string so the cell runs on
# Python 3 (the old `print a,b` statement is a syntax error there) and still
# prints "784 400"-style lines on Python 2.
encode_dim = [784,400,200]
# Forward order: (input width, output width) of each encoder layer.
for last_dim, next_dim in zip(encode_dim, encode_dim[1:]):
    print('%d %d' % (last_dim, next_dim))
print("-----------------------")
# Reverse order: the same layers walked back to front for the decoder.
for last_dim, next_dim in zip(reversed(encode_dim), reversed(encode_dim[:-1])):
    print('%d %d' % (last_dim, next_dim))