# Factorized Variational Autoencoder

Following the model specified in the Disney Research paper "Factorized Variational Autoencoders for
Modeling Audience Reactions to Movies"

## Testing with toy data
We begin by testing with a toy dataset taken from the Edward examples:
https://github.com/blei-lab/edward/blob/master/examples/probabilistic_matrix_factorization.py


In [6]:
import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np
import datetime
from sklearn.model_selection import train_test_split

tfd = tf.contrib.distributions

N = 1000
M = 10000
D = 3
sigma = 0.1
batch_size = 100
epochs = 10

U_true = np.random.randn(D, N)
V_true = np.random.randn(D, M)

X = np.dot(np.transpose(U_true), V_true) + np.random.normal(0, sigma, size=(N, M))
X = X.astype(np.float32)


In [50]:
def make_encoder(data, z_dim, batch_size, num_features):
    data = tf.reshape(data, [batch_size, num_features])

    # sample latent variables
    x = tf.layers.dense(inputs=data,
            units=512, activation=tf.nn.relu)
    x = tf.layers.dense(inputs=x,
            units=256, activation=tf.nn.relu)
    x = tf.layers.dense(inputs=x,
            units=128, activation=tf.nn.relu)
    u_net = tf.layers.dense(inputs=x,
                      units = z_dim * 2,
                      activation=None)
    u_loc = u_net[..., :z_dim]
    u_scale = u_net[..., z_dim:]
    u = tfd.MultivariateNormalDiag(u_loc, scale_diag=u_scale, name='sample_latent_U')
    
    # observation latent variables
    x_t = tf.transpose(data)
    x_t = tf.layers.dense(inputs=x_t,
            units=64, activation=tf.nn.relu)
    x_t = tf.layers.dense(inputs=x_t,
            units=32, activation=tf.nn.relu)
    x_t = tf.layers.dense(inputs=x_t,
            units=16, activation=tf.nn.relu)
    v_net = tf.layers.dense(inputs=x_t,
                      units = z_dim * 2,
                      activation=None)
    v_loc = v_net[..., z_dim:]    
    v_scale = v_net[..., :z_dim]
    
    v = tfd.MultivariateNormalDiag(v_loc, scale_diag=v_scale, name='observation_latent_V')

    print(u.sample())
    print(v.sample())
    
    # factorized latent variables
    z = tfd.MultivariateNormalDiag(loc=tf.multiply(u.sample(), v.sample()),
            scale_diag=tf.ones(z_dim),
            name='approximate_posterior_q')
 
    return u, v, z


def make_decoder(z, num_features, z_dim):
    x = tf.layers.dense(inputs=z,
            units=128, activation=tf.nn.relu)
    x = tf.layers.dense(inputs=x,
             units=256, activation=tf.nn.relu)
    x = tf.layers.dense(inputs=x,
            units=512, activation=tf.nn.relu)
    
    decoder_net = tf.layers.dense(inputs=x,
            units=num_features, activation=None)
    
    data_dist = tfd.MultivariateNormalDiag(loc=decoder_net,
                            name='posterior_p')
        
    return data_dist


def make_prior(z_dim):
    u_prior =  tfd.MultivariateNormalDiag(scale_diag=tf.ones(z_dim),
                                    name='U')
    v_prior = tfd.MultivariateNormalDiag(scale_diag=tf.ones(z_dim),
                                    name='V')
        
    return u_prior, v_prior


def prior_prob(u_prior, v_prior):
    loc = tf.multiply(u_prior.sample(), v_prior.sample())
    
    return tfd.MultivariateNormalDiag(loc=loc)


In [51]:
graph = tf.Graph()
with graph.as_default():
    # input pipeline
    dataset = tf.data.Dataset.from_tensor_slices(X)
    dataset = dataset.batch(batch_size)
    iterator = dataset.make_initializable_iterator()
    data = iterator.get_next()
    
    with tf.variable_scope('priors'):
        U_prior, V_prior = make_prior(z_dim=D)
        
    # inference network; encoder
    with tf.variable_scope('encoder'):
        u, v, encoder_q = make_encoder(data, z_dim=D, batch_size=batch_size,
                              num_features=M)
    
    z = encoder_q.sample()
    u_hat = u.mean()
    v_hat = v.mean()

    # generative network; decoder
    with tf.variable_scope('decoder'):
        decoder_p = make_decoder(z, z_dim=D, num_features=M)
    
    # prior
    with tf.variable_scope('prior'):
        u_prior, v_prior = make_prior(z_dim=D)

    # loss
    log_p_v = v.log_prob(v.mean())
    log_p_u = u.log_prob(u.mean())
    likelihood = decoder_p.log_prob(data)
    kl_divergence = tfd.kl_divergence(encoder_q, prior_prob(u_prior, v_prior))
    loss = tf.reduce_mean(kl_divergence - likelihood + log_p_u + log_p_v)
    tf.summary.scalar('loss', loss)

    # optimizer
    optimizer = tf.train.AdamOptimizer(0.001).minimize(loss)

    merged = tf.summary.merge_all()
    

Tensor("encoder/sample_latent_U/sample/affine_linear_operator/forward/add:0", shape=(100, 3), dtype=float32)
Tensor("encoder/observation_latent_V/sample/affine_linear_operator/forward/add:0", shape=(10000, 3), dtype=float32)


ValueError: Dimensions must be equal, but are 100 and 10000 for 'encoder/Mul' (op: 'Mul') with input shapes: [100,3], [10000,3].

Estimate parameters of the model:

In [17]:
# tensorboard
import datetime
run = 'run-{date:%d.%m.%Y_%H:%M:%S}'.format( date=datetime.datetime.now() )
tb_writer = tf.summary.FileWriter('/logs/' + run, graph=graph)

# training
with tf.Session(graph=graph) as sess:
    sess.run(tf.global_variables_initializer())
    batch_counter = 0
    for epoch in range(epochs):
        sess.run(iterator.initializer)

        while True:
            try:
                _, epoch_z, summary = sess.run([optimizer, z, merged])
                tb_writer.add_summary(summary, batch_counter)
                batch_counter += 1
            except tf.errors.OutOfRangeError:
                break

Check to see how well we did: