# Factorized Variational Autoencoder

This notebook follows the model specified in the Disney Research paper "Factorized Variational
Autoencoders for Modeling Audience Reactions to Movies".

## Testing with toy data
We begin by testing with a toy dataset taken from the Edward examples:
https://github.com/blei-lab/edward/blob/master/examples/probabilistic_matrix_factorization.py


In [15]:
import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np

tfd = tf.contrib.distributions

# Problem size: X is an N x M matrix generated from D latent factors.
N, M, D = 1000, 10000, 3
sigma = 0.1        # standard deviation of the additive Gaussian noise
batch_size = 100   # rows of X per mini-batch

# Ground-truth factor matrices, one column per row / column of X.
U_true = np.random.randn(D, N)
V_true = np.random.randn(D, M)

# Low-rank signal U^T V plus Gaussian noise, stored as float32.
X = (
    U_true.T.dot(V_true)
    + np.random.normal(loc=0, scale=sigma, size=(N, M))
).astype(np.float32)

In [20]:
def make_encoder(x, z_dim):
    """Build the inference network and return the approximate posterior.

    `x` goes through three ReLU dense layers (512, 256, 128 units) and a
    final linear layer with `2 * z_dim` outputs. The first `z_dim` outputs
    are used as the mean; the remaining ones are shifted by 0.5 and passed
    through softplus to give the diagonal scale.

    Args:
        x: input tensor fed to the first dense layer.
        z_dim: number of latent dimensions.

    Returns:
        A `tfd.MultivariateNormalDiag` named 'encoder_distribution'.
    """
    hidden = x
    # Funnel the input through progressively narrower ReLU layers.
    for width in (512, 256, 128):
        hidden = tf.layers.dense(inputs=hidden, units=width,
                                 activation=tf.nn.relu)

    # A single linear head emits both halves of the distribution parameters.
    params = tf.layers.dense(inputs=hidden, units=z_dim * 2, activation=None)

    mean = params[..., :z_dim]
    # Softplus keeps the scale strictly positive.
    raw_scale = params[..., z_dim:]
    stddev = tf.nn.softplus(raw_scale + 0.5)

    return tfd.MultivariateNormalDiag(loc=mean,
                                      scale_diag=stddev,
                                      name='encoder_distribution')
    

def make_decoder(z, num_features, z_dim):
    """Build the generative network and return the likelihood over the data.

    The latent code is reshaped, passed through three ReLU dense layers
    (128, 256, 512 units) and a final linear layer of `num_features` units
    whose output is used as the mean of the returned distribution.

    Args:
        z: latent code tensor; reshaped to `[-1, batch_size, z_dim]` using
            the module-level `batch_size`.
        num_features: number of output units, i.e. the size of each
            reconstructed mean vector.
        z_dim: number of latent dimensions.

    Returns:
        A `tfd.MultivariateNormalDiag` named 'decoder_distribution'. Only
        `loc` is supplied, so the distribution's default scale applies.
    """
    # Normalise the code to an explicit [draws, batch, z_dim] layout.
    z = tf.reshape(z, [-1, batch_size, z_dim])

    # Feed the reshaped code into the network (mirror image of the encoder).
    x = tf.layers.dense(inputs=z,
            units=128, activation=tf.nn.relu)
    x = tf.layers.dense(inputs=x,
            units=256, activation=tf.nn.relu)
    x = tf.layers.dense(inputs=x,
            units=512, activation=tf.nn.relu)

    # Linear output layer: one mean per feature.
    decoder_net = tf.layers.dense(inputs=x,
            units=num_features, activation=None)

    data_dist = tfd.MultivariateNormalDiag(loc=decoder_net,
                            name='decoder_distribution')

    return data_dist


def make_V_prior(z_dim):
    """Return the prior over V.

    Args:
        z_dim: number of latent dimensions.

    Returns:
        A `tfd.MultivariateNormalDiag` named 'V_prior_distribution' with an
        all-ones `scale_diag` of length `z_dim` and no explicit `loc`.
    """
    unit_scale = tf.ones(z_dim)
    return tfd.MultivariateNormalDiag(scale_diag=unit_scale,
                                      name='V_prior_distribution')

def make_U_prior(z_dim):
    """Return the prior over U.

    A diagonal multivariate normal (all-ones `scale_diag`, no explicit
    `loc`) is pushed through an `Exp` bijector.

    Args:
        z_dim: number of latent dimensions.

    Returns:
        A `tfd.TransformedDistribution` named 'U_prior_distribution'.
    """
    base_normal = tfd.MultivariateNormalDiag(scale_diag=tf.ones(z_dim))
    exp_bijector = tfd.bijectors.Exp()
    return tfd.TransformedDistribution(distribution=base_normal,
                                       bijector=exp_bijector,
                                       name='U_prior_distribution')


In [22]:
# Build the whole training graph (input pipeline, encoder, priors, loss and
# train op) inside a dedicated tf.Graph. Nothing is executed in this cell.
graph = tf.Graph()
with graph.as_default():
    # input pipeline: slice X row-wise and serve it in batches of `batch_size`.
    # The iterator is initializable; its initializer is not run in this cell.
    dataset = tf.data.Dataset.from_tensor_slices(X)
    dataset = dataset.batch(batch_size)
    iterator = dataset.make_initializable_iterator()
    data = iterator.get_next()

    # inference network; encoder: approximate posterior q for the current batch
    with tf.variable_scope('encoder'):
        encoder = make_encoder(data, z_dim=D)

    # replace this with element-wise product of U and V samples
    # NOTE(review): this `z` is not referenced again in this cell; the `z`
    # inside joint_log_prob below is that function's own parameter.
    z = encoder.sample()

    # priors over the U and V factors
    with tf.variable_scope('priors'):
        U_prior = make_U_prior(z_dim=D)
        V_prior = make_V_prior(z_dim=D)


    # loss
    # Log joint used as `p_log_prob`: decoder likelihood of the batch plus
    # the two prior terms. The decoder is built inside this function, under
    # the 'decoder' variable scope.
    def joint_log_prob(z):
        with tf.variable_scope('decoder'):
            decoder = make_decoder(z, z_dim=D, num_features=M)
        # NOTE(review): U_prior.log_prob() is called without a value, unlike
        # the other two terms. This presumably fails at graph-construction
        # time; decide which tensor (likely a U sample) should be scored.
        return decoder.log_prob(data) + U_prior.log_prob() + V_prior.log_prob(z)

    # Sum of the Monte Carlo Csiszar f-divergence estimate (f = kl_reverse)
    # between q = encoder and joint_log_prob, using a single draw.
    loss = tf.reduce_sum(
            tfp.vi.monte_carlo_csiszar_f_divergence(
                f=tfp.vi.kl_reverse,
                p_log_prob=joint_log_prob,
                q=encoder,
                num_draws=1))


    # optimizer: Adam with learning rate 0.001. Note that `optimizer` holds
    # the op returned by minimize(), not the AdamOptimizer object.
    optimizer = tf.train.AdamOptimizer(0.001).minimize(loss)

Instructions for updating:
keep_dims is deprecated, use keepdims instead
