In [1]:
import tensorflow as tf
# Record the installed TensorFlow version in the cell output.
print(tf.__version__)

1.12.0


In [2]:
from omni_anomaly.vae import Lambda, VAE
import tfsnippet as spt
from tensorflow.python.ops.linalg.linear_operator_identity import LinearOperatorIdentity
from tensorflow_probability.python.distributions import LinearGaussianStateSpaceModel, MultivariateNormalDiag
from tfsnippet.distributions import Normal
from tfsnippet.utils import VarScopeObject, reopen_variable_scope
from tfsnippet.variational import VariationalInference

from omni_anomaly.recurrent_distribution import RecurrentDistribution
from omni_anomaly.vae import Lambda, VAE
from omni_anomaly.wrapper import TfpDistribution, softplus_std, rnn,  wrap_params_net

from functools import partial
from omni_anomaly.training import Trainer
from tfsnippet.variational import VariationalInference

class AttrDict(dict):
    """A ``dict`` whose entries can also be read and written as attributes.

    The instance ``__dict__`` is aliased to the mapping itself, so ``d.key``
    and ``d['key']`` always refer to the same entry.
    """

    def __init__(self, *args, **kwargs):
        # Accepts exactly what ``dict(...)`` accepts.
        dict.__init__(self, *args, **kwargs)
        # Point attribute storage at the mapping: attribute access and item
        # access then share one single store.
        self.__dict__ = self
        
# Hyper-parameters read by the cells below, exposed through attribute access
# (e.g. `config.z_dim`).
config = AttrDict(
    z_dim=3,
    x_dim=55,
    rnn_cell='GRU',
    rnn_num_hidden=500,
    window_length=100,
    dense_dim=500,
    posterior_flow_type='nf',  # 'nf' or None
    nf_layers=20,  # number of flow layers
    max_epoch=10,
    train_start=0,
    max_train_size=None,  # `None` means full train set
    batch_size=50,
    l2_reg=0.0001,
    initial_lr=0.001,
    lr_anneal_factor=0.5,
    lr_anneal_epoch_freq=40,
    lr_anneal_step_freq=None,
    std_epsilon=1e-4,

    # The paper indicates the benefit of using GRU, LGSSM and posterior flow;
    # use these params to turn on such features.
    use_connected_z_q=True,
    use_connected_z_p=True,
)

In [3]:
# Rebuild the OmniAnomaly training graph from code extracted out of the library.

# Start from an empty default graph so that re-running this cell does not stack
# a second copy of every op on top of the previous run.  (The former bare
# `tf.get_default_graph()` statement discarded its result and did nothing.)
tf.reset_default_graph()


def _rnn_features(x, name):
    """Apply the `rnn` helper to `x` under scope `name`, sized from `config`."""
    return rnn(x=x,
               window_length=config.window_length,
               rnn_num_hidden=config.rnn_num_hidden,
               hidden_dense=2,
               dense_dim=config.dense_dim,
               name=name)


def _build_vae():
    """Assemble the VAE.

    `config.use_connected_z_p` selects the prior over z (linear Gaussian
    state space model vs. independent unit Normal) and
    `config.use_connected_z_q` selects the posterior (RecurrentDistribution
    vs. plain Normal).  Components are created in the same order as the
    arguments of the original single `VAE(...)` expression.

    Returns:
        The constructed `VAE` instance.
    """
    # Prior p(z).
    if config.use_connected_z_p:
        p_z = TfpDistribution(
            LinearGaussianStateSpaceModel(
                num_timesteps=config.window_length,
                transition_matrix=LinearOperatorIdentity(config.z_dim),
                transition_noise=MultivariateNormalDiag(
                    scale_diag=tf.ones([config.z_dim])),
                observation_matrix=LinearOperatorIdentity(config.z_dim),
                observation_noise=MultivariateNormalDiag(
                    scale_diag=tf.ones([config.z_dim])),
                initial_state_prior=MultivariateNormalDiag(
                    scale_diag=tf.ones([config.z_dim]))
            )
        )
    else:
        p_z = Normal(mean=tf.zeros([config.z_dim]), std=tf.ones([config.z_dim]))

    # Posterior q(z|x): distribution factory.
    if config.use_connected_z_q:
        q_z_given_x = partial(
            RecurrentDistribution,
            mean_q_mlp=partial(tf.layers.dense, units=config.z_dim, name='z_mean',
                               reuse=tf.AUTO_REUSE),
            std_q_mlp=partial(softplus_std, units=config.z_dim,
                              epsilon=config.std_epsilon, name='z_std'),
            z_dim=config.z_dim,
            window_length=config.window_length
        )
    else:
        q_z_given_x = Normal

    # Network producing the parameters of p(x|z).
    h_for_p_x = Lambda(
        partial(
            wrap_params_net,
            h_for_dist=lambda x: _rnn_features(x, name='rnn_p_x'),
            mean_layer=partial(
                tf.layers.dense, units=config.x_dim, name='x_mean', reuse=tf.AUTO_REUSE
            ),
            std_layer=partial(
                softplus_std, units=config.x_dim, epsilon=config.std_epsilon,
                name='x_std'
            )
        ),
        name='p_x_given_z'
    )

    # Network producing the inputs/parameters of q(z|x).
    if config.use_connected_z_q:
        # RecurrentDistribution receives the hidden features as `input_q`.
        h_for_q_z = Lambda(
            lambda x: {'input_q': _rnn_features(x, name="rnn_q_z")},
            name='q_z_given_x'
        )
    else:
        h_for_q_z = Lambda(
            partial(
                wrap_params_net,
                h_for_dist=lambda x: _rnn_features(x, name="rnn_q_z"),
                mean_layer=partial(
                    tf.layers.dense, units=config.z_dim, name='z_mean', reuse=tf.AUTO_REUSE
                ),
                std_layer=partial(
                    softplus_std, units=config.z_dim, epsilon=config.std_epsilon,
                    name='z_std'
                )
            ),
            name='q_z_given_x'
        )

    return VAE(
        p_z=p_z,
        p_x_given_z=Normal,
        q_z_given_x=q_z_given_x,
        h_for_p_x=h_for_p_x,
        h_for_q_z=h_for_q_z
    )


vae = _build_vae()

# Honour `config.posterior_flow_type` ('nf' or None); previously the planar
# flow was built unconditionally, so the setting had no effect.
if config.posterior_flow_type == 'nf':
    _posterior_flow = spt.layers.planar_normalizing_flows(config.nf_layers, name='posterior_flow')
else:
    _posterior_flow = None

# input placeholders
input_x = tf.placeholder(dtype=tf.float32, shape=[None, config.window_length, config.x_dim], name='input_x')
learning_rate = tf.placeholder(dtype=tf.float32, shape=(), name='learning_rate')

# get training_loss
with tf.name_scope('training_loss'):
    chain = vae.chain(input_x, n_z=None, posterior_flow=_posterior_flow)
    # Element-wise log p(x|z), then summed over the last axis
    # (presumably the x_dim axis -- TODO confirm).
    x_log_prob = chain.model['x'].log_prob(group_ndims=0)
    log_joint = tf.reduce_sum(x_log_prob, -1)
    # NOTE(review): the return value is discarded; kept as in the extracted
    # code -- confirm whether this call can be dropped.
    chain.vi.training.sgvb()
    # Re-create the variational inference object around the custom log_joint,
    # reusing the latent log-probs and axis from the chain.
    vi = VariationalInference(
        log_joint=log_joint,
        latent_log_probs=chain.vi.latent_log_probs,
        axis=chain.vi.axis
    )
    loss = tf.reduce_mean(vi.training.sgvb())


In [4]:
# plot graph

# NOTE(review): machine-specific absolute path -- adjust before running elsewhere.
output_dir = "/data/zhiyang.wang/logs/OmniAnomaly/6"

# Dump the default graph as an event file under `output_dir` for TensorBoard.
writer = tf.summary.FileWriter(output_dir, tf.get_default_graph())
# FileWriter writes events asynchronously; flush so the graph is on disk as
# soon as this cell finishes.  The writer is left open for later use.
writer.flush()


In [5]:
# Show every variable registered in the default graph's trainable collection.
tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)

[<tf.Variable 'q_z_given_x/rnn_q_z/rnn/gru_cell/gates/kernel:0' shape=(555, 1000) dtype=float32_ref>,
 <tf.Variable 'q_z_given_x/rnn_q_z/rnn/gru_cell/gates/bias:0' shape=(1000,) dtype=float32_ref>,
 <tf.Variable 'q_z_given_x/rnn_q_z/rnn/gru_cell/candidate/kernel:0' shape=(555, 500) dtype=float32_ref>,
 <tf.Variable 'q_z_given_x/rnn_q_z/rnn/gru_cell/candidate/bias:0' shape=(500,) dtype=float32_ref>,
 <tf.Variable 'q_z_given_x/rnn_q_z/dense/kernel:0' shape=(500, 500) dtype=float32_ref>,
 <tf.Variable 'q_z_given_x/rnn_q_z/dense/bias:0' shape=(500,) dtype=float32_ref>,
 <tf.Variable 'q_z_given_x/rnn_q_z/dense_1/kernel:0' shape=(500, 500) dtype=float32_ref>,
 <tf.Variable 'q_z_given_x/rnn_q_z/dense_1/bias:0' shape=(500,) dtype=float32_ref>,
 <tf.Variable 'vae/variational/z_mean/kernel:0' shape=(503, 3) dtype=float32_ref>,
 <tf.Variable 'vae/variational/z_mean/bias:0' shape=(3,) dtype=float32_ref>,
 <tf.Variable 'vae/variational/z_std/kernel:0' shape=(503, 3) dtype=float32_ref>,
 <tf.Variabl