In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.contrib.layers import fully_connected
from tensorflow.examples.tutorials.mnist import input_data

In [5]:
-np.exp(2)

-7.3890560989306504

In [20]:
np.log(0.35)

-1.0498221244986778

In [22]:
10e-4

0.001

In [2]:
mnist = input_data.read_data_sets("/tmp/data/")

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [3]:
# Network architecture
n_inputs = 28*28
n_hidden1 = 400
n_hidden2 = 400
n_outputs = 10

In [4]:
def neurons_bayes(X, n_neurons, name, mu, rho, noise, activation=None):
    with tf.name_scope(name):
        n_inputs = int(X.get_shape()[1])
        W = tf.Variable(mu + tf.log(1 + tf.exp(rho))*noise,
                       name="weights")
        b = tf.Variable(tf.zeros([n_neurons]), name="biases")
        z = tf.matmul(X,W) + b
        if activation == "relu":
            return tf.nn.relu(z)
        else:
            return z

In [5]:
tf.reset_default_graph()

# variational posterior parameters
mu1 = tf.Variable(tf.random_uniform([n_inputs, n_hidden1]), name="mu1")
mu2 = tf.Variable(tf.random_uniform([n_hidden1, n_hidden2]), name="mu2")
mu3 = tf.Variable(tf.random_uniform([n_hidden2, n_outputs]), name="mu3")
rho1 = tf.Variable(tf.random_uniform([n_inputs, n_hidden1], 0, 1.0), name="rho1")
rho2 = tf.Variable(tf.random_uniform([n_hidden1, n_hidden2], 0, 1.0), name="rho1")
rho3 = tf.Variable(tf.random_uniform([n_hidden2, n_outputs], 0, 1.0), name="rho1")

noise1 = tf.random_normal((n_inputs, n_hidden1), dtype=tf.float32)
noise2 = tf.random_normal((n_hidden1, n_hidden2), dtype=tf.float32)
noise3 = tf.random_normal((n_hidden2, n_outputs), dtype=tf.float32)

# defining layers
X = tf.placeholder(tf.float32, shape = [None, n_inputs])
y = tf.placeholder(tf.int64, shape = (None))
hidden1 = neurons_bayes(X, n_hidden1, "hidden1", mu1, rho1, noise1, "relu") 
hidden2 = neurons_bayes(hidden1, n_hidden2, "hidden2", mu2, rho2, noise2, "relu")
logits = neurons_bayes(hidden2, n_outputs, "outputs", mu3, rho3, noise3)

In [6]:
# defining loss function
eps = 1e-10
weights = [v for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) \
           if "weights" in v.name]
with tf.name_scope("loss"):
    log_xentropy = tf.reduce_mean(tf.log(eps + tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                 logits=logits)))
    
    log_prior_weights = [-v**2/(2*0.05) - tf.log(eps + tf.sqrt(2*np.pi*0.05)) for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) \
           if "weights" in v.name]
    log_prior = (tf.reduce_sum(log_prior_weights[0]) + tf.reduce_sum(log_prior_weights[1]) 
                + tf.reduce_sum(log_prior_weights[2])) 
    
    sigma1 = tf.log(1+tf.exp(rho1))
    sigma2 = tf.log(1+tf.exp(rho2))
    sigma3 = tf.log(1+tf.exp(rho3))
    
    log_posterior_1 = -(weights[0] - mu1)**2 /(2*sigma1**2) - tf.log(eps + tf.sqrt(2*np.pi)*sigma1)
    log_posterior_2 = -(weights[1] - mu2)**2 /(2*sigma2**2) - tf.log(eps + tf.sqrt(2*np.pi)*sigma2)
    log_posterior_3 = -(weights[2] - mu3)**2 /(2*sigma3**2) - tf.log(eps + tf.sqrt(2*np.pi)*sigma3)
    log_variational_posterior = (tf.reduce_sum(log_posterior_1) + 
                                 tf.reduce_sum(log_posterior_2) + 
                                 tf.reduce_sum(log_posterior_3)) 
    
    # expected lower bound
    elb = log_variational_posterior - log_xentropy - log_prior

In [7]:
# optimizer
# learning algorithm
learning_rate = 0.01
with tf.name_scope("train"):    
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(elb)

In [8]:
# random sampling to get weights
weights = [v for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) \
           if "weights" in v.name]

noise1 = tf.random_normal((n_inputs, n_hidden1), dtype=tf.float32)
noise2 = tf.random_normal((n_hidden1, n_hidden2), dtype=tf.float32)
noise3 = tf.random_normal((n_hidden2, n_outputs), dtype=tf.float32)

w1_reassign = tf.assign(weights[0], tf.ones((n_inputs, n_hidden1))*mu1 + tf.log(1 + tf.exp(rho1))*noise1)
w2_reassign = tf.assign(weights[1], tf.ones((n_hidden1, n_hidden2))*mu2 + tf.log(1 + tf.exp(rho2))*noise2)
w3_reassign = tf.assign(weights[2], tf.ones((n_hidden2, n_outputs))*mu3 + tf.log(1 + tf.exp(rho3))*noise3)

In [9]:
# evaluation
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [10]:
init = tf.global_variables_initializer()

### Training time

In [11]:
# training time
n_epochs = 400
batch_size = 100

In [12]:
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run([w1_reassign, w2_reassign, w3_reassign])
            sess.run(training_op, feed_dict={X: X_batch, y:y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y:y_batch})
        acc_test = accuracy.eval(feed_dict={X: mnist.test.images,
                                y: mnist.test.labels})
        current_elb = elb.eval(feed_dict={X: X_batch, y:y_batch})
        print(epoch, "Train accuracy:", acc_train, "Test_accuracy:", acc_test, "ELB:", current_elb)

0 Train accuracy: 0.1 Test_accuracy: 0.1009 ELB: 3.85166e+06
1 Train accuracy: 0.0 Test_accuracy: 0.0974 ELB: 4.45058e+06
2 Train accuracy: 0.08 Test_accuracy: 0.101 ELB: 5.13468e+06
3 Train accuracy: 0.18 Test_accuracy: 0.101 ELB: 5.86646e+06
4 Train accuracy: 0.0 Test_accuracy: 0.0892 ELB: 6.63904e+06
5 Train accuracy: 0.14 Test_accuracy: 0.1032 ELB: 5.09849e+06
6 Train accuracy: 0.18 Test_accuracy: 0.1009 ELB: 6.59419e+06
7 Train accuracy: 0.04 Test_accuracy: 0.0889 ELB: -6.39503e+07
8 Train accuracy: 0.08 Test_accuracy: 0.0975 ELB: -4.74646e+07
9 Train accuracy: 0.14 Test_accuracy: 0.101 ELB: -3.38489e+08
10 Train accuracy: 0.08 Test_accuracy: 0.101 ELB: -6.37419e+08
11 Train accuracy: 0.12 Test_accuracy: 0.101 ELB: -2.3951e+09
12 Train accuracy: 0.02 Test_accuracy: 0.0931 ELB: -6.59212e+10
13 Train accuracy: 0.04 Test_accuracy: 0.101 ELB: -2.8514e+10
14 Train accuracy: 0.1 Test_accuracy: 0.101 ELB: -3.9801e+10
15 Train accuracy: 0.1 Test_accuracy: 0.1117 ELB: -2.88929e+10
16 Train

KeyboardInterrupt: 