In [46]:
import sys
import os

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from skimage.measure import block_reduce

In [47]:
# Load MNIST through the TensorFlow tutorial helper, using MNIST_data/ as the
# data directory. one_hot=True is requested because next_batch() below recovers
# the digit with np.argmax over axis 1 of the label array.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [218]:
# Mini-batch size. langevin_pCN reads this as a module-level global, and the
# training cells assign it again before they run.
batch_sz = 20

def next_batch(set_choice, batch_sz):
    """Draw a batch from MNIST and reduce it to a binary 0-vs-1 task.

    Args:
        set_choice: truthy to draw from ``mnist.train``, falsy to draw from
            ``mnist.test``.
        batch_sz: number of examples requested from MNIST *before* filtering.
            Only the examples whose digit is 0 or 1 are returned, so the
            result usually holds fewer than ``batch_sz`` rows.

    Returns:
        A tuple ``(images, labels)``:

        * ``images`` -- array of shape ``(k, 100)``; every 28x28 digit is
          reduced with ``block_reduce`` (3x3 blocks, ``np.mean``) and
          flattened to 100 values.
        * ``labels`` -- array of shape ``(k, 2)``, one-hot, where column 1
          marks the digit 1 and column 0 marks the digit 0.

        ``k`` is 0 when the draw contains neither digit; both arrays keep
        their 2-D shape in that case.
    """
    source = mnist.train if set_choice else mnist.test
    image, label = source.next_batch(batch_sz)

    # Labels arrive one-hot, so argmax recovers the digit.
    digits = np.argmax(label, axis=1)
    true_index = np.flatnonzero(digits == 1)
    false_index = np.flatnonzero(digits == 0)

    # Ones first, zeros second; the binary labels are built in the same order
    # so that row i of the images always matches entry i of the labels.
    stacked_images = np.vstack((image[true_index, :], image[false_index, :]))
    new_labels = np.concatenate((np.ones(true_index.size, dtype=int),
                                 np.zeros(false_index.size, dtype=int)))

    # Shuffle images and labels with one shared permutation.
    order = np.random.permutation(new_labels.size)
    stacked_images = stacked_images[order, :]
    new_labels = new_labels[order]

    to_return_labels = np.zeros((new_labels.size, 2))
    to_return_labels[np.arange(new_labels.size), new_labels] = 1

    pooled = [block_reduce(im.reshape(28, 28), block_size=(3, 3),
                           func=np.mean).reshape(100)
              for im in stacked_images]
    # np.array([]) alone has shape (0,); the explicit reshape keeps the
    # documented (k, 100) layout even when no 0/1 digit was drawn.
    to_return_images = np.array(pooled).reshape(-1, 100)
    return (to_return_images, to_return_labels)

# Materialise the whole binary (0-vs-1) train and test sets once: request every
# example of each split and let next_batch keep only the zeros and ones.
training_images, training_labels = next_batch(
    set_choice=True, batch_sz=mnist.train.num_examples)
testing_images, testing_labels = next_batch(
    set_choice=False, batch_sz=mnist.test.num_examples)

In [219]:
def query_examples(set_images, set_labels, batch_size):
    """Sample a random mini-batch of matching image/label rows.

    Row indices are drawn with ``np.random.choice`` over the number of rows in
    ``set_images`` (default settings, i.e. sampling with replacement), and the
    same indices select from both arrays.

    Args:
        set_images: 2-D array with one example per row.
        set_labels: array whose first axis lines up with ``set_images``.
        batch_size: number of rows to draw.

    Returns:
        ``(images, labels)`` for the drawn rows, in draw order.
    """
    n_available = set_images.shape[0]
    picked = np.random.choice(n_available, batch_size)
    batch_images = set_images[picked, :]
    batch_labels = set_labels[picked]
    return batch_images, batch_labels

# Sanity check: display the third pooled training digit as a 10x10 image.
# The original referenced `images`, which is never defined in this notebook and
# raises NameError on a fresh kernel; training_images is the pooled array
# produced by next_batch().
plt.imshow(training_images[2, :].reshape(10, 10))
plt.show()

def main_net_complex(x):
    """Build a fully connected network: two 500-unit ReLU layers, then 2 logits.

    Args:
        x: input tensor passed to the first dense layer.

    Returns:
        The un-activated output of the final 2-unit dense layer.
    """
    activations = x
    # Two identical hidden layers, created in the same order as before.
    for _ in range(2):
        activations = tf.layers.dense(activations, 500,
                                      activation=tf.nn.relu, use_bias=True)
    return tf.layers.dense(activations, 2, activation=None, use_bias=True)

In [247]:
# Computational graph for the binary (0-vs-1) task on 100-feature pooled images.
# It defines (1) the linear classifier that is evaluated, (2) a second "chain"
# copy of the same linear model that langevin_pCN uses as sampler state, and
# (3) the proposal / acceptance tensors built from the chain.

# delta and gamma feed the constants (dpos, dneg, twod, eightd, gam, dfour) and
# the stddev of the Xi_* noise variables below.
delta,gamma = .5,1

# Discard any previously built graph so re-running this cell starts clean.
tf.reset_default_graph()
x_ = tf.placeholder(tf.float32, name = "InputImages", shape= [None, 100])
y_ = tf.placeholder(tf.float32, name = "InputLabels", shape = [None, 2])

# Evaluated model: logits = x_ . weights + biases.
# Note: name= is passed to tf.random_normal, so it names the initial-value op,
# not the tf.Variable itself.
weights = tf.Variable(tf.random_normal([100,2], name = "NetworkWeights", stddev = 0.1))
biases = tf.Variable(tf.random_normal([2], name = "NetworkBiases", stddev = 0.1))
output_logits =  tf.matmul(x_, weights) + biases

# Cross-entropy of the evaluated model against y_ (not reduced to a scalar here).
rl = tf.nn.softmax_cross_entropy_with_logits_v2(labels = y_, logits = output_logits)

# Sampler state: current position of the chain, same shapes as weights/biases.
chain_weights = tf.Variable(tf.random_normal([100,2], stddev = 0.1))
chain_biases = tf.Variable(tf.random_normal([2], stddev = 0.1))

# Zero-initialised accumulators; langevin_pCN adds the chain state to these and
# later divides by the number of accumulated samples.
expected_weights = tf.Variable(tf.zeros(dtype = tf.float32, shape = chain_weights.get_shape()))
expected_biases = tf.Variable(tf.zeros(dtype = tf.float32, shape = chain_biases.get_shape()))

# Loss of the linear model evaluated at the current chain state.
c_output_logits =  tf.matmul(x_, chain_weights) + chain_biases
cl = tf.nn.softmax_cross_entropy_with_logits_v2(labels = y_, logits = c_output_logits)

# tf.gradients returns a list (one entry per `xs`), so dcw and dcb are
# one-element lists rather than bare tensors.
dcw = tf.gradients(cl, chain_weights)
dcb = tf.gradients(cl, chain_biases)
# Flattened gradient (dc) and flattened chain state (c): weights first, then biases.
dc = tf.concat((tf.reshape(dcw, [-1]),tf.reshape(dcb, [-1])), axis = 0)
c = tf.concat((tf.reshape(chain_weights, [-1]),tf.reshape(chain_biases, [-1])), axis = 0)

# Gaussian noise for the proposal (original note: "need to initialize this
# somewhere in computations below").
# NOTE(review): as tf.Variables these only take a new random value when their
# initializer runs, and langevin_pCN does not re-initialise them -- confirm
# whether the noise is meant to be redrawn for every proposal.
Xi_w = tf.Variable(tf.random_normal(weights.get_shape(), stddev = gamma))
Xi_b = tf.Variable(tf.random_normal(biases.get_shape(), stddev = gamma))

# Scalar constants derived from delta / gamma.
dpos = tf.constant(2+delta, dtype = tf.float32)
dneg = tf.constant(2-delta, dtype = tf.float32)
twod = tf.constant(2*delta, dtype = tf.float32)
eightd = tf.constant(np.sqrt(8*delta), dtype = tf.float32)
gam = tf.constant(gamma, dtype = tf.float32)

# Proposal: shrink the chain state, step against the gradient, add scaled noise.
# Because dcw/dcb are lists, the result presumably carries a leading dimension
# of 1 -- which would explain the reshape of proposal_w below. TODO confirm.
proposal_w = ((dneg/dpos)*chain_weights)-((twod/dpos)*gam*dcw)+(eightd*Xi_w)
proposal_b = ((dneg/dpos)*chain_biases)-((twod/dpos)*gam*dcb)+(eightd*Xi_b)

# Loss of the linear model evaluated at the proposal.
p_output_logits =  tf.matmul(x_, tf.reshape(proposal_w,[100,2])) + proposal_b
pl = tf.nn.softmax_cross_entropy_with_logits_v2(labels = y_, logits = p_output_logits)

# Gradient of the proposal loss, flattened the same way as dc / c.
dpw = tf.gradients(pl, proposal_w)
dpb = tf.gradients(pl, proposal_b)
dp = tf.concat((tf.reshape(dpw, [-1]),tf.reshape(dpb, [-1])), axis = 0)
p = tf.concat((tf.reshape(proposal_w, [-1]),tf.reshape(proposal_b, [-1])), axis = 0)

half = tf.constant(1/2, dtype = tf.float32)
dfour = tf.constant(delta/4, dtype = tf.float32)

# Quantities compared by langevin_pCN to form the acceptance probability.
# Original author's note: these values must have shape [1]; still to be sorted out.
# NOTE(review): the tensordot terms contract two 1-D tensors, while `p` / `c`
# and the squared-gradient term are vectors, so these presumably evaluate to
# vectors rather than scalars -- confirm against the intended formula.

p_u_v = p + half * tf.tensordot((c-p), dp, axes = 1) \
           + dfour * tf.tensordot((c+p), dp, axes = 1) \
           + dfour *(eightd*dp)**2

p_v_u = c + half * tf.tensordot((p-c), dc, axes = 1) \
           + dfour * tf.tensordot((c+p), dc, axes = 1) \
           + dfour *(eightd*dc)**2

# Gradient-based baseline (Adam on rl) and accuracy of the evaluated model.
train_step = tf.train.AdamOptimizer(learning_rate = .001).minimize(rl)
correct_prediction = tf.equal(tf.argmax(output_logits,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [256]:
def langevin_pCN(delta, cycles, burn_in, batches, dim):
    '''
    Train the linear classifier with the notebook's modified (Langevin) pCN sampler.

    For every batch the chain state and the running sums are re-initialised,
    `cycles` proposals are evaluated on one mini-batch, and the average of the
    chain state over the post-burn-in iterations is written into `weights` /
    `biases`. Later batches therefore overwrite the result of earlier ones.

    Relies on module-level state: the session `sess`, the graph tensors
    (`x_`, `y_`, `p_u_v`, `p_v_u`, `proposal_w`, `proposal_b`, `chain_weights`,
    `chain_biases`, `expected_weights`, `expected_biases`, `weights`,
    `biases`), `batch_sz`, and the data (`training_images`, `training_labels`,
    `mnist`).

    Args:
        delta: not used by this function (the step size is baked into the
            graph constants); kept so existing calls keep working.
        cycles: number of proposals evaluated per batch.
        burn_in: iterations with index <= burn_in are left out of the average.
        batches: number of mini-batches (independent chains) to run.
        dim: [n_inputs, n_classes], used to reshape the fetched proposal.
            dim[0] == 100 selects the pooled binary data set; anything else
            draws from mnist.train.

    Raises:
        ValueError: if batches > 0 but no iteration survives burn-in (the
            average would otherwise fail with ZeroDivisionError).
    '''
    kept_per_batch = max(0, cycles - max(burn_in + 1, 0))
    if batches > 0 and kept_per_batch == 0:
        raise ValueError('cycles must exceed burn_in + 1 so that at least one '
                         'sample is averaged (cycles=%r, burn_in=%r)'
                         % (cycles, burn_in))

    # Build every op once per call. Creating tf.assign ops inside the sampling
    # loop (as before) adds new nodes to the graph on every iteration, so the
    # graph grew with `cycles * batches`.
    reset_chain = tf.variables_initializer([chain_weights,
                                            chain_biases,
                                            expected_weights,
                                            expected_biases])
    w_in = tf.placeholder(tf.float32, shape=chain_weights.get_shape())
    b_in = tf.placeholder(tf.float32, shape=chain_biases.get_shape())
    move_chain = [tf.assign(chain_weights, w_in),
                  tf.assign(chain_biases, b_in)]
    accumulate = [tf.assign_add(expected_weights, chain_weights),
                  tf.assign_add(expected_biases, chain_biases)]
    store_mean = [tf.assign(weights, w_in),
                  tf.assign(biases, b_in)]
    fetches = [p_u_v, p_v_u, proposal_w, proposal_b]

    for batch in range(batches):

        sess.run(reset_chain)
        acceptance_count = 0
        counter = 0

        if dim[0] == 100:
            batch_images, batch_labels = query_examples(training_images, training_labels, batch_sz)
        else:
            batch_images, batch_labels = mnist.train.next_batch(batch_sz)
        feed = {x_: batch_images, y_: batch_labels}

        for i in range(cycles):

            puv, pvu, pw, pb = sess.run(fetches, feed_dict=feed)

            # NOTE(review): np.linalg.norm is never negative, so the
            # exponential is >= 1 and alpha is always 1 -- every proposal is
            # accepted. Left unchanged here because the intended acceptance
            # rule depends on how p_u_v / p_v_u are meant to be defined.
            alpha = np.min((np.exp(np.linalg.norm(puv - pvu)), 1))

            accepted = np.random.uniform(0, 1) < alpha
            if accepted:
                # Move the chain to the proposal (a single update suffices).
                sess.run(move_chain,
                         feed_dict={w_in: pw.reshape((dim[0], dim[1])),
                                    b_in: pb.reshape(dim[1])})

            if i > burn_in:
                counter += 1
                if accepted:
                    acceptance_count += 1
                # Add the current chain state, whether it moved or stayed.
                sess.run(accumulate)

        # Posterior-mean estimate for this batch becomes the model parameters.
        ew, eb = sess.run([expected_weights, expected_biases])
        sess.run(store_mean, feed_dict={w_in: ew / counter, b_in: eb / counter})
        print('Final Acceptance Ratio: ' + str((acceptance_count/counter)))

    print ('\n' + 'Langevin pCN training complete')

In [249]:
# Train and evaluate the binary (0-vs-1) model: a short gradient-based warm-up
# followed by the Langevin pCN sampler.
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()
batch_sz = 20

# The message says SGD; the optimizer behind train_step is Adam.
print("Training with SGD")
num_batch = 10
for i in range(num_batch):
    train_i, train_l = query_examples(training_images, training_labels, batch_sz)
    acc,_ = sess.run([accuracy, train_step], feed_dict = {x_: train_i, y_: train_l})
    print("training progress: " + str(float(i)/num_batch))

acc = sess.run(accuracy, feed_dict = {x_: testing_images, y_: testing_labels})
print("\n" + "SGD Training Complete, Test ACC of: " + str(acc) + "\n")

print("Begining pCN Training:")
# langevin_pCN requires `dim`; without it the call raises TypeError.
# [100, 2] matches the shapes of x_ / y_ in the binary graph.
langevin_pCN(delta, 20, 5, 5, [100, 2])

print("test accuracy is:" + str(sess.run(accuracy, feed_dict = {x_ : testing_images,y_ : testing_labels})))

Training with SGD
training progress: 0.0
training progress: 0.1
training progress: 0.2
training progress: 0.3
training progress: 0.4
training progress: 0.5
training progress: 0.6
training progress: 0.7
training progress: 0.8
training progress: 0.9

SGD Training Complete, Test ACC of: 0.5475177

Begining pCN Training:




[-4.276761 -2.459141]
Final Acceptance Ratio: 1.0
[-4.400471  -2.3345618]
Final Acceptance Ratio: 1.0
[-4.5968513 -2.13831  ]
Final Acceptance Ratio: 1.0
[-4.0832453 -2.6529267]
Final Acceptance Ratio: 1.0
[-4.067588  -2.6697557]
Final Acceptance Ratio: 1.0

Langevin pCN training complete
test accuracy is:0.855792


In [265]:
# Computational graph for the full 10-class task on raw 784-feature images.
# Same structure as the binary graph: an evaluated linear classifier, a "chain"
# copy used as sampler state by langevin_pCN, and the proposal / acceptance
# tensors built from the chain.

# delta and gamma feed the constants (dpos, dneg, twod, eightd, gam, dfour) and
# the stddev of the Xi_* noise variables below.
delta,gamma = .5, .5

# Discard the previously built graph; every tensor name below is rebound.
tf.reset_default_graph()
x_ = tf.placeholder(tf.float32, name = "InputImages", shape= [None, 784])
y_ = tf.placeholder(tf.float32, name = "InputLabels", shape = [None, 10])

# Evaluated model: logits = x_ . weights + biases.
# Note: name= is passed to tf.random_normal, so it names the initial-value op,
# not the tf.Variable itself.
weights = tf.Variable(tf.random_normal([784,10], name = "NetworkWeights", stddev = 0.1))
biases = tf.Variable(tf.random_normal([10], name = "NetworkBiases", stddev = 0.1))
output_logits =  tf.matmul(x_, weights) + biases

# Cross-entropy of the evaluated model against y_ (not reduced to a scalar here).
rl = tf.nn.softmax_cross_entropy_with_logits_v2(labels = y_, logits = output_logits)

# Sampler state: current position of the chain, same shapes as weights/biases.
chain_weights = tf.Variable(tf.random_normal([784,10], stddev = 0.1))
chain_biases = tf.Variable(tf.random_normal([10], stddev = 0.1))

# Zero-initialised accumulators; langevin_pCN adds the chain state to these and
# later divides by the number of accumulated samples.
expected_weights = tf.Variable(tf.zeros(dtype = tf.float32, shape = chain_weights.get_shape()))
expected_biases = tf.Variable(tf.zeros(dtype = tf.float32, shape = chain_biases.get_shape()))


# Loss of the linear model evaluated at the current chain state.
c_output_logits =  tf.matmul(x_, chain_weights) + chain_biases
cl = tf.nn.softmax_cross_entropy_with_logits_v2(labels = y_, logits = c_output_logits)

# tf.gradients returns a list (one entry per `xs`), so dcw and dcb are
# one-element lists rather than bare tensors.
dcw = tf.gradients(cl, chain_weights)
dcb = tf.gradients(cl, chain_biases)
# Flattened gradient (dc) and flattened chain state (c): weights first, then biases.
dc = tf.concat((tf.reshape(dcw, [-1]),tf.reshape(dcb, [-1])), axis = 0)
c = tf.concat((tf.reshape(chain_weights, [-1]),tf.reshape(chain_biases, [-1])), axis = 0)

# Gaussian noise for the proposal (original note: "need to initialize this
# somewhere in computations below").
# NOTE(review): as tf.Variables these only take a new random value when their
# initializer runs, and langevin_pCN does not re-initialise them -- confirm
# whether the noise is meant to be redrawn for every proposal.
Xi_w = tf.Variable(tf.random_normal(weights.get_shape(), stddev = gamma))
Xi_b = tf.Variable(tf.random_normal(biases.get_shape(), stddev = gamma))

# Scalar constants derived from delta / gamma.
dpos = tf.constant(2+delta, dtype = tf.float32)
dneg = tf.constant(2-delta, dtype = tf.float32)
twod = tf.constant(2*delta, dtype = tf.float32)
eightd = tf.constant(np.sqrt(8*delta), dtype = tf.float32)
gam = tf.constant(gamma, dtype = tf.float32)

# Proposal: shrink the chain state, step against the gradient, add scaled noise.
# Because dcw/dcb are lists, the result presumably carries a leading dimension
# of 1 -- which would explain the reshape of proposal_w below. TODO confirm.
proposal_w = ((dneg/dpos)*chain_weights)-((twod/dpos)*gam*dcw)+(eightd*Xi_w)
proposal_b = ((dneg/dpos)*chain_biases)-((twod/dpos)*gam*dcb)+(eightd*Xi_b)

# Loss of the linear model evaluated at the proposal.
p_output_logits =  tf.matmul(x_, tf.reshape(proposal_w,[784,10])) + proposal_b
pl = tf.nn.softmax_cross_entropy_with_logits_v2(labels = y_, logits = p_output_logits)

# Gradient of the proposal loss, flattened the same way as dc / c.
dpw = tf.gradients(pl, proposal_w)
dpb = tf.gradients(pl, proposal_b)
dp = tf.concat((tf.reshape(dpw, [-1]),tf.reshape(dpb, [-1])), axis = 0)
p = tf.concat((tf.reshape(proposal_w, [-1]),tf.reshape(proposal_b, [-1])), axis = 0)

half = tf.constant(1/2, dtype = tf.float32)
dfour = tf.constant(delta/4, dtype = tf.float32)

# Quantities compared by langevin_pCN to form the acceptance probability.
# NOTE(review): the tensordot terms contract two 1-D tensors, while `p` / `c`
# and the squared-gradient term are vectors, so these presumably evaluate to
# vectors rather than scalars -- confirm against the intended formula.
p_u_v = p + half * tf.tensordot((c-p), dp, axes = 1) \
           + dfour * tf.tensordot((c+p), dp, axes = 1) \
           + dfour *(eightd*dp)**2

p_v_u = c + half * tf.tensordot((p-c), dc, axes = 1) \
           + dfour * tf.tensordot((c+p), dc, axes = 1) \
           + dfour *(eightd*dc)**2

# Gradient-based baseline (Adam on rl) and accuracy of the evaluated model.
train_step = tf.train.AdamOptimizer(learning_rate = .001).minimize(rl)
correct_prediction = tf.equal(tf.argmax(output_logits,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))


In [267]:
# Train and evaluate the 10-class model on raw MNIST. A new session is opened
# because the graph was rebuilt; the variables are initialised from scratch.
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()
batch_sz = 20

print("Training with SGD")
num_batch = 100
for i in range(num_batch): 
    train_i, train_l = mnist.train.next_batch(batch_sz)
    # NOTE(review): the optimizer step below is commented out, so this loop only
    # draws batches and prints progress. The accuracy reported after it is that
    # of the randomly initialised weights -- confirm this is intentional.
    #acc,_ = sess.run([accuracy, train_step], feed_dict = {x_: train_i, y_: train_l})
    print("training progress: " + str(float(i)/num_batch))
    
# Evaluate on the test split, requested in a single batch of num_examples.
t_images, t_labels = mnist.test.next_batch(mnist.test.num_examples)
acc = sess.run(accuracy, feed_dict = {x_: t_images, y_: t_labels})
print("\n" + "SGD Training Complete, Test ACC of: " + str(acc) + "\n")

print("Begining pCN Training:")
# dim = [784, 10] makes langevin_pCN draw its batches from mnist.train and
# reshape proposals to the shapes of this graph's variables.
langevin_pCN(delta, 100, 5, 5, [784, 10])

print("test accuracy is:" + str(sess.run(accuracy, feed_dict = {x_ : t_images,y_ : t_labels})))

Training with SGD
training progress: 0.0
training progress: 0.01
training progress: 0.02
training progress: 0.03
training progress: 0.04
training progress: 0.05
training progress: 0.06
training progress: 0.07
training progress: 0.08
training progress: 0.09
training progress: 0.1
training progress: 0.11
training progress: 0.12
training progress: 0.13
training progress: 0.14
training progress: 0.15
training progress: 0.16
training progress: 0.17
training progress: 0.18
training progress: 0.19
training progress: 0.2
training progress: 0.21
training progress: 0.22
training progress: 0.23
training progress: 0.24
training progress: 0.25
training progress: 0.26
training progress: 0.27
training progress: 0.28
training progress: 0.29
training progress: 0.3
training progress: 0.31
training progress: 0.32
training progress: 0.33
training progress: 0.34
training progress: 0.35
training progress: 0.36
training progress: 0.37
training progress: 0.38
training progress: 0.39
training progress: 0.4
tra



Final Acceptance Ratio: 1.0
Final Acceptance Ratio: 1.0
Final Acceptance Ratio: 1.0
Final Acceptance Ratio: 1.0
Final Acceptance Ratio: 1.0

Langevin pCN training complete
test accuracy is:0.1621
