In [5]:
'''
Defining all the global variables in this cell
'''
IMG_HEIGHT = 32
IMG_WIDTH = 32
IMG_DEPTH = 1

G_WIN_SIZE= 12
G_DIM = G_WIN_SIZE*G_WIN_SIZE*IMG_DEPTH

STD_VAR = 0.11

LOC_DIM = 2 # the number of dimensions for the locations are just x and y so 2
GLIMPSE_FC1 = 256
GLIMPSE_FC2 = 512

LSTM_HIDDEN = 512

NUM_GLIMPSES = 6


NUM_CLASSES = 10

BASE_OUT = 1
SCALE = 3
PAD_SIZE = G_WIN_SIZE * (2 ** (SCALE-1))

NUM_EPISODES = 10



In [6]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf 
import numpy as np 
# from config import *
# from model import *


'''
Expecting means and locs to be of dimension [time_steps, batch_size, no_locations]
'''
def calc_likelihood(means, locs, sigma):

    means = tf.stack(means)
    locs = tf.stack(locs)
    dist = tf.contrib.distributions.Normal(means, sigma)
    pdf_val = dist.log_prob(locs)
#     print(pdf_val.shape)
    likelihood = tf.reduce_sum(pdf_val, 2)

    return tf.transpose(likelihood)


def read_and_decode(filename_queue, alter=True):
    with tf.name_scope('read_and_decode'):
        reader = tf.TFRecordReader()
        _, serialized_example = reader.read(filename_queue)
        features = tf.parse_single_example(serialized_example, features={
            'label': tf.FixedLenFeature([], tf.int64),
            'image_raw': tf.FixedLenFeature([], tf.string)
        })

        label = tf.cast(features['label'], tf.int32)
        label = tf.one_hot(label, depth=10)
        print(label.shape)
        # label = tf.reshape(label, [1])

        image = tf.decode_raw(features['image_raw'], tf.uint8)

        # Convert back to image shape
        image = tf.reshape(image, [IMG_HEIGHT, IMG_WIDTH, IMG_DEPTH])
        print(image.shape)

        image = tf.cast(image, tf.float32)

        return image, label



In [7]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf
# from config import *

'''
This file contains the model used for the project
'''

class Model:
    # The inputs passed to the model and the batch size are the class variables used by all the functions
    # inputs are 4D tensors [batch_size, HEIGHT, WIDTH, CHANNELS]
    # batch_size is a scalar values representing the number of passed in the inputs tensor
    # Collecting the mean values used by the model with collect_means
    # Collecting the locations using collect_locs
    def __init__(self, inputs, b_size):
        self.inputs = inputs
        self.batch_size = b_size * NUM_EPISODES
        print(self.batch_size)
        self.collect_locs = []
        self.collect_means = []
        

    # This function calculated the inital locations, then build the LSTM cell 
    # the output of this function is the classifier output of the last LSTM cell 
    # class_outs is a 2D tensor of dimenstions [batch_size, number_of_claases]
    def __call__(self):
        initial_locs = tf.zeros([self.batch_size, LOC_DIM])
        
        input_lstm = self.glimpse_network(self.inputs, initial_locs)

        collect_outputs= []
        baselines = []
        prev_output = tf.zeros([self.batch_size, LSTM_HIDDEN])
        prev_state = tf.zeros([self.batch_size, LSTM_HIDDEN])

        curr_out, next_state = self.lstm_layer(prev_output, prev_state, input_lstm)
        prev_state = next_state
        prev_output = self.next_location(curr_out, False)
        
        for i in range(NUM_GLIMPSES):
            curr_out, next_state = self.lstm_layer(prev_output, prev_state, input_lstm)
            changed = tf.concat([tf.ones([self.batch_size, 1]), curr_out], 1)
            collect_outputs.append(changed)
            base = self.baseline_layer(curr_out, GLIMPSE_FC2, BASE_OUT, 'baseline')
            baselines.append(base)
            prev_output = self.next_location(curr_out, True)
            prev_state = next_state
            if i == NUM_GLIMPSES-1:
                class_outs = self.fc_layer(curr_out, GLIMPSE_FC2, NUM_CLASSES, 'softmax', None)


        return baselines, class_outs, self.collect_means, self.collect_locs


    def baseline_layer(self, image, in_size, out_size, name):
        with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
            weights = tf.get_variable("weights", [in_size, out_size], initializer=tf.contrib.layers.xavier_initializer())
            biases = tf.get_variable("biases", [out_size], initializer=tf.contrib.layers.xavier_initializer())
            y = tf.nn.tanh(tf.add(tf.matmul(image, weights), biases))
#             y = tf.stop_gradient(y)

            return y

    

    # This function is called by the current LSTM cell to get inputs to the next cell
    # next_inputs are of dimension [batch_size, 256]
    def next_location(self, prev_inputs, is_first):
        with tf.variable_scope('next_loc', reuse=tf.AUTO_REUSE):
            weights = tf.get_variable("weights", [GLIMPSE_FC2+1, LOC_DIM], initializer=tf.contrib.layers.xavier_initializer())
            prev_inputs = tf.concat([tf.ones([self.batch_size, 1]), prev_inputs], 1)
            means = tf.clip_by_value(tf.matmul(prev_inputs, weights), -0.5, 0.5)

#             dist = tf.contrib.distributions.Normal(means, STD_VAR)
#             locs = tf.stop_gradient(tf.squeeze(dist.sample(1)))
#             print(locs.shape)
            locs = means
            if is_first:
                self.collect_locs.append(locs)
                self.collect_means.append(means)

            next_inputs = self.glimpse_network(self.inputs, locs)
            
            return next_inputs


    # This function has the glimpse network where the locations are processed 
    # output is a 2D tensor of dimension [batch_size, 256]
    def glimpse_network(self, input_img, locations):

        loc_out1 = self.fc_layer(locations, LOC_DIM, 256, 'lc1', tf.nn.tanh)
        loc_out2 = self.fc_layer(loc_out1, 256, 512, 'lc2', tf.nn.tanh)

        glimpses = tf.image.extract_glimpse(input_img, [G_WIN_SIZE,G_WIN_SIZE], 
                                                locations, centered=True, normalized=True)
        # print(input_img.shape)
        glimpses_2 = tf.image.extract_glimpse(input_img, [24,24], 
                                                locations, centered=True, normalized=True)
        # glimpses_3 = tf.image.extract_glimpse(input_img, [28,28], 
        #                                         locations, centered=True, normalized=True)
        
        glimpses = tf.squeeze(glimpses)
        glimpses_2 = tf.squeeze(tf.image.resize_images(glimpses_2, [12,12]))
        # glimpses_3 = tf.squeeze(tf.image.resize_images(glimpses_3, [8,8]))

        glimpses = tf.reshape(glimpses, [-1, 12*12*1])
        glimpses_2 = tf.reshape(glimpses_2, [-1, 144])
        # glimpses_3 = tf.reshape(glimpses_3, [-1, 64])

        glimpses = tf.concat([glimpses, glimpses_2], 1)

        # glimpses = tf.reshape(glimpses, [-1, G_WIN_SIZE*G_WIN_SIZE*IMG_DEPTH])

        g_out1 = self.fc_layer(glimpses, 288, 512, 'g1', tf.nn.tanh)
        g_out2 = self.fc_layer(g_out1, 512, 512, 'g2', tf.nn.tanh)


        return tf.nn.relu(loc_out2 + g_out2)


    # general template for a fully connected layer used by the model
    # output dimensions are [batch_size, out_size]
    def fc_layer(self, image, in_size, out_size, name, activation):
        with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
            weights = tf.get_variable("weights", [in_size, out_size], initializer=tf.contrib.layers.xavier_initializer())
            biases = tf.get_variable("biases", [out_size], initializer=tf.contrib.layers.xavier_initializer())
            y = tf.add(tf.matmul(image, weights), biases)

#             y = tf.layers.batch_normalization(y, training=True)
            
            if activation is not None:
                y = activation(y)

            return y
    
    
    def lstm_layer(self, last_output, last_state, curr_input):
        
        with tf.variable_scope('lstm_cell', reuse=tf.AUTO_REUSE):
            whprev = tf.get_variable("whprev", [GLIMPSE_FC2, GLIMPSE_FC2], initializer=tf.contrib.layers.xavier_initializer())
            wx = tf.get_variable("wxcurr", [GLIMPSE_FC2, GLIMPSE_FC2], initializer=tf.contrib.layers.xavier_initializer())
            wf = tf.get_variable("wf", [GLIMPSE_FC2, GLIMPSE_FC2], initializer=tf.contrib.layers.xavier_initializer())
            bf = tf.get_variable("bf", [GLIMPSE_FC2], initializer=tf.contrib.layers.xavier_initializer())
            
            wi = tf.get_variable("wi", [GLIMPSE_FC2, GLIMPSE_FC2], initializer=tf.contrib.layers.xavier_initializer())
            bi = tf.get_variable("bi", [GLIMPSE_FC2], initializer=tf.contrib.layers.xavier_initializer())
            
            wc = tf.get_variable("wc", [GLIMPSE_FC2, GLIMPSE_FC2], initializer=tf.contrib.layers.xavier_initializer())
            bc = tf.get_variable("bc", [GLIMPSE_FC2], initializer=tf.contrib.layers.xavier_initializer())
            
            wo = tf.get_variable("wo", [GLIMPSE_FC2, GLIMPSE_FC2], initializer=tf.contrib.layers.xavier_initializer())
            bo = tf.get_variable("bo", [GLIMPSE_FC2], initializer=tf.contrib.layers.xavier_initializer())
            
            main_mix = tf.matmul(last_output, whprev)+ tf.matmul(curr_input, wx)
            
            ft = tf.nn.sigmoid(tf.add(tf.matmul(main_mix, wf), bf))
            
            it = tf.nn.sigmoid(tf.add(tf.matmul(main_mix, wi), bi))
            
            cbart = tf.nn.tanh(tf.add(tf.matmul(main_mix, wc), bc))
             
            ct = tf.multiply(ft, last_state) + tf.multiply(it, cbart)
            
            ot = tf.nn.sigmoid(tf.add(tf.matmul(main_mix, wo), bo))
            
            ht = tf.multiply(ot, tf.nn.tanh(ct))
            
            return ht, ct

In [None]:
'''
Train function
Processes inputs in mini-batchs 
Builds the model and trains the parameters for predetermined number of times 
'''

import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
import os 
# from config import *
# from model import *
# from util import *
from tensorflow.python.framework import ops
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 
os.environ["CUDA_VISIBLE_DEVICES"] = "0"


def train(batch_size, epochs, log, output):
    # Read arguments
    ops.reset_default_graph()
    filename_queue = tf.train.string_input_producer(['/N/u/ramyarao/project/cifar_model/cifar_gray_train.tfrecords'])
    image, label = read_and_decode(filename_queue)
    batch = tf.train.shuffle_batch([image, label], batch_size=batch_size, capacity=500, num_threads=2, min_after_dequeue=250)

    # placeholders for the input and labels
    X = tf.placeholder(tf.float32, [None, IMG_HEIGHT, IMG_WIDTH, IMG_DEPTH], name='X')
    y = tf.placeholder(tf.float32, [None, 10], name='labels')

    # Model instantiated and called for processing the inputs
    model = Model(X, batch_size)
    b_t, y_hat, means, locs = model()

    class_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=y,
        logits=y_hat
    ))

    correct_prediction = tf.equal(tf.argmax(y_hat, 1), tf.argmax(y, 1))
    # correct_prediction = tf.equal(y_hat, y)
    reward_last_step = tf.expand_dims(tf.cast(correct_prediction, tf.float32), 1)
    rewards = tf.tile(reward_last_step, (1, NUM_GLIMPSES)) 

    log_likelihood = calc_likelihood(means, locs, STD_VAR)
    penalty = rewards - b_t

    del_j = tf.reduce_mean(log_likelihood * penalty)

    baseline_loss = tf.reduce_mean(tf.square((rewards - b_t)))
    
    loss = -del_j + class_loss + baseline_loss

    var_list = tf.trainable_variables()

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        opt = tf.train.AdamOptimizer(1e-3).minimize(loss, var_list=var_list)

    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    saver = tf.train.Saver()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 1.0
    
    gpu_options = tf.GPUOptions(allow_growth=True)
#     sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        sess.run(tf.global_variables_initializer())

        merged_summary = tf.summary.merge_all()
        writer = tf.summary.FileWriter(log, sess.graph)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        # training the model for a predetermined number of epochs
        for i in range(epochs):
            batch_x, batch_lbl = sess.run(batch)
            batch_x = np.tile(batch_x, [NUM_EPISODES, 1, 1, 1])
            batch_lbl = np.tile(batch_lbl, [NUM_EPISODES, 1])
            sess.run(opt, feed_dict={X: batch_x, y: batch_lbl})
            print(sess.run(tf.argmax(y_hat, 1)))
#             print(sess.run([locs[-1], means[-1]], feed_dict={X: batch_x, y: batch_lbl}))
            acc, d, c, b = sess.run([accuracy, -del_j, class_loss, baseline_loss] ,feed_dict={X: batch_x, y: batch_lbl})


            s = sess.run(merged_summary, feed_dict={X: batch_x, y: batch_lbl})
            writer.add_summary(s, i)

            if (i+1) % 550 == 0:
                print('Step {}: A={} d={} c={} b={}'.format(i+1, acc, d, c, b))

            if (((i+1) % 550 == 0) and (acc > 0.90)):
                # Please change the directory here to save the model in a different location
                params = saver.save(sess, '/N/u/ramyarao/project/model/{}_{}.ckpt'.format(output, i+1))
                print('Model saved: {}'.format(params))

        coord.request_stop()
        coord.join(threads)
    return
        

if __name__ == '__main__':
    train(100, 550*100000, 'logs', 'model')







(10,)
(32, 32, 1)
100
Step 550: A=0.5199999809265137 d=-3.303061008453369 c=4.688730716705322 b=1.8929040431976318
Step 1100: A=0.4300000071525574 d=-3.4584290981292725 c=3.339381694793701 b=2.046644926071167
Step 1650: A=0.3799999952316284 d=-3.067120313644409 c=5.256348133087158 b=1.6526150703430176
Step 2200: A=0.4000000059604645 d=-2.648296356201172 c=6.410726547241211 b=1.2963687181472778
Step 2750: A=0.38999998569488525 d=-3.572054624557495 c=8.223984718322754 b=2.1597421169281006
Step 3300: A=0.44999998807907104 d=-3.730309247970581 c=5.269430160522461 b=2.343402862548828
Step 3850: A=0.36000001430511475 d=-3.4375429153442383 c=5.771368980407715 b=2.010227918624878
Step 4400: A=0.3499999940395355 d=-3.448655843734741 c=5.493671894073486 b=2.0188536643981934
Step 4950: A=0.4300000071525574 d=-3.0171427726745605 c=4.702121257781982 b=1.616219401359558
Step 5500: A=0.3799999952316284 d=-2.8335392475128174 c=5.039307117462158 b=1.4449281692504883
Step 6050: A=0.3799999952316284 d=-2

In [None]:
'''
Train function
Processes inputs in mini-batchs 
Builds the model and trains the parameters for predetermined number of times 
'''

import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
import os 
# from config import *
# from model import *
# from util import *
from tensorflow.python.framework import ops
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 
os.environ["CUDA_VISIBLE_DEVICES"] = "0"


def test(batch_size, epochs, log, output):
    # Read arguments
    ops.reset_default_graph()
    filename_queue = tf.train.string_input_producer(['/N/u/ramyarao/project/cifar_model/cifar_gray_test.tfrecords'])
    image, label = read_and_decode(filename_queue)
    batch = tf.train.shuffle_batch([image, label], batch_size=batch_size, capacity=500, num_threads=2, min_after_dequeue=250)

    # placeholders for the input and labels
    X = tf.placeholder(tf.float32, [None, IMG_HEIGHT, IMG_WIDTH, IMG_DEPTH], name='X')
    y = tf.placeholder(tf.float32, [None, 10], name='labels')

    # Model instantiated and called for processing the inputs
    model = Model(X, batch_size)
    b_t, y_hat, means, locs = model()

    temp1 = tf.argmax(y_hat, 1)
    correct_prediction = tf.equal(tf.argmax(y_hat, 1), tf.argmax(y, 1))

    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    saver = tf.train.Saver()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.33
    with tf.Session(config=config) as sess:

        new_saver = tf.train.import_meta_graph('/N/u/ramyarao/project/model/'+output + '.meta')
        new_saver.restore(sess, '/N/u/ramyarao/project/model/'+ output)

        merged_summary = tf.summary.merge_all()
        writer = tf.summary.FileWriter(log, sess.graph)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        # training the model for a predetermined number of epochs
        for i in range(epochs):
            batch_x, batch_lbl = sess.run(batch)
            
            print(sess.run([temp1], feed_dict={X: batch_x, y: batch_lbl}))
            print(sess.run([tf.argmax(batch_lbl,1)], feed_dict={X: batch_x, y: batch_lbl}))
            acc = sess.run(accuracy, feed_dict={X: batch_x, y: batch_lbl})

            print('Step {}: {}'.format(i+1, acc))


        coord.request_stop()
        coord.join(threads)
        


if __name__ == '__main__':
    test(200, 3, 'logs', 'model_6270.ckpt')


