In [None]:
'''
Global hyper-parameters shared by the utility, model, training and test cells.
'''
# Geometry of the input images fed to the model.
IMG_HEIGHT = 32
IMG_WIDTH = 32
IMG_DEPTH = 3

# Side length of the square glimpse window, and the number of raw values
# contained in a single glimpse (window area times channel count).
G_WIN_SIZE = 24
G_DIM = IMG_DEPTH * G_WIN_SIZE ** 2

# Sigma of the Gaussian used when scoring (and sampling) glimpse locations.
STD_VAR = 0.11

# A location is an (x, y) pair, hence two dimensions.
LOC_DIM = 2
# Widths of the two fully connected layers that embed a location.
GLIMPSE_FC1 = 256
# NOTE(review): the glimpse network adds the flattened convolutional glimpse
# features to the GLIMPSE_FC2-wide location embedding, so this value has to
# agree with the flattened feature size produced for G_WIN_SIZE - confirm.
GLIMPSE_FC2 = 864

# Width of the initial recurrent output/state tensors. The recurrent cell's
# weights are sized with GLIMPSE_FC2, so the two values must stay equal.
LSTM_HIDDEN = 864

# Number of recurrent steps whose locations and baselines are recorded.
NUM_GLIMPSES = 6

# Number of target classes predicted by the classifier head.
NUM_CLASSES = 10

# Output width of the baseline (expected reward) head.
BASE_OUT = 1
# Multi-scale settings; only referenced by currently disabled padding code.
SCALE = 3
PAD_SIZE = G_WIN_SIZE * 2 ** (SCALE - 1)

# Each batch is tiled this many times before being fed to the model.
NUM_EPISODES = 1



In [None]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf 
import numpy as np 
from config import *
from model import *


def calc_likelihood(means, locs, sigma):
    '''
    Log-likelihood of the visited locations under a Gaussian location policy.

    means and locs are expected to be sequences of per-step tensors that stack
    to [time_steps, batch_size, no_locations]; sigma is the standard deviation
    of the Normal distribution centred on each mean.

    Returns a [batch_size, time_steps] tensor: the log-probabilities summed
    over the location coordinates (axis 2) and then transposed.
    '''
    stacked_means = tf.stack(means)
    stacked_locs = tf.stack(locs)

    policy = tf.contrib.distributions.Normal(stacked_means, sigma)
    # Sum the per-coordinate log-probabilities to get one value per step/sample.
    per_step = tf.reduce_sum(policy.log_prob(stacked_locs), 2)

    return tf.transpose(per_step)


def read_and_decode(filename_queue, alter=True):
    '''
    Read one serialized example from a TFRecord filename queue and decode it.

    Each record must carry an int64 'label' and a raw uint8 'image_raw' byte
    string. The label is returned as a one-hot vector of depth 10 and the image
    as a float32 tensor of shape [IMG_HEIGHT, IMG_WIDTH, IMG_DEPTH].

    The alter argument is accepted but not used.
    '''
    feature_spec = {
        'label': tf.FixedLenFeature([], tf.int64),
        'image_raw': tf.FixedLenFeature([], tf.string)
    }

    with tf.name_scope('read_and_decode'):
        _, serialized_example = tf.TFRecordReader().read(filename_queue)
        parsed = tf.parse_single_example(serialized_example, features=feature_spec)

        # Integer class id -> one-hot target vector.
        label = tf.one_hot(tf.cast(parsed['label'], tf.int32), depth=10)

        # Raw bytes -> flat uint8 vector -> image-shaped float tensor.
        pixels = tf.decode_raw(parsed['image_raw'], tf.uint8)
        pixels = tf.reshape(pixels, [IMG_HEIGHT, IMG_WIDTH, IMG_DEPTH])
        image = tf.cast(pixels, tf.float32)

    return image, label




In [None]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf
from config import *

'''
This file contains the model used for the project
'''

class Model:
    """Recurrent visual-attention classifier expressed as a TensorFlow 1.x graph.

    The constructor only stores its arguments; calling the instance builds the
    graph and returns the tensors needed for training and evaluation.

    Args:
        inputs: 4-D image tensor [batch_size, HEIGHT, WIDTH, CHANNELS].
        b_size: number of images in a batch. It is multiplied by NUM_EPISODES
            to obtain the static batch size used for the initial locations and
            the initial recurrent tensors.
        sample_locs: when False (default) the glimpse location equals the
            policy mean, which is what this class effectively did before. When
            True the location is drawn from Normal(mean, STD_VAR) and treated
            as a constant for back-propagation, so the score-function
            (REINFORCE) gradient with respect to the mean is non-zero.

    Attributes:
        collect_means: list of policy means, one entry per recorded step.
        collect_locs: list of locations actually used, one per recorded step.

    Note:
        The baseline, location and convolution layers create their parameters
        with tf.get_variable so that they are really shared between glimpses.
        Checkpoints written by a version of this class that used tf.Variable
        for those layers use different variable names and cannot be restored
        into this graph.
    """

    def __init__(self, inputs, b_size, sample_locs=False):
        self.inputs = inputs
        self.batch_size = b_size * NUM_EPISODES
        self.sample_locs = sample_locs
        self.collect_locs = []
        self.collect_means = []

    def __call__(self):
        """Build the recurrent graph.

        Returns:
            baselines: list of NUM_GLIMPSES tensors [batch_size, BASE_OUT].
            class_outs: logits [batch_size, NUM_CLASSES] computed from the
                output of the last recurrent step.
            collect_means: list of NUM_GLIMPSES tensors [batch_size, LOC_DIM].
            collect_locs: list of NUM_GLIMPSES tensors [batch_size, LOC_DIM].
        """
        # The first glimpse is taken at a uniformly random location.
        initial_locs = tf.random_uniform([self.batch_size, LOC_DIM], minval=-1, maxval=1)
        input_lstm = self.glimpse_network(self.inputs, initial_locs)

        collect_outputs = []
        baselines = []
        prev_output = tf.zeros([self.batch_size, LSTM_HIDDEN])
        prev_state = tf.zeros([self.batch_size, LSTM_HIDDEN])

        # Warm-up step: its location/mean are not recorded (is_first=False).
        curr_out, next_state = self.peephole_lstm(prev_output, prev_state, input_lstm)
        prev_state = next_state
        prev_output = self.next_location(curr_out, False)

        # NOTE(review): every step feeds the *initial* glimpse as curr_input and
        # the newest glimpse features as last_output; the cell output itself
        # only drives the location, baseline and classifier heads. The wiring
        # is kept unchanged - confirm it is intended.
        for _ in range(NUM_GLIMPSES):
            curr_out, next_state = self.peephole_lstm(prev_output, prev_state, input_lstm)
            collect_outputs.append(curr_out)
            base = self.baseline_layer(curr_out, GLIMPSE_FC2, BASE_OUT, 'baseline')
            baselines.append(base)
            prev_output = self.next_location(curr_out, True)
            prev_state = next_state

        class_outs = self.fc_layer(collect_outputs[-1], GLIMPSE_FC2, NUM_CLASSES, 'softmax', None)

        return baselines, class_outs, self.collect_means, self.collect_locs

    def baseline_layer(self, image, in_size, out_size, name):
        """Linear layer predicting the baseline from a recurrent output.

        Returns a tensor [batch_size, out_size] with no activation applied.
        """
        with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
            # tf.get_variable (unlike tf.Variable) honours AUTO_REUSE, so every
            # time step uses the same baseline parameters.
            weights = tf.get_variable("weights", [in_size, out_size],
                                      initializer=tf.truncated_normal_initializer(stddev=0.1))
            biases = tf.get_variable("biases", [out_size],
                                     initializer=tf.constant_initializer(0.1))
            y = tf.add(tf.matmul(image, weights), biases)

        return y

    def next_location(self, prev_inputs, is_first):
        """Choose the next glimpse location and return the features seen there.

        Args:
            prev_inputs: recurrent output [batch_size, GLIMPSE_FC2].
            is_first: despite its name, this flag decides whether the mean and
                location of this step are appended to collect_means and
                collect_locs (True) or discarded (False).

        Returns:
            Glimpse features [batch_size, GLIMPSE_FC2] for the chosen location.
        """
        with tf.variable_scope('next_loc', reuse=tf.AUTO_REUSE):
            # Shared across steps through tf.get_variable + AUTO_REUSE.
            weights = tf.get_variable("weights", [GLIMPSE_FC2, LOC_DIM],
                                      initializer=tf.truncated_normal_initializer(stddev=0.1))
            biases = tf.get_variable("biases", [LOC_DIM],
                                     initializer=tf.constant_initializer(0.1))
            y = tf.add(tf.matmul(prev_inputs, weights), biases)

        # tanh keeps the mean inside the normalized coordinate range [-1, 1].
        means = tf.nn.tanh(y)

        if self.sample_locs:
            # stddev must be passed by keyword: the second positional argument
            # of tf.random_normal is the mean, not the standard deviation.
            noise = tf.random_normal((self.batch_size, LOC_DIM), stddev=STD_VAR)
            # The sample is a constant w.r.t. the parameters; otherwise the
            # gradient of log p(loc | mean) with respect to the mean cancels.
            locs = tf.stop_gradient(means + noise)
        else:
            locs = means

        if is_first:
            self.collect_locs.append(locs)
            self.collect_means.append(means)

        next_inputs = self.glimpse_network(self.inputs, locs)

        return next_inputs

    def glimpse_network(self, input_img, locations):
        """Combine "where" (location) and "what" (glimpse content) features.

        Args:
            input_img: image batch [batch_size, HEIGHT, WIDTH, CHANNELS].
            locations: normalized, centred coordinates [batch_size, LOC_DIM].

        Returns:
            Tensor [batch_size, GLIMPSE_FC2].

        Raises:
            ValueError: if the flattened glimpse features do not have
                GLIMPSE_FC2 values per image, i.e. G_WIN_SIZE and GLIMPSE_FC2
                are inconsistent.
        """
        loc_out1 = self.fc_layer(locations, LOC_DIM, GLIMPSE_FC1, 'lc1', tf.nn.tanh)
        loc_out2 = self.fc_layer(loc_out1, GLIMPSE_FC1, GLIMPSE_FC2, 'lc2', tf.nn.tanh)

        glimpses = tf.image.extract_glimpse(input_img, [G_WIN_SIZE, G_WIN_SIZE],
                                            locations, centered=True, normalized=True)

        g = self.inception(glimpses, 'inception1')
        pooled = self.pool_operations(g, 'pool3', [1, 2, 2, 1], [1, 2, 2, 1])

        # Derive the per-image feature count from the static shape instead of
        # hard-coding 6*6*24: a wrong constant would silently fold spatial
        # positions into the batch dimension of the reshape below.
        feat_dims = pooled.get_shape().as_list()[1:]
        if None in feat_dims:
            flat_dim = 6 * 6 * 24  # static shape unknown: keep the historical value
        else:
            flat_dim = int(np.prod(feat_dims))
            if flat_dim != GLIMPSE_FC2:
                raise ValueError(
                    'Glimpse features flatten to {} values per image but GLIMPSE_FC2 is {}; '
                    'G_WIN_SIZE ({}) and GLIMPSE_FC2 are inconsistent.'.format(
                        flat_dim, GLIMPSE_FC2, G_WIN_SIZE))

        g_out2 = tf.nn.tanh(tf.reshape(pooled, [-1, flat_dim]))

        return tf.nn.relu(loc_out2 + g_out2)

    def inception(self, g1, name):
        """Four parallel convolution branches concatenated along channels.

        Every branch outputs 6 channels, so the result has 24 channels and the
        same spatial size as g1 (all convolutions and the pooling use SAME
        padding with stride 1).
        """
        with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
            # Bottleneck / pooling stage feeding three of the four branches.
            convg1a = self.conv_layer(g1, 1, 3, 3, 'convg1a')
            convg1b = self.conv_layer(g1, 1, 3, 3, 'convg1b')
            convg1c = self.pool_operations(g1, 'pool1', [1, 2, 2, 1], [1, 1, 1, 1])

            outg1a = self.conv_layer(g1, 1, 3, 6, 'outg1a')
            outg1b = self.conv_layer(convg1a, 1, 3, 6, 'outg1b')
            outg1c = self.conv_layer(convg1b, 3, 3, 6, 'outg1c')
            outg1d = self.conv_layer(convg1c, 1, 3, 6, 'outg1d')

            out1 = tf.concat([outg1a, outg1b, outg1c, outg1d], 3)

            return out1

    def conv_layer(self, in_image, fil_size, no_in, no_out, name):
        """Stride-1, SAME-padded square convolution followed by tanh.

        Args:
            in_image: input tensor [batch, height, width, no_in].
            fil_size: side length of the square filter.
            no_in: number of input channels.
            no_out: number of output channels.
            name: variable scope holding the layer's parameters.
        """
        with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
            # The glimpse network runs once per step; tf.get_variable makes all
            # of those invocations use one set of filters.
            weights = tf.get_variable("weights", [fil_size, fil_size, no_in, no_out],
                                      initializer=tf.truncated_normal_initializer(stddev=0.1))
            biases = tf.get_variable("biases", [no_out],
                                     initializer=tf.constant_initializer(0.1))

            conv = tf.nn.conv2d(in_image, weights, strides=[1, 1, 1, 1], padding='SAME')

            return tf.nn.tanh(conv + biases)

    def pool_operations(self, image1, given_name, ksize_value, stride_value):
        """SAME-padded average pooling with the given window and strides."""
        with tf.variable_scope(given_name, reuse=tf.AUTO_REUSE):
            pool_1 = tf.nn.avg_pool(image1, ksize=ksize_value, strides=stride_value, padding='SAME')

            return pool_1

    def fc_layer(self, image, in_size, out_size, name, activation):
        """General fully connected layer with shared, Xavier-initialised weights.

        Histogram summaries of the weights and biases are registered on every
        call. Pass activation=None for a purely linear layer.

        Returns:
            Tensor [batch_size, out_size].
        """
        with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
            weights = tf.get_variable("weights", [in_size, out_size], initializer=tf.contrib.layers.xavier_initializer())
            biases = tf.get_variable("biases", [out_size], initializer=tf.contrib.layers.xavier_initializer())
            y = tf.add(tf.matmul(image, weights), biases)
            tf.summary.histogram('weights_hist', weights)
            tf.summary.histogram('biases_hist', biases)

            if activation is not None:
                y = activation(y)

            return y

    def peephole_lstm(self, last_output, last_state, curr_input):
        """One step of the custom gated recurrent cell used by the model.

        The previous state, previous output and current input are first mixed
        through shared projections (wsprev, whprev, wxcurr); each gate then
        applies its own square matrix to such a mix. All tensors are
        [batch_size, GLIMPSE_FC2].

        Returns:
            (ht, ct): the new cell output and the new cell state.
        """
        with tf.variable_scope('lstm_cell', reuse=tf.AUTO_REUSE):
            whprev = tf.get_variable("whprev", [GLIMPSE_FC2, GLIMPSE_FC2], initializer=tf.contrib.layers.xavier_initializer())
            wx = tf.get_variable("wxcurr", [GLIMPSE_FC2, GLIMPSE_FC2], initializer=tf.contrib.layers.xavier_initializer())
            wsprev = tf.get_variable("wsprev", [GLIMPSE_FC2, GLIMPSE_FC2], initializer=tf.contrib.layers.xavier_initializer())
            wct = tf.get_variable("wct", [GLIMPSE_FC2, GLIMPSE_FC2], initializer=tf.contrib.layers.xavier_initializer())

            wf = tf.get_variable("wf", [GLIMPSE_FC2, GLIMPSE_FC2], initializer=tf.contrib.layers.xavier_initializer())
            bf = tf.get_variable("bf", [GLIMPSE_FC2], initializer=tf.contrib.layers.xavier_initializer())

            wi = tf.get_variable("wi", [GLIMPSE_FC2, GLIMPSE_FC2], initializer=tf.contrib.layers.xavier_initializer())
            bi = tf.get_variable("bi", [GLIMPSE_FC2], initializer=tf.contrib.layers.xavier_initializer())

            wc = tf.get_variable("wc", [GLIMPSE_FC2, GLIMPSE_FC2], initializer=tf.contrib.layers.xavier_initializer())
            bc = tf.get_variable("bc", [GLIMPSE_FC2], initializer=tf.contrib.layers.xavier_initializer())

            wo = tf.get_variable("wo", [GLIMPSE_FC2, GLIMPSE_FC2], initializer=tf.contrib.layers.xavier_initializer())
            bo = tf.get_variable("bo", [GLIMPSE_FC2], initializer=tf.contrib.layers.xavier_initializer())

            # Mix including the previous cell state (the "peephole" part).
            last_mix = tf.matmul(last_state, wsprev) + tf.matmul(last_output, whprev) + tf.matmul(curr_input, wx)
            # Mix of previous output and current input only.
            main_mix = tf.matmul(last_output, whprev) + tf.matmul(curr_input, wx)

            ft = tf.nn.sigmoid(tf.add(tf.matmul(last_mix, wf), bf))

            # NOTE(review): the input gate is built but never used; the state
            # update below uses (1 - ft) in its place. Kept so the variable set
            # of the cell stays unchanged.
            it = tf.nn.sigmoid(tf.add(tf.matmul(last_mix, wi), bi))

            cbart = tf.nn.tanh(tf.add(tf.matmul(main_mix, wc), bc))

            # Convex combination of the old state and the candidate state.
            ct = tf.multiply(ft, last_state) + tf.multiply((1 - ft), cbart)

            # The output gate looks at the *new* cell state.
            ot_mix = tf.matmul(ct, wct) + tf.matmul(last_output, whprev) + tf.matmul(curr_input, wx)

            ot = tf.nn.sigmoid(tf.add(tf.matmul(ot_mix, wo), bo))

            ht = tf.multiply(ot, tf.nn.tanh(ct))

            return ht, ct
        




In [None]:
'''
Train function
Processes inputs in mini-batches.
Builds the model and trains its parameters for a predetermined number of steps.
'''

import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
import os 
from config import *
from model import *
from util import *
from tensorflow.python.framework import ops
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 
os.environ["CUDA_VISIBLE_DEVICES"] = "0"


def train(batch_size, epochs, log, output, model_dir='/N/u/ramyarao/project/model'):
    """Build the attention model and train it from the TFRecord input queue.

    Args:
        batch_size: number of images per mini-batch taken from the shuffle queue.
        epochs: number of training iterations. Each iteration consumes one
            mini-batch, so this counts steps rather than passes over the data.
        log: directory that receives the TensorBoard event files.
        output: checkpoint name prefix; checkpoints are written as
            '<model_dir>/<output>_<step>.ckpt'.
        model_dir: directory checkpoints are saved into. The default is the
            location that used to be hard-coded.

    Returns:
        None.
    """
    # Start from an empty graph (as test() does) so the function can be called
    # more than once in one process, e.g. when a notebook cell is re-run.
    ops.reset_default_graph()

    filename_queue = tf.train.string_input_producer(['norm_cifar_gray.tfrecords'])
    image, label = read_and_decode(filename_queue)
    batch = tf.train.shuffle_batch([image, label], batch_size=batch_size, capacity=500, num_threads=2, min_after_dequeue=250)

    # placeholders for the input and labels
    X = tf.placeholder(tf.float32, [None, IMG_HEIGHT, IMG_WIDTH, IMG_DEPTH], name='X')
    y = tf.placeholder(tf.float32, [None, 10], name='labels')

    # Model instantiated and called for processing the inputs
    model = Model(X, batch_size)
    b_t, y_hat, means, locs = model()

    class_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=y,
        logits=y_hat
    ))

    correct_prediction = tf.equal(tf.argmax(y_hat, 1), tf.argmax(y, 1))
    # Reward is 1 for a correct final prediction and is shared by every step.
    reward_last_step = tf.expand_dims(tf.cast(correct_prediction, tf.float32), 1)
    rewards = tf.tile(reward_last_step, (1, NUM_GLIMPSES))

    # model() returns a Python list of per-step baseline tensors. Subtracting
    # the list directly would auto-stack it to [steps, batch, 1] and broadcast
    # against the [batch, steps] rewards; concatenate to [batch, steps] first.
    baselines = tf.concat(b_t, axis=1)

    log_likelihood = calc_likelihood(means, locs, STD_VAR)
    advantage = rewards - baselines

    # The advantage only weights the policy gradient; the baseline is trained
    # by baseline_loss alone, so no gradient may reach it through this term.
    del_j = tf.reduce_mean(log_likelihood * tf.stop_gradient(advantage))

    baseline_loss = tf.reduce_mean(tf.square(advantage))

    loss = (-del_j) + class_loss + baseline_loss

    var_list = tf.trainable_variables()

    grads = tf.gradients(loss, var_list)
    grads, _ = tf.clip_by_global_norm(grads, 5)

    global_step = tf.get_variable(
        'global_step', [], initializer=tf.constant_initializer(0), trainable=False)

    # Staircase decay from 1e-3, never dropping below 1e-4.
    learning_rate = tf.train.exponential_decay(
        1e-03,
        global_step,
        55000 // batch_size,
        0.97,
        staircase=True)
    learning_rate = tf.maximum(learning_rate, 1e-4)

    # Use the schedule built above (it used to be computed and then ignored).
    opt = tf.train.AdamOptimizer(learning_rate)
    train_op = opt.apply_gradients(zip(grads, var_list), global_step=global_step)

    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    saver = tf.train.Saver()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.33
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())

        merged_summary = tf.summary.merge_all()
        writer = tf.summary.FileWriter(log, sess.graph)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            # training the model for a predetermined number of steps
            for i in range(epochs):
                batch_x, batch_lbl = sess.run(batch)
                batch_x = np.tile(batch_x, [NUM_EPISODES, 1, 1, 1])
                batch_lbl = np.tile(batch_lbl, [NUM_EPISODES, 1])
                feed = {X: batch_x, y: batch_lbl}

                sess.run(train_op, feed_dict=feed)
                # One forward pass yields both the post-update accuracy and the summaries.
                acc, s = sess.run([accuracy, merged_summary], feed_dict=feed)
                writer.add_summary(s, i)

                if (i+1) % 275 == 0:
                    print('Step {}: {}'.format(i+1, acc))

                if (((i+1) % 275 == 0) and (acc > 0.45)):
                    # Pass model_dir to save the model in a different location
                    params = saver.save(sess, os.path.join(model_dir, '{}_{}.ckpt'.format(output, i+1)))
                    print('Model saved: {}'.format(params))
        finally:
            # Always stop the input threads and flush the event file, even
            # when a training step raises.
            coord.request_stop()
            coord.join(threads)
            writer.close()
    return
        

if __name__ == '__main__':
    # 200 images per batch; 275 * 100000 training steps, with progress printed
    # by train() every 275 steps.
    train(batch_size=200, epochs=275 * 100000, log='logs', output='model')








In [None]:
'''
Test function
Processes inputs in mini-batches.
Rebuilds the model, restores a saved checkpoint and reports the classification accuracy.
'''

import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
import os 
# from config import *
# from model import *
# from util import *
from tensorflow.python.framework import ops
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 
os.environ["CUDA_VISIBLE_DEVICES"] = "0"


def test(batch_size, epochs, log, output,
         data_path='/N/u/ramyarao/project/cifar_model/cifar_gray_test.tfrecords',
         model_dir='/N/u/ramyarao/project/model'):
    """Rebuild the model, restore a checkpoint and measure its accuracy.

    Args:
        batch_size: number of images per evaluation batch.
        epochs: number of batches to evaluate.
        log: directory that receives the TensorBoard graph/event file.
        output: checkpoint file name inside model_dir, e.g. 'model_27500.ckpt'.
        data_path: TFRecord file with the evaluation examples. The default is
            the location that used to be hard-coded.
        model_dir: directory holding the checkpoint. The default is the
            location that used to be hard-coded.

    Returns:
        The mean of the per-batch accuracies (also printed).
    """
    ops.reset_default_graph()
    filename_queue = tf.train.string_input_producer([data_path])
    image, label = read_and_decode(filename_queue)
    batch = tf.train.shuffle_batch([image, label], batch_size=batch_size, capacity=1500, num_threads=2, min_after_dequeue=250)

    # placeholders for the input and labels
    X = tf.placeholder(tf.float32, [None, IMG_HEIGHT, IMG_WIDTH, IMG_DEPTH], name='X')
    y = tf.placeholder(tf.float32, [None, 10], name='labels')

    # Model instantiated and called for processing the inputs.
    # model() returns four values; only the logits are needed here.
    model = Model(X, batch_size)
    _, y_hat, _, _ = model()

    correct_prediction = tf.equal(tf.argmax(y_hat, 1), tf.argmax(y, 1))

    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    saver = tf.train.Saver()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.33
    with tf.Session(config=config) as sess:
        # Restore straight into the variables of the graph built above.
        # Importing the saved meta graph would add a second copy of the model
        # and load the weights into that copy, leaving the variables behind
        # `accuracy` uninitialised.
        saver.restore(sess, os.path.join(model_dir, output))

        writer = tf.summary.FileWriter(log, sess.graph)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        collect_acc = []
        try:
            # evaluating the model on a predetermined number of batches
            for i in range(epochs):
                batch_x, batch_lbl = sess.run(batch)

                acc = sess.run(accuracy, feed_dict={X: batch_x, y: batch_lbl})
                collect_acc.append(acc)

                print('Step {}: {}'.format(i+1, acc))
        finally:
            # Always stop the input threads and flush the event file.
            coord.request_stop()
            coord.join(threads)
            writer.close()

        mean_acc = np.mean(collect_acc)
        print(mean_acc)

    return mean_acc
        
        
if __name__ == '__main__':
    # Evaluate 10 batches of 1000 images with the checkpoint saved at step 27500.
    test(batch_size=1000, epochs=10, log='logs', output='model_27500.ckpt')
        