In [1]:
from SimpleTask import SimpleGridTask
import numpy as np, numpy.random as npr, random as r, SimpleTask
from TransportTask import TransportTask
from NavTask import NavigationTask
import tensorflow as tf
import tensorflow.contrib.slim as slim
import os
import time
from SeqData import SeqData

In [2]:
class ForwardModel():
    """Two-hidden-layer network mapping a (state, action) vector to next-state logits.

    The output layer has ``obs_space`` units and no activation. ``loss_function``
    splits each output row into sub-vectors with
    ``env.deconcatenateOneHotStateVector`` and applies a softmax cross-entropy
    to every sub-vector.

    Every method that executes ops uses the TensorFlow default session, so it
    must be called inside a ``with tf.Session():`` block.
    """
    def __init__(self,
                obs_space,
                input_space,
                n_hidden=100
                ):
        """Create the placeholders, the prediction graph and a Saver.

        Args:
            obs_space: width of the state vector (size of the network output).
            input_space: width of the network input; ``input_space - obs_space``
                is stored as ``act_space``.
            n_hidden: number of units in each of the two hidden layers.
        """
        self.n_hidden=n_hidden
        self.act_space=input_space-obs_space
        self.obs_space=obs_space
        #Placeholders
        self.input = tf.placeholder("float", [None, input_space])
        self.truevalue = tf.placeholder("float", [None, obs_space])
        self.pred=self.build_graph(self.input)
        # Created before any optimizer exists, so the Saver only covers the
        # variables present at this point (the network weights).
        self.saver = tf.train.Saver()

    @staticmethod
    def _default_session():
        """Return the default session or raise a readable error if there is none."""
        sess = tf.get_default_session()
        if sess is None:
            raise RuntimeError("No default TensorFlow session; call this inside `with tf.Session():`")
        return sess

    def loss_function(self,batch_size,env):
        """Build the loss and the correct-prediction counter for one batch.

        Only rows ``0 .. batch_size-1`` of the fed batch contribute, because the
        graph is unrolled row by row.

        Args:
            batch_size: number of leading rows to unroll.
            env: object providing ``deconcatenateOneHotStateVector``.

        Returns:
            (cost, accTotal): summed softmax cross-entropy over all sub-vectors,
            and the number of sub-vectors whose argmax matches the label.
        """
        accTotal=0
        cost=0
        for i in range(0,batch_size):
            predVecs = env.deconcatenateOneHotStateVector(self.pred[i,:])
            labelVecs = env.deconcatenateOneHotStateVector(self.truevalue[i,:])
            for pv,lv in zip(predVecs,labelVecs):
                cost += tf.nn.softmax_cross_entropy_with_logits(logits=pv, labels=lv)
                accTotal += tf.cast(tf.equal(tf.argmax(pv,axis=0), tf.argmax(lv,axis=0)), tf.float32)
        return cost,accTotal

    def build_graph(self,inputVec, reuse=None):
        """Return the output tensor of the network applied to ``inputVec``.

        Variables live in the "forward-model" scope; pass ``reuse=True`` to
        apply the already-created weights to another input.
        """
        with tf.variable_scope("forward-model", reuse=reuse):
            hidden = slim.fully_connected(inputVec, self.n_hidden, biases_initializer=None, activation_fn=tf.nn.relu)
            hidden2 = slim.fully_connected(hidden, self.n_hidden, biases_initializer=None, activation_fn=tf.nn.relu)
            return slim.fully_connected(hidden2,self.obs_space, activation_fn=None, biases_initializer=None)

    def predict(self, x):
        """Run the network on ``x`` (rows of width ``input_space``).

        Returns:
            A one-element list holding the output array, because the fetch is
            passed to ``sess.run`` as a list.
        """
        sess = self._default_session()
        return sess.run([self.pred], {self.input:x})

    def load_model(self,model_file_name):
        """Restore the variables covered by ``self.saver`` from a checkpoint path."""
        sess = self._default_session()
        self.saver.restore(sess, model_file_name)

    def _chunked_accuracy(self, sess, data, labels, batch_size):
        """Average ``self.accuracy`` over consecutive full chunks of ``batch_size`` rows.

        ``self.accuracy`` is unrolled over exactly ``batch_size`` rows, so a
        larger set has to be scored chunk by chunk; a trailing partial chunk is
        dropped because the unrolled graph cannot index rows it was not fed.
        """
        data, labels = np.asarray(data), np.asarray(labels)
        n_chunks = len(data) // batch_size
        if n_chunks == 0:
            raise ValueError("Need at least batch_size=%d evaluation rows, got %d" % (batch_size, len(data)))
        scores = []
        for k in range(n_chunks):
            rows = slice(k * batch_size, (k + 1) * batch_size)
            scores.append(sess.run(self.accuracy, feed_dict={self.input: data[rows], self.truevalue: labels[rows]}))
        return np.mean(scores)

    def train(self,trainset,testset,training_steps,batch_size,env,learning_rate,display_step, model_file_name=None):
        """Train with Adam, report test accuracy and save a checkpoint.

        Note: this runs the global variable initializer, so weights restored
        beforehand with ``load_model`` are overwritten.

        Args:
            trainset, testset: datasets providing ``next_batch_nonseq(n)``;
                ``trainset.env.stateSubVectors`` is used to normalise accuracy.
            training_steps: number of optimizer steps.
            batch_size: rows per training batch (also the unroll length of the loss).
            env: passed to ``loss_function``.
            learning_rate: Adam learning rate.
            display_step: log every ``display_step`` steps (and at step 1).
            model_file_name: checkpoint base name; defaults to
                "FWR_model_<timestamp>" computed when ``train`` is called.

        Returns:
            Accuracy on up to 5000 test rows, averaged over full chunks of
            ``batch_size`` rows.
        """
        if model_file_name is None:
            # Resolve at call time; a default in the signature would be frozen
            # at class-definition time and shared by every later call.
            model_file_name = "FWR_model_"+time.strftime("%Y%m%d-%H%M%S")
        sess = self._default_session()
        print('Entering loss func')
        cost,accTotal = self.loss_function(batch_size,env)
        print('Defining optimizer')
        self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
        self.accuracy = accTotal / (batch_size * trainset.env.stateSubVectors)
        # Initialize the variables (i.e. assign their default value)
        print('Running TF initializer')
        init = tf.global_variables_initializer()
        sess.run(init)
        print('Entering train loop')
        for step in range(1, training_steps + 1):
            batch_x, batch_y = trainset.next_batch_nonseq(batch_size)
            sess.run(self.optimizer, feed_dict={self.input: batch_x, self.truevalue: batch_y})
            if step % display_step == 0 or step == 1:
                # Calculate batch accuracy & loss
                acc, loss = sess.run([self.accuracy, cost], feed_dict={self.input: batch_x, self.truevalue: batch_y})
                print("Step " + str(step*batch_size) + ", Minibatch Loss= " + \
                      "{:.6f}".format(loss) + ", Training Accuracy= " + \
                      "{:.5f}".format(acc))
        print("Optimization Finished!")
        # Calculate accuracy over the whole test sample, not just its first batch_size rows
        test_data, test_label = testset.next_batch_nonseq(5000)
        acc = self._chunked_accuracy(sess, test_data, test_label, batch_size)
        print("Testing Accuracy:",acc)
        self.saver.save(sess, "./"+model_file_name+".ckpt")
        print("Model Saved")
        return acc

In [3]:
def main():
    """Train a ForwardModel on the small navigation dataset.

    Loads the train/test pickles through SeqData, builds the model in a fresh
    graph and session, and trains it under the checkpoint name "trained_model_1".
    """
    print('Reading Data')
    task_name = 'navigation'
    train_path = "{}-data-train-small.pickle".format(task_name)
    test_path = "{}-data-test-small.pickle".format(task_name)
    train = SeqData(train_path)
    test = SeqData(test_path)

    print(train.env.stateSubVectors)
    print('Defining Model')
    # Optimisation settings
    learning_rate = 0.0005
    training_steps = 15000
    batch_size = 64
    display_step = 200
    # Network dimensions
    n_hidden = 200
    len_state = train.lenOfState
    len_input = train.lenOfInput

    print('Initializing FM')
    with tf.Graph().as_default():
        # The session is entered only to become the default one used by the model.
        with tf.Session():
            fm = ForwardModel(len_state, len_input, n_hidden)
            print('FM initialized')
            fm.train(trainset=train,
                     testset=test,
                     training_steps=training_steps,
                     batch_size=batch_size,
                     env=train.env,
                     learning_rate=learning_rate,
                     display_step=display_step,
                     model_file_name="trained_model_1")


In [9]:
# Train the baseline ForwardModel; main() trains under the name "trained_model_1".
main()

Reading Data
Reading navigation-data-train-small.pickle
	Built
Reading navigation-data-test-small.pickle
	Built
5
Defining Model
Initializing FM
FM initialized
Entering loss func
Defining optimizer
Running TF initializer
Entering train loop
Step 64, Minibatch Loss= 779.468323, Training Accuracy= 0.08750
Step 12800, Minibatch Loss= 221.971603, Training Accuracy= 0.91562
Step 25600, Minibatch Loss= 93.502754, Training Accuracy= 0.94687
Step 38400, Minibatch Loss= 87.324150, Training Accuracy= 0.94687
Step 51200, Minibatch Loss= 96.650108, Training Accuracy= 0.92500
Step 64000, Minibatch Loss= 81.491425, Training Accuracy= 0.93437
Step 76800, Minibatch Loss= 76.359688, Training Accuracy= 0.93750
Step 89600, Minibatch Loss= 58.662125, Training Accuracy= 0.94687
Step 102400, Minibatch Loss= 89.629333, Training Accuracy= 0.90312
Step 115200, Minibatch Loss= 63.239368, Training Accuracy= 0.94375
Step 128000, Minibatch Loss= 38.974918, Training Accuracy= 0.95625
Step 140800, Minibatch Loss= 51

In [48]:
def run(input_value):
    """Restore 'trained_model_1.ckpt' and evaluate the network on one input row.

    A new graph and session are created on every call. The argmax of each of
    the five state sub-vectors is printed for the input and for the output,
    plus the argmax of the action part of the input.

    Args:
        input_value: array whose row 0 holds 64 state entries followed by
            10 action entries.

    Returns:
        The raw network output as returned by ``sess.run``.
    """
    separator = "---------------------------"
    # (start, stop) column ranges of the five state sub-vectors
    state_slices = ((0, 15), (15, 30), (30, 34), (34, 49), (49, 64))

    def show_argmaxes(row):
        # One printed line per sub-vector.
        for start, stop in state_slices:
            print(np.argmax(row[start:stop]))

    with tf.Graph().as_default():
        with tf.Session() as sess:
            model = ForwardModel(64, 74, 200)
            model.load_model('trained_model_1.ckpt')
            # Second application of the restored weights, directly on the numpy input.
            output_op = model.build_graph(input_value, reuse=True)

            print(separator)
            print("Inital State: ")
            show_argmaxes(input_value[0])
            print(separator)
            print("Action: ")
            print(np.argmax(input_value[0][64:74]))
            print(separator)

            result = sess.run(output_op)
            print("Result: ")
            show_argmaxes(result[0])
            return result

In [30]:
# Test cases 1: apply each of the 10 one-hot actions to the state of a freshly
# constructed NavigationTask and show what the baseline model outputs.
print("Testing actions and  state")
print("-----")
for i in range(10):
    env = NavigationTask() #(stochasticity=0.2)
    state_i = env.getStateRep()
    # One-hot action vector with entry i set.
    index = np.array([i])
    a_s = np.zeros(10)
    a_s[index] = 1
    # Model input: state followed by action, as a single float32 row.
    input_value = np.concatenate((state_i, a_s)).reshape(1, 74).astype(np.float32)
    run(input_value)
    print("-----------------------------------------------------------")

Testing actions and  state
INFO:tensorflow:Restoring parameters from trained_model_1.ckpt
---------------------------
Inital State: 
0
0
0
14
14
---------------------------
Action: 
0
---------------------------
Result: 
0
0
0
14
14
-----------------------------------------------------------
INFO:tensorflow:Restoring parameters from trained_model_1.ckpt
---------------------------
Inital State: 
0
0
0
14
14
---------------------------
Action: 
1
---------------------------
Result: 
0
0
0
14
14
-----------------------------------------------------------
INFO:tensorflow:Restoring parameters from trained_model_1.ckpt
---------------------------
Inital State: 
0
0
0
14
14
---------------------------
Action: 
2
---------------------------
Result: 
0
0
1
14
14
-----------------------------------------------------------
INFO:tensorflow:Restoring parameters from trained_model_1.ckpt
---------------------------
Inital State: 
0
0
0
14
14
---------------------------
Action: 
3
------------------

In [38]:
# Test cases 2: same sweep over the 10 actions, but each one starts from a
# randomly drawn agent position.
print("Testing different inital state and actions")
print("-----")
for i in range(10):
    # NOTE(review): randint's upper bound is exclusive, so h and w are drawn from 0..13.
    h = np.random.randint(14, size=1)[0]
    w = np.random.randint(14, size=1)[0]
    env = NavigationTask(agent_start_pos=[np.array([h, w]), 'N']) #(stochasticity=0.2)
    state_i = env.getStateRep()
    # One-hot action vector with entry i set.
    index = np.array([i])
    a_s = np.zeros(10)
    a_s[index] = 1
    # Model input: state followed by action, as a single float32 row.
    input_value = np.concatenate((state_i, a_s)).reshape(1, 74).astype(np.float32)
    run(input_value)
    print("-----------------------------------------------------------")

Testing different inital state and actions
-----
INFO:tensorflow:Restoring parameters from trained_model_1.ckpt
---------------------------
Inital State: 
3
0
0
14
14
---------------------------
Action: 
0
---------------------------
Result: 
3
0
0
14
14
-----------------------------------------------------------
INFO:tensorflow:Restoring parameters from trained_model_1.ckpt
---------------------------
Inital State: 
8
12
0
14
14
---------------------------
Action: 
1
---------------------------
Result: 
8
12
0
14
14
-----------------------------------------------------------
INFO:tensorflow:Restoring parameters from trained_model_1.ckpt
---------------------------
Inital State: 
13
6
0
14
14
---------------------------
Action: 
2
---------------------------
Result: 
13
6
1
14
14
-----------------------------------------------------------
INFO:tensorflow:Restoring parameters from trained_model_1.ckpt
---------------------------
Inital State: 
8
1
0
14
14
---------------------------
Act

In [50]:
# Test cases 3: roll the baseline model forward for three steps, feeding its
# own output back in as the next state.
print("Testing path of states")
print("-----")
env = NavigationTask() #(stochasticity=0.2)
state_i = env.getStateRep()
for i in range(3):
    # NOTE(review): randint(9) draws from 0..8, so action index 9 is never
    # sampled - confirm that this is intended.
    k = np.random.randint(9, size=1)[0]
    index = np.array([k])
    a_s = np.zeros(10)
    a_s[index] = 1
    # Model input: state followed by action, as a single float32 row.
    input_value = np.concatenate((state_i, a_s)).reshape(1, 74).astype(np.float32)
    # The next state is the raw (un-discretised) network output row.
    state_i = run(input_value)[0]
    print("-----------------------------------------------------------")

Testing path of states
-----
INFO:tensorflow:Restoring parameters from trained_model_1.ckpt
---------------------------
Inital State: 
0
0
0
14
14
---------------------------
Action: 
4
---------------------------
Result: 
0
0
3
14
14
-----------------------------------------------------------
INFO:tensorflow:Restoring parameters from trained_model_1.ckpt
---------------------------
Inital State: 
0
0
3
14
14
---------------------------
Action: 
4
---------------------------
Result: 
0
0
3
8
10
-----------------------------------------------------------
INFO:tensorflow:Restoring parameters from trained_model_1.ckpt
---------------------------
Inital State: 
0
0
3
8
10
---------------------------
Action: 
4
---------------------------
Result: 
0
2
3
8
10
-----------------------------------------------------------


In [45]:
class ForwardModelWithNoise():
    """Forward model trained on inputs perturbed with Gaussian noise.

    The network maps a (state, action) vector to ``obs_space`` logits through
    two ReLU hidden layers. ``loss_function`` splits each output row into
    sub-vectors with ``env.deconcatenateOneHotStateVector`` and applies a
    softmax cross-entropy to every sub-vector.

    Every method that executes ops uses the TensorFlow default session, so it
    must be called inside a ``with tf.Session():`` block.
    """
    def __init__(self,
                obs_space,
                input_space,
                n_hidden=100
                ):
        """Create the placeholders, the prediction graph and a Saver.

        Args:
            obs_space: width of the state vector (size of the network output).
            input_space: width of the network input; ``input_space - obs_space``
                is stored as ``act_space``.
            n_hidden: number of units in each of the two hidden layers.
        """
        self.n_hidden=n_hidden
        self.act_space=input_space-obs_space
        self.obs_space=obs_space
        #Placeholders
        self.input = tf.placeholder("float", [None, input_space])
        self.truevalue = tf.placeholder("float", [None, obs_space])
        self.pred=self.build_graph(self.input)
        # Created before any optimizer exists, so the Saver only covers the
        # variables present at this point (the network weights).
        self.saver = tf.train.Saver()

    @staticmethod
    def _default_session():
        """Return the default session or raise a readable error if there is none."""
        sess = tf.get_default_session()
        if sess is None:
            raise RuntimeError("No default TensorFlow session; call this inside `with tf.Session():`")
        return sess

    def loss_function(self,batch_size,env):
        """Build the loss and the correct-prediction counter for one batch.

        Only rows ``0 .. batch_size-1`` of the fed batch contribute, because the
        graph is unrolled row by row.

        Returns:
            (cost, accTotal): summed softmax cross-entropy over all sub-vectors,
            and the number of sub-vectors whose argmax matches the label.
        """
        accTotal=0
        cost=0
        for i in range(0,batch_size):
            predVecs = env.deconcatenateOneHotStateVector(self.pred[i,:])
            labelVecs = env.deconcatenateOneHotStateVector(self.truevalue[i,:])
            for pv,lv in zip(predVecs,labelVecs):
                cost += tf.nn.softmax_cross_entropy_with_logits(logits=pv, labels=lv)
                accTotal += tf.cast(tf.equal(tf.argmax(pv,axis=0), tf.argmax(lv,axis=0)), tf.float32)
        return cost,accTotal

    def build_graph(self,inputVec, reuse=None):
        """Return the output tensor of the network applied to ``inputVec``.

        Variables live in the "forward-model" scope; pass ``reuse=True`` to
        apply the already-created weights to another input.
        """
        with tf.variable_scope("forward-model", reuse=reuse):
            hidden = slim.fully_connected(inputVec, self.n_hidden, biases_initializer=None, activation_fn=tf.nn.relu)
            hidden2 = slim.fully_connected(hidden, self.n_hidden, biases_initializer=None, activation_fn=tf.nn.relu)
            return slim.fully_connected(hidden2,self.obs_space, activation_fn=None, biases_initializer=None)

    def predict(self, x):
        """Run the network on ``x`` (rows of width ``input_space``).

        Returns:
            A one-element list holding the output array, because the fetch is
            passed to ``sess.run`` as a list.
        """
        sess = self._default_session()
        return sess.run([self.pred], {self.input:x})

    def load_model(self,model_file_name):
        """Restore the variables covered by ``self.saver`` from a checkpoint path."""
        sess = self._default_session()
        self.saver.restore(sess, model_file_name)

    def _chunked_accuracy(self, sess, data, labels, batch_size):
        """Average ``self.accuracy`` over consecutive full chunks of ``batch_size`` rows.

        ``self.accuracy`` is unrolled over exactly ``batch_size`` rows, so a
        larger set has to be scored chunk by chunk; a trailing partial chunk is
        dropped because the unrolled graph cannot index rows it was not fed.
        """
        data, labels = np.asarray(data), np.asarray(labels)
        n_chunks = len(data) // batch_size
        if n_chunks == 0:
            raise ValueError("Need at least batch_size=%d evaluation rows, got %d" % (batch_size, len(data)))
        scores = []
        for k in range(n_chunks):
            rows = slice(k * batch_size, (k + 1) * batch_size)
            scores.append(sess.run(self.accuracy, feed_dict={self.input: data[rows], self.truevalue: labels[rows]}))
        return np.mean(scores)

    def train(self,trainset,testset,training_steps,batch_size,env,learning_rate,display_step, model_file_name=None, noise_sigma=0.1):
        """Train with Adam on noise-perturbed inputs, report test accuracy and save.

        Note: this runs the global variable initializer, so weights restored
        beforehand with ``load_model`` are overwritten.

        Args:
            trainset, testset: datasets providing ``next_batch_nonseq(n)``;
                ``trainset.env.stateSubVectors`` is used to normalise accuracy.
            training_steps: number of optimizer steps.
            batch_size: rows per training batch (also the unroll length of the loss).
            env: passed to ``loss_function``.
            learning_rate: Adam learning rate.
            display_step: log every ``display_step`` steps (and at step 1).
            model_file_name: checkpoint base name; defaults to
                "FWR_model_<timestamp>" computed when ``train`` is called.
            noise_sigma: standard deviation of the Gaussian noise added to every
                training input.

        Returns:
            Accuracy on up to 5000 (noise-free) test rows, averaged over full
            chunks of ``batch_size`` rows.
        """
        if model_file_name is None:
            # Resolve at call time; a default in the signature would be frozen
            # at class-definition time and shared by every later call.
            model_file_name = "FWR_model_"+time.strftime("%Y%m%d-%H%M%S")
        sess = self._default_session()
        print('Entering loss func')
        cost,accTotal = self.loss_function(batch_size,env)
        print('Defining optimizer')
        self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
        self.accuracy = accTotal / (batch_size * trainset.env.stateSubVectors)
        # Initialize the variables (i.e. assign their default value)
        print('Running TF initializer')
        init = tf.global_variables_initializer()
        sess.run(init)
        print('Entering train loop')
        for step in range(1, training_steps + 1):
            batch_x, batch_y = trainset.next_batch_nonseq(batch_size)
            npbx = np.array( batch_x )
            noise = noise_sigma * np.random.randn( npbx.shape[0], npbx.shape[1] )
            # Build a new array instead of `batch_x += noise`: in-place addition
            # would modify whatever object the dataset handed out, and on a
            # Python list it would append the noise rows rather than add them.
            noisy_x = npbx + noise
            sess.run(self.optimizer, feed_dict={self.input: noisy_x, self.truevalue: batch_y})
            if step % display_step == 0 or step == 1:
                # Calculate batch accuracy & loss
                acc, loss = sess.run([self.accuracy, cost], feed_dict={self.input: noisy_x, self.truevalue: batch_y})
                print("Step " + str(step*batch_size) + ", Minibatch Loss= " + \
                      "{:.6f}".format(loss) + ", Training Accuracy= " + \
                      "{:.5f}".format(acc))
        print("Optimization Finished!")
        # Calculate accuracy over the whole test sample, not just its first batch_size rows
        test_data, test_label = testset.next_batch_nonseq(5000)
        acc = self._chunked_accuracy(sess, test_data, test_label, batch_size)
        print("Testing Accuracy:",acc)
        self.saver.save(sess, "./"+model_file_name+".ckpt")
        print("Model Saved")
        return acc

In [51]:
def main2():
    """Train a ForwardModelWithNoise on the small navigation dataset.

    Loads the train/test pickles through SeqData, builds the model in a fresh
    graph and session, and trains it under the checkpoint name "trained_model_2".
    """
    print('Reading Data')
    task_name = 'navigation'
    train_path = "{}-data-train-small.pickle".format(task_name)
    test_path = "{}-data-test-small.pickle".format(task_name)
    train = SeqData(train_path)
    test = SeqData(test_path)

    print(train.env.stateSubVectors)
    print('Defining Model')
    # Optimisation settings
    learning_rate = 0.0005
    training_steps = 15000
    batch_size = 64
    display_step = 200
    # Network dimensions
    n_hidden = 200
    len_state = train.lenOfState
    len_input = train.lenOfInput

    print('Initializing FM')
    with tf.Graph().as_default():
        # The session is entered only to become the default one used by the model.
        with tf.Session():
            fm = ForwardModelWithNoise(len_state, len_input, n_hidden)
            print('FM initialized')
            fm.train(trainset=train,
                     testset=test,
                     training_steps=training_steps,
                     batch_size=batch_size,
                     env=train.env,
                     learning_rate=learning_rate,
                     display_step=display_step,
                     model_file_name="trained_model_2")


In [52]:
# Train the noise-augmented model; main2() trains under the name "trained_model_2".
main2()

Reading Data
Reading navigation-data-train-small.pickle
	Built
Reading navigation-data-test-small.pickle
	Built
5
Defining Model
Initializing FM
FM initialized
Entering loss func
Defining optimizer
Running TF initializer
Entering train loop
Step 64, Minibatch Loss= 779.819275, Training Accuracy= 0.10000
Step 12800, Minibatch Loss= 251.343781, Training Accuracy= 0.90625
Step 25600, Minibatch Loss= 152.013504, Training Accuracy= 0.91250
Step 38400, Minibatch Loss= 115.781525, Training Accuracy= 0.91875
Step 51200, Minibatch Loss= 109.605118, Training Accuracy= 0.92188
Step 64000, Minibatch Loss= 106.370415, Training Accuracy= 0.92813
Step 76800, Minibatch Loss= 98.728577, Training Accuracy= 0.92188
Step 89600, Minibatch Loss= 104.499077, Training Accuracy= 0.91562
Step 102400, Minibatch Loss= 84.629402, Training Accuracy= 0.92813
Step 115200, Minibatch Loss= 91.076355, Training Accuracy= 0.92813
Step 128000, Minibatch Loss= 87.292824, Training Accuracy= 0.92813
Step 140800, Minibatch Los

In [62]:
def run2(input_value):
    """Restore 'trained_model_2.ckpt' and evaluate the network on one input row.

    A new graph and session are created on every call. The argmax of each of
    the five state sub-vectors is printed for the input and for the output,
    plus the argmax of the action part of the input.

    Args:
        input_value: array whose row 0 holds 64 state entries followed by
            10 action entries.

    Returns:
        The raw network output as returned by ``sess.run``.
    """
    separator = "---------------------------"
    # (start, stop) column ranges of the five state sub-vectors
    state_slices = ((0, 15), (15, 30), (30, 34), (34, 49), (49, 64))

    def show_argmaxes(row):
        # One printed line per sub-vector.
        for start, stop in state_slices:
            print(np.argmax(row[start:stop]))

    with tf.Graph().as_default():
        with tf.Session() as sess:
            model = ForwardModelWithNoise(64, 74, 200)
            model.load_model('trained_model_2.ckpt')
            # Second application of the restored weights, directly on the numpy input.
            output_op = model.build_graph(input_value, reuse=True)

            print(separator)
            print("Inital State: ")
            show_argmaxes(input_value[0])
            print(separator)
            print("Action: ")
            print(np.argmax(input_value[0][64:74]))
            print(separator)

            result = sess.run(output_op)
            print("Result: ")
            show_argmaxes(result[0])
            return result

In [63]:
# Test cases 1: apply each of the 10 one-hot actions to the state of a freshly
# constructed NavigationTask and show what the noise-trained model outputs.
print("Testing actions and  state")
print("-----")
for i in range(10):
    env = NavigationTask() #(stochasticity=0.2)
    state_i = env.getStateRep()
    # One-hot action vector with entry i set.
    index = np.array([i])
    a_s = np.zeros(10)
    a_s[index] = 1
    # Model input: state followed by action, as a single float32 row.
    input_value = np.concatenate((state_i, a_s)).reshape(1, 74).astype(np.float32)
    run2(input_value)
    print("-----------------------------------------------------------")

Testing actions and  state
-----
INFO:tensorflow:Restoring parameters from trained_model_2.ckpt
---------------------------
Inital State: 
0
0
0
14
14
---------------------------
Action: 
0
---------------------------
Result: 
0
2
0
14
14
-----------------------------------------------------------
INFO:tensorflow:Restoring parameters from trained_model_2.ckpt
---------------------------
Inital State: 
0
0
0
14
14
---------------------------
Action: 
1
---------------------------
Result: 
0
0
0
14
14
-----------------------------------------------------------
INFO:tensorflow:Restoring parameters from trained_model_2.ckpt
---------------------------
Inital State: 
0
0
0
14
14
---------------------------
Action: 
2
---------------------------
Result: 
0
0
1
14
14
-----------------------------------------------------------
INFO:tensorflow:Restoring parameters from trained_model_2.ckpt
---------------------------
Inital State: 
0
0
0
14
14
---------------------------
Action: 
3
------------

In [64]:
# Test cases 3: roll the noise-trained model forward for three steps, feeding
# its own output back in as the next state.
print("Testing path of states")
print("-----")
env = NavigationTask() #(stochasticity=0.2)
state_i = env.getStateRep()
for i in range(3):
    # NOTE(review): randint(9) draws from 0..8, so action index 9 is never
    # sampled - confirm that this is intended.
    k = np.random.randint(9, size=1)[0]
    index = np.array([k])
    a_s = np.zeros(10)
    a_s[index] = 1
    # Model input: state followed by action, as a single float32 row.
    input_value = np.concatenate((state_i, a_s)).reshape(1, 74).astype(np.float32)
    # The next state is the raw (un-discretised) network output row.
    state_i = run2(input_value)[0]
    print("-----------------------------------------------------------")

Testing path of states
-----
INFO:tensorflow:Restoring parameters from trained_model_2.ckpt
---------------------------
Inital State: 
0
0
0
14
14
---------------------------
Action: 
6
---------------------------
Result: 
0
2
0
14
14
-----------------------------------------------------------
INFO:tensorflow:Restoring parameters from trained_model_2.ckpt
---------------------------
Inital State: 
0
2
0
14
14
---------------------------
Action: 
1
---------------------------
Result: 
0
3
2
9
4
-----------------------------------------------------------
INFO:tensorflow:Restoring parameters from trained_model_2.ckpt
---------------------------
Inital State: 
0
3
2
9
4
---------------------------
Action: 
5
---------------------------
Result: 
4
4
3
2
7
-----------------------------------------------------------


In [56]:
def sample_gumbel(shape, eps=1e-20):
    """Draw Gumbel(0, 1) noise of the given shape.

    Uniform samples are transformed with -log(-log(u)); ``eps`` is added
    inside both logarithms to keep their arguments away from zero.
    """
    uniform = tf.random_uniform(shape, minval=0, maxval=1)
    neg_log_u = -tf.log(uniform + eps)
    return -tf.log(neg_log_u + eps)

def gumbel_softmax_sample(logits, temperature):
    """Return a soft sample from the Gumbel-Softmax distribution.

    Gumbel noise is added to ``logits``; the sum is divided by ``temperature``
    and passed through a softmax.
    """
    gumbel_noise = sample_gumbel(tf.shape(logits))
    perturbed = logits + gumbel_noise
    return tf.nn.softmax(perturbed / temperature)

def gumbel_softmax(logits, temperature, hard=False):
    """Sample from the Gumbel-Softmax distribution and optionally discretize.

    Args:
        logits: [batch_size, n_class] unnormalized log-probs
        temperature: positive scalar; the perturbed logits are divided by it,
            so it must not be zero
        hard: if True, return a discretized sample in the forward pass while
            differentiating w.r.t. the soft sample y

    Returns:
        [batch_size, n_class] sample from the Gumbel-Softmax distribution.
        If hard=True, each row marks its maximum entries with 1 (one-hot unless
        several entries tie for the maximum); otherwise each row is a
        probability distribution that sums to 1 across classes.
    """
    y = gumbel_softmax_sample(logits, temperature)
    if not hard:
        return y
    # 1 where an entry equals its row maximum, 0 elsewhere.
    # (`keep_dims` is the TF 1.x spelling used throughout this file.)
    y_hard = tf.cast(tf.equal(y, tf.reduce_max(y, 1, keep_dims=True)), y.dtype)
    # Straight-through estimator: the forward value is y_hard, the gradient is that of y.
    return tf.stop_gradient(y_hard - y) + y

In [73]:
class ForwardModelDifferentLoss():
    """Forward model trained with a mean-squared-error loss on noisy inputs.

    The network maps a (state, action) vector to ``obs_space`` outputs through
    two ReLU hidden layers. The loss is the mean squared difference between
    the raw outputs and the target vector; accuracy is still measured per
    sub-vector as returned by ``env.deconcatenateOneHotStateVector``.

    Every method that executes ops uses the TensorFlow default session, so it
    must be called inside a ``with tf.Session():`` block.
    """
    def __init__(self,
                obs_space,
                input_space,
                n_hidden=100
                ):
        """Create the placeholders, the prediction graph and a Saver.

        Args:
            obs_space: width of the state vector (size of the network output).
            input_space: width of the network input; ``input_space - obs_space``
                is stored as ``act_space``.
            n_hidden: number of units in each of the two hidden layers.
        """
        self.n_hidden=n_hidden
        self.act_space=input_space-obs_space
        self.obs_space=obs_space
        #Placeholders
        self.input = tf.placeholder("float", [None, input_space])
        self.truevalue = tf.placeholder("float", [None, obs_space])
        self.pred=self.build_graph(self.input)
        # Created before any optimizer exists, so the Saver only covers the
        # variables present at this point (the network weights).
        self.saver = tf.train.Saver()

    @staticmethod
    def _default_session():
        """Return the default session or raise a readable error if there is none."""
        sess = tf.get_default_session()
        if sess is None:
            raise RuntimeError("No default TensorFlow session; call this inside `with tf.Session():`")
        return sess

    def loss_function(self,batch_size,env):
        """Build the MSE loss and the correct-prediction counter for one batch.

        The counter only covers rows ``0 .. batch_size-1`` of the fed batch
        because it is unrolled row by row; the MSE covers every fed row.

        Returns:
            (cost, accTotal): mean squared error between ``self.pred`` and
            ``self.truevalue``, and the number of sub-vectors whose argmax
            matches the label.
        """
        accTotal=0
        for i in range(0,batch_size):
            predVecs = env.deconcatenateOneHotStateVector(self.pred[i,:])
            labelVecs = env.deconcatenateOneHotStateVector(self.truevalue[i,:])
            for pv,lv in zip(predVecs,labelVecs):
                accTotal += tf.cast(tf.equal(tf.argmax(pv,axis=0), tf.argmax(lv,axis=0)), tf.float32)
        cost =  tf.reduce_mean(tf.pow(self.pred - self.truevalue, 2))
        return cost,accTotal

    def build_graph(self,inputVec, reuse=None):
        """Return the output tensor of the network applied to ``inputVec``.

        Variables live in the "forward-model" scope; pass ``reuse=True`` to
        apply the already-created weights to another input.
        """
        with tf.variable_scope("forward-model", reuse=reuse):
            hidden = slim.fully_connected(inputVec, self.n_hidden, biases_initializer=None, activation_fn=tf.nn.relu)
            hidden2 = slim.fully_connected(hidden, self.n_hidden, biases_initializer=None, activation_fn=tf.nn.relu)
            return slim.fully_connected(hidden2,self.obs_space, activation_fn=None, biases_initializer=None)

    def predict(self, x):
        """Run the network on ``x`` (rows of width ``input_space``).

        Returns:
            A one-element list holding the output array, because the fetch is
            passed to ``sess.run`` as a list.
        """
        sess = self._default_session()
        return sess.run([self.pred], {self.input:x})

    def load_model(self,model_file_name):
        """Restore the variables covered by ``self.saver`` from a checkpoint path."""
        sess = self._default_session()
        self.saver.restore(sess, model_file_name)

    def _chunked_accuracy(self, sess, data, labels, batch_size):
        """Average ``self.accuracy`` over consecutive full chunks of ``batch_size`` rows.

        ``self.accuracy`` is unrolled over exactly ``batch_size`` rows, so a
        larger set has to be scored chunk by chunk; a trailing partial chunk is
        dropped because the unrolled graph cannot index rows it was not fed.
        """
        data, labels = np.asarray(data), np.asarray(labels)
        n_chunks = len(data) // batch_size
        if n_chunks == 0:
            raise ValueError("Need at least batch_size=%d evaluation rows, got %d" % (batch_size, len(data)))
        scores = []
        for k in range(n_chunks):
            rows = slice(k * batch_size, (k + 1) * batch_size)
            scores.append(sess.run(self.accuracy, feed_dict={self.input: data[rows], self.truevalue: labels[rows]}))
        return np.mean(scores)

    def train(self,trainset,testset,training_steps,batch_size,env,learning_rate,display_step, model_file_name=None, noise_sigma=0.2):
        """Train with Adam on noise-perturbed inputs, report test accuracy and save.

        Note: this runs the global variable initializer, so weights restored
        beforehand with ``load_model`` are overwritten.

        Args:
            trainset, testset: datasets providing ``next_batch_nonseq(n)``;
                ``trainset.env.stateSubVectors`` is used to normalise accuracy.
            training_steps: number of optimizer steps.
            batch_size: rows per training batch (also the unroll length of the
                accuracy counter).
            env: passed to ``loss_function``.
            learning_rate: Adam learning rate.
            display_step: log every ``display_step`` steps (and at step 1).
            model_file_name: checkpoint base name; defaults to
                "FWR_model_<timestamp>" computed when ``train`` is called.
            noise_sigma: standard deviation of the Gaussian noise added to every
                training input.

        Returns:
            Accuracy on up to 5000 (noise-free) test rows, averaged over full
            chunks of ``batch_size`` rows.
        """
        if model_file_name is None:
            # Resolve at call time; a default in the signature would be frozen
            # at class-definition time and shared by every later call.
            model_file_name = "FWR_model_"+time.strftime("%Y%m%d-%H%M%S")
        sess = self._default_session()
        print('Entering loss func')
        cost,accTotal = self.loss_function(batch_size,env)
        print('Defining optimizer')
        self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
        self.accuracy = accTotal / (batch_size * trainset.env.stateSubVectors)
        # Initialize the variables (i.e. assign their default value)
        print('Running TF initializer')
        init = tf.global_variables_initializer()
        sess.run(init)
        print('Entering train loop')
        for step in range(1, training_steps + 1):
            batch_x, batch_y = trainset.next_batch_nonseq(batch_size)
            npbx = np.array( batch_x )
            noise = noise_sigma * np.random.randn( npbx.shape[0], npbx.shape[1] )
            # Build a new array instead of `batch_x += noise`: in-place addition
            # would modify whatever object the dataset handed out, and on a
            # Python list it would append the noise rows rather than add them.
            noisy_x = npbx + noise
            sess.run(self.optimizer, feed_dict={self.input: noisy_x, self.truevalue: batch_y})
            if step % display_step == 0 or step == 1:
                # Calculate batch accuracy & loss
                acc, loss = sess.run([self.accuracy, cost], feed_dict={self.input: noisy_x, self.truevalue: batch_y})
                print("Step " + str(step*batch_size) + ", Minibatch Loss= " + \
                      "{:.6f}".format(loss) + ", Training Accuracy= " + \
                      "{:.5f}".format(acc))
        print("Optimization Finished!")
        # Calculate accuracy over the whole test sample, not just its first batch_size rows
        test_data, test_label = testset.next_batch_nonseq(5000)
        acc = self._chunked_accuracy(sess, test_data, test_label, batch_size)
        print("Testing Accuracy:",acc)
        self.saver.save(sess, "./"+model_file_name+".ckpt")
        print("Model Saved")
        return acc

In [74]:
def main3():
    """Train a ForwardModelDifferentLoss on the small navigation dataset.

    Loads the train/test pickles through SeqData, builds the model in a fresh
    graph and session, and trains it under the checkpoint name
    "trained_model_3_noise_0_2".
    """
    print('Reading Data')
    task_name = 'navigation'
    train_path = "{}-data-train-small.pickle".format(task_name)
    test_path = "{}-data-test-small.pickle".format(task_name)
    train = SeqData(train_path)
    test = SeqData(test_path)

    print(train.env.stateSubVectors)
    print('Defining Model')
    # Optimisation settings
    learning_rate = 0.0005
    training_steps = 15000
    batch_size = 64
    display_step = 200
    # Network dimensions
    n_hidden = 200
    len_state = train.lenOfState
    len_input = train.lenOfInput

    print('Initializing FM')
    with tf.Graph().as_default():
        # The session is entered only to become the default one used by the model.
        with tf.Session():
            fm = ForwardModelDifferentLoss(len_state, len_input, n_hidden)
            print('FM initialized')
            fm.train(trainset=train,
                     testset=test,
                     training_steps=training_steps,
                     batch_size=batch_size,
                     env=train.env,
                     learning_rate=learning_rate,
                     display_step=display_step,
                     model_file_name="trained_model_3_noise_0_2")

In [75]:
# Train the MSE-loss model; main3() trains under the name "trained_model_3_noise_0_2".
main3()

Reading Data
Reading navigation-data-train-small.pickle
	Built
Reading navigation-data-test-small.pickle
	Built
5
Defining Model
Initializing FM
FM initialized
Entering loss func
Defining optimizer
Running TF initializer
Entering train loop
Step 64, Minibatch Loss= 0.100181, Training Accuracy= 0.15000
Step 12800, Minibatch Loss= 0.042860, Training Accuracy= 0.83438
Step 25600, Minibatch Loss= 0.035182, Training Accuracy= 0.86875
Step 38400, Minibatch Loss= 0.032166, Training Accuracy= 0.90000
Step 51200, Minibatch Loss= 0.030690, Training Accuracy= 0.91562
Step 64000, Minibatch Loss= 0.030267, Training Accuracy= 0.90625
Step 76800, Minibatch Loss= 0.028286, Training Accuracy= 0.90000
Step 89600, Minibatch Loss= 0.026643, Training Accuracy= 0.91562
Step 102400, Minibatch Loss= 0.027317, Training Accuracy= 0.87500
Step 115200, Minibatch Loss= 0.024159, Training Accuracy= 0.90625
Step 128000, Minibatch Loss= 0.021692, Training Accuracy= 0.92813
Step 140800, Minibatch Loss= 0.022776, Train

In [76]:
def run3(input_value):
    """Restore 'trained_model_3_noise_0_2.ckpt' and evaluate the network on one input row.

    A new graph and session are created on every call. The argmax of each of
    the five state sub-vectors is printed for the input and for the output,
    plus the argmax of the action part of the input.

    Args:
        input_value: array whose row 0 holds 64 state entries followed by
            10 action entries.

    Returns:
        The raw network output as returned by ``sess.run``.
    """
    separator = "---------------------------"
    # (start, stop) column ranges of the five state sub-vectors
    state_slices = ((0, 15), (15, 30), (30, 34), (34, 49), (49, 64))

    def show_argmaxes(row):
        # One printed line per sub-vector.
        for start, stop in state_slices:
            print(np.argmax(row[start:stop]))

    with tf.Graph().as_default():
        with tf.Session() as sess:
            model = ForwardModelDifferentLoss(64, 74, 200)
            model.load_model('trained_model_3_noise_0_2.ckpt')
            # Second application of the restored weights, directly on the numpy input.
            output_op = model.build_graph(input_value, reuse=True)

            print(separator)
            print("Inital State: ")
            show_argmaxes(input_value[0])
            print(separator)
            print("Action: ")
            print(np.argmax(input_value[0][64:74]))
            print(separator)

            result = sess.run(output_op)
            print("Result: ")
            show_argmaxes(result[0])
            return result

In [158]:
# Test cases 3: roll the MSE-trained model forward for three steps, feeding its
# own output back in as the next state and printing that raw output each step.
print("Testing path of states")
print("-----")
env = NavigationTask() #(stochasticity=0.2)
state_i = env.getStateRep()
for i in range(3):
    # NOTE(review): randint(9) draws from 0..8, so action index 9 is never
    # sampled - confirm that this is intended.
    k = np.random.randint(9, size=1)[0]
    index = np.array([k])
    a_s = np.zeros(10)
    a_s[index] = 1
    # Model input: state followed by action, as a single float32 row.
    input_value = np.concatenate((state_i, a_s)).reshape(1, 74).astype(np.float32)
    # The next state is the raw (un-discretised) network output row.
    state_i = run3(input_value)[0]
    print(state_i)
    print("-----------------------------------------------------------")

Testing path of states
-----
INFO:tensorflow:Restoring parameters from trained_model_3_noise_0_2.ckpt
---------------------------
Inital State: 
0
0
0
14
14
---------------------------
Action: 
5
---------------------------
Result: 
0
2
0
14
14
[  9.70046341e-01   3.75662595e-02   1.64687522e-02  -2.36040354e-03
  -4.45783958e-02  -2.03759503e-02  -8.03286582e-02  -3.49288210e-02
   2.82561965e-03   9.49040055e-03   2.84422971e-02   2.87823323e-02
  -5.04236668e-04   1.06691774e-02   6.50222600e-03   1.01937644e-01
   9.79488045e-02   1.52118102e-01   1.34145349e-01   1.27294734e-01
   1.44979551e-01   3.39143686e-02   5.47162592e-02   4.74073552e-02
  -5.53238392e-03   9.29718837e-04   4.66256402e-04   4.01410684e-02
   1.81781910e-02   2.23833099e-02   8.90936494e-01   3.90470922e-02
  -3.08213755e-04  -1.15062520e-02  -4.16978728e-03   7.63580576e-03
  -1.70690119e-02  -3.42252757e-03  -6.95768185e-03  -1.45979971e-03
  -1.27382278e-02   2.92120129e-03  -8.79930705e-03   1.13955215e

In [10]:
class ForwardModelSoftmax():
    """Feed-forward next-state model whose output is a row of softmax groups.

    The network is two bias-free ReLU layers followed by a bias-free linear
    layer of width ``obs_space``. The linear output is cut into consecutive
    column groups (see ``SUBVECTOR_SIZES``) and each group is passed through
    its own softmax before the groups are concatenated again.

    Parameters
    ----------
    obs_space : int
        Width of the state vector (size of the prediction and of the label).
    input_space : int
        Width of the network input; ``input_space - obs_space`` is stored as
        ``act_space``.
    env : object
        Stored on the instance as ``self.env``; not used while building the graph.
    batch_size : int
        Accepted for interface compatibility; the graph itself is built with a
        dynamic batch dimension.
    n_hidden : int, optional
        Number of units in each hidden layer.
    """

    # Widths of the consecutive column groups that are softmax-normalised
    # independently. They correspond to the slices 0:15, 15:30, 30:34, 34:49
    # and 49:64 -- presumably the one-hot sub-vectors of the navigation state;
    # TODO confirm against env.deconcatenateOneHotStateVector.
    SUBVECTOR_SIZES = (15, 15, 4, 15, 15)

    def __init__(self,
                 obs_space,
                 input_space,
                 env,
                 batch_size,
                 n_hidden=100
                 ):
        self.n_hidden = n_hidden
        self.act_space = input_space - obs_space
        self.obs_space = obs_space
        # Placeholders
        self.input = tf.placeholder("float", [None, input_space])
        self.truevalue = tf.placeholder("float", [None, obs_space])
        self.pred = self.build_graph(self.input, batch_size, env)
        # Created before any optimizer exists, i.e. right after the model
        # variables have been defined.
        self.saver = tf.train.Saver()
        self.env = env

    def loss_function(self, batch_size, env):
        """Build the loss and the per-batch hit counter.

        Returns
        -------
        (cost, accTotal)
            ``cost`` is the mean squared error between ``self.pred`` and
            ``self.truevalue`` over every fed row. ``accTotal`` counts, over
            the FIRST ``batch_size`` rows only, the sub-vectors whose argmax
            matches the label's argmax.
        """
        accTotal = 0
        # The graph is unrolled row by row because the env helper is applied to
        # one state vector at a time (presumably it splits a flat vector into
        # its one-hot sub-vectors -- verify against the env implementation).
        for i in range(0, batch_size):
            predVecs = env.deconcatenateOneHotStateVector(self.pred[i, :])
            labelVecs = env.deconcatenateOneHotStateVector(self.truevalue[i, :])
            for pv, lv in zip(predVecs, labelVecs):
                accTotal += tf.cast(tf.equal(tf.argmax(pv, axis=0), tf.argmax(lv, axis=0)), tf.float32)
        cost = tf.reduce_mean(tf.pow(self.pred - self.truevalue, 2))
        return cost, accTotal

    def build_graph(self, inputVec, batch_size, env, reuse=None):
        """Create the network and return the group-wise softmax prediction.

        ``batch_size`` and ``env`` are unused and kept only so existing callers
        keep working.
        """
        print('Building Graph')
        with tf.variable_scope("forward-model", reuse=reuse):
            hidden = slim.fully_connected(inputVec, self.n_hidden, biases_initializer=None, activation_fn=tf.nn.relu)
            hidden2 = slim.fully_connected(hidden, self.n_hidden, biases_initializer=None, activation_fn=tf.nn.relu)
            output = slim.fully_connected(hidden2, self.obs_space, activation_fn=None, biases_initializer=None)
            print('Slicing')
            groups = []
            start = 0
            for width in self.SUBVECTOR_SIZES:
                # Normalise each column group on its own.
                groups.append(tf.nn.softmax(output[:, start:start + width]))
                start += width
            print('Sliced')
            return tf.concat(groups, 1)

    def predict(self, x):
        """Run the prediction op on ``x`` in the default session.

        Returns a one-element list holding the prediction array.
        """
        sess = tf.get_default_session()
        #x.shape = (1,n_steps, n_input)
        return sess.run([self.pred], {self.input: x})

    def load_model(self, model_file_name):
        """Restore the saved variables from ``model_file_name`` into the default session."""
        sess = tf.get_default_session()
        self.saver.restore(sess, model_file_name)

    def train(self, trainset, testset, training_steps, batch_size, env, learning_rate, display_step, model_file_name=None):
        """Train with Adam on noise-perturbed inputs, evaluate, and save a checkpoint.

        Parameters
        ----------
        trainset, testset
            Objects providing ``next_batch_nonseq(n)`` returning ``(inputs, labels)``;
            ``trainset.env.stateSubVectors`` is used to normalise the accuracy.
        training_steps : int
            Number of optimizer steps.
        batch_size : int
            Rows requested per training batch (also the number of rows the
            accuracy graph looks at).
        env
            Forwarded to ``loss_function``.
        learning_rate : float
            Adam learning rate.
        display_step : int
            Print loss/accuracy every this many steps (and at step 1).
        model_file_name : str, optional
            Checkpoint base name. When omitted, a timestamped name is generated
            at call time.

        Returns
        -------
        The accuracy value computed on the test batch.
        """
        if model_file_name is None:
            # Resolve the timestamp per call; a default computed in the
            # signature would be frozen at class-definition time and every
            # default-named run would overwrite the same file.
            model_file_name = "FWR_model_" + time.strftime("%Y%m%d-%H%M%S")
        sess = tf.get_default_session()
        print('Entering loss func')
        cost, accTotal = self.loss_function(batch_size, env)
        print('Defining optimizer')
        self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
        self.accuracy = accTotal / (batch_size * trainset.env.stateSubVectors) #tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        # Initialize the variables (i.e. assign their default value)
        print('Running TF initializer')
        init = tf.global_variables_initializer()
        sess.run(init)
        noise_sigma = 0.2
        print('Entering train loop')
        for step in range(1, training_steps + 1):
            batch_x, batch_y = trainset.next_batch_nonseq(batch_size)
            clean_x = np.asarray(batch_x)
            noise = noise_sigma * np.random.randn(clean_x.shape[0], clean_x.shape[1])
            # Build a new array instead of `batch_x += noise`: the in-place form
            # modifies the object handed out by the dataset (which may alias its
            # stored samples) and would extend it if it were a list.
            noisy_x = clean_x + noise
            sess.run(self.optimizer, feed_dict={self.input: noisy_x, self.truevalue: batch_y})
            if step % display_step == 0 or step == 1:
                # Calculate batch accuracy & loss
                acc, loss = sess.run([self.accuracy, cost], feed_dict={self.input: noisy_x, self.truevalue: batch_y})
                print("Step " + str(step*batch_size) + ", Minibatch Loss= " + \
                      "{:.6f}".format(loss) + ", Training Accuracy= " + \
                      "{:.5f}".format(acc))
        print("Optimization Finished!")
        # Calculate accuracy
        # NOTE(review): the accuracy graph only inspects the first `batch_size`
        # rows, so although 5000 test rows are fed, the reported figure
        # reflects just the leading `batch_size` of them.
        test_data, test_label = testset.next_batch_nonseq(5000)
        acc = sess.run(self.accuracy, feed_dict={self.input: test_data, self.truevalue: test_label})
        print("Testing Accuracy:",acc)
        self.saver.save(sess, "./"+model_file_name+".ckpt")
        print("Model Saved")
        return acc

In [11]:
def main4():
    """Train a ForwardModelSoftmax on the small navigation data set.

    Loads the train/test pickles, builds the model inside a fresh graph and
    session, and runs training; the checkpoint base name is
    "trained_model_4_noise_0_2".
    """
    print('Reading Data')
    task_name = 'navigation'
    train_path = task_name + "-data-train-small.pickle"
    test_path = task_name + "-data-test-small.pickle"
    train = SeqData(train_path)
    test = SeqData(test_path)

    print(train.env.stateSubVectors)
    print('Defining Model')
    # Optimisation settings
    learning_rate = 0.0005
    training_steps = 15000
    batch_size = 64
    display_step = 200
    # Network dimensions: hidden width plus the state/input lengths from the data
    n_hidden = 200
    len_state = train.lenOfState
    len_input = train.lenOfInput
    env = NavigationTask()
    print('Initializing FM')
    with tf.Graph().as_default():
        with tf.Session():
            print('Entering with block')
            fm = ForwardModelSoftmax(
                len_state,
                len_input,
                env,
                batch_size,
                n_hidden,
            )
            print('FM initialized')
            fm.train(
                train,
                test,
                training_steps,
                batch_size,
                train.env,
                learning_rate,
                display_step,
                "trained_model_4_noise_0_2",
            )

In [13]:
# Kick off training of the softmax forward model; the checkpoint is written
# as ./trained_model_4_noise_0_2.ckpt.
main4()

Reading Data
Reading navigation-data-train-small.pickle
	Built
Reading navigation-data-test-small.pickle
	Built
5
Defining Model
Initializing FM
Entering with block
Building Graph
Slicing
Sliced
FM initialized
Entering loss func
Defining optimizer
Running TF initializer
Entering train loop
Step 64, Minibatch Loss= 0.069836, Training Accuracy= 0.14688
Step 12800, Minibatch Loss= 0.043710, Training Accuracy= 0.66563
Step 25600, Minibatch Loss= 0.023637, Training Accuracy= 0.83438
Step 38400, Minibatch Loss= 0.015723, Training Accuracy= 0.89688
Step 51200, Minibatch Loss= 0.016640, Training Accuracy= 0.88438
Step 64000, Minibatch Loss= 0.012688, Training Accuracy= 0.90625
Step 76800, Minibatch Loss= 0.011508, Training Accuracy= 0.91563
Step 89600, Minibatch Loss= 0.012687, Training Accuracy= 0.90938
Step 102400, Minibatch Loss= 0.011830, Training Accuracy= 0.91250
Step 115200, Minibatch Loss= 0.010728, Training Accuracy= 0.92188
Step 128000, Minibatch Loss= 0.010870, Training Accuracy= 0.

In [None]:
# Sanity check: split the initial state representation into its sub-vectors
# and write them back, in order, into a zero vector of the same length.
env = NavigationTask()
state_i = env.getStateRep()
predVecs = env.deconcatenateOneHotStateVector(state_i)
target = np.zeros(len(state_i))
index = 0
for pv in predVecs:
    stop = index + len(pv)
    target[index:stop] = pv
    # Advance the write offset past the sub-vector just copied.
    index = stop
print(target)