In [1]:
from SimpleTask import SimpleGridTask
import numpy as np, numpy.random as npr, random as r, SimpleTask
from TransportTask import TransportTask
from NavTask import NavigationTask
import tensorflow as tf
import tensorflow.contrib.slim as slim
import os
import time
from SeqData import SeqData

In [2]:
class ForwardModel():
    """Feed-forward model mapping a concatenated (state, action) vector to next-state logits.

    The constructor builds the TensorFlow graph (placeholders, prediction
    tensor, saver) in the current default graph. `predict`, `load_model` and
    `train` fetch the session with ``tf.get_default_session()``, so they must
    be called inside a ``with tf.Session():`` block.
    """

    def __init__(self,
                 obs_space,
                 input_space,
                 n_hidden=100
                 ):
        """Create placeholders, the prediction tensor and the checkpoint saver.

        Args:
            obs_space: width of the state vector (and of the network output).
            input_space: width of the network input; ``input_space - obs_space``
                is stored as the action width.
            n_hidden: number of units in each of the two hidden layers.
        """
        self.n_hidden = n_hidden
        self.act_space = input_space - obs_space
        self.obs_space = obs_space
        # Placeholders
        self.input = tf.placeholder("float", [None, input_space])
        self.truevalue = tf.placeholder("float", [None, obs_space])
        self.pred = self.build_graph(self.input)
        # The saver is created before `train` adds the optimizer, so it is
        # built from the variables that exist at this point.
        self.saver = tf.train.Saver()

    def loss_function(self, batch_size, env):
        """Build the loss and the hit-count tensors for the first `batch_size` rows.

        Every row of the prediction and label tensors is split with
        ``env.deconcatenateOneHotStateVector``; each resulting sub-vector pair
        adds one softmax cross-entropy term to the cost and one 0/1 argmax
        match to the hit count.

        Args:
            batch_size: number of leading rows of the fed batch to include.
            env: object providing ``deconcatenateOneHotStateVector``.

        Returns:
            Tuple ``(cost, accTotal)`` of tensors: summed cross-entropy and
            number of sub-vectors whose predicted argmax equals the label's.
        """
        accTotal = 0
        cost = 0
        # The loop is unrolled in Python, so ops are created per row and per
        # sub-vector; the graph grows with batch_size.
        for i in range(0, batch_size):
            predVecs = env.deconcatenateOneHotStateVector(self.pred[i, :])
            labelVecs = env.deconcatenateOneHotStateVector(self.truevalue[i, :])
            for pv, lv in zip(predVecs, labelVecs):
                cost += tf.nn.softmax_cross_entropy_with_logits(logits=pv, labels=lv)
                accTotal += tf.cast(tf.equal(tf.argmax(pv, axis=0), tf.argmax(lv, axis=0)), tf.float32)
        return cost, accTotal

    def build_graph(self, inputVec, reuse=None):
        """Build the network on `inputVec` inside the "forward-model" variable scope.

        Two bias-free ReLU layers of width ``n_hidden`` are followed by a
        bias-free linear layer of width ``obs_space``.

        Args:
            inputVec: tensor (or array) fed to the first layer.
            reuse: forwarded to ``tf.variable_scope``; pass True to share the
                variables of a previously built copy.

        Returns:
            The output tensor of the final layer (unnormalised logits).
        """
        with tf.variable_scope("forward-model", reuse=reuse):
            hidden = slim.fully_connected(inputVec, self.n_hidden, biases_initializer=None, activation_fn=tf.nn.relu)
            hidden2 = slim.fully_connected(hidden, self.n_hidden, biases_initializer=None, activation_fn=tf.nn.relu)
            return slim.fully_connected(hidden2, self.obs_space, activation_fn=None, biases_initializer=None)

    def predict(self, x):
        """Run the prediction tensor on `x` in the default session.

        Args:
            x: batch fed to the input placeholder, i.e. shape (batch, input_space).

        Returns:
            The result of fetching ``[self.pred]``: a one-element list holding
            the prediction array.
        """
        sess = tf.get_default_session()
        return sess.run([self.pred], {self.input: x})

    def load_model(self, model_file_name):
        """Restore the model variables from checkpoint `model_file_name` into the default session."""
        sess = tf.get_default_session()
        self.saver.restore(sess, model_file_name)

    def train(self, trainset, testset, training_steps, batch_size, env, learning_rate, display_step, model_file_name=None):
        """Train with Adam, report accuracy on a test batch and save a checkpoint.

        Note that this runs the global variable initializer, so any weights
        restored earlier with `load_model` are overwritten.

        Args:
            trainset: data source providing ``next_batch_nonseq(n)`` and
                ``env.stateSubVectors``.
            testset: data source providing ``next_batch_nonseq(n)``.
            training_steps: number of optimizer steps.
            batch_size: rows per training batch; also the number of rows the
                loss and accuracy tensors look at.
            env: passed to `loss_function` to split state vectors.
            learning_rate: Adam learning rate.
            display_step: print loss/accuracy every this many steps (and at step 1).
            model_file_name: checkpoint base name, saved as
                ``./<model_file_name>.ckpt``. When None, a name of the form
                ``FWR_model_<YYYYmmdd-HHMMSS>`` is generated at call time.

        Returns:
            The accuracy measured on the test batch.
        """
        if model_file_name is None:
            # Resolve the timestamp per call. A default built in the signature
            # would be evaluated once, when the class is defined, and every
            # later run would overwrite the same checkpoint.
            model_file_name = "FWR_model_" + time.strftime("%Y%m%d-%H%M%S")
        sess = tf.get_default_session()
        print('Entering loss func')
        cost, accTotal = self.loss_function(batch_size, env)
        print('Defining optimizer')
        self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
        # Fraction of correctly predicted sub-vectors among batch_size rows.
        self.accuracy = accTotal / (batch_size * trainset.env.stateSubVectors)
        # Initialize the variables (i.e. assign their default value)
        print('Running TF initializer')
        init = tf.global_variables_initializer()
        sess.run(init)
        print('Entering train loop')
        for step in range(1, training_steps + 1):
            batch_x, batch_y = trainset.next_batch_nonseq(batch_size)
            sess.run(self.optimizer, feed_dict={self.input: batch_x, self.truevalue: batch_y})
            if step % display_step == 0 or step == 1:
                # Calculate batch accuracy & loss
                acc, loss = sess.run([self.accuracy, cost], feed_dict={self.input: batch_x, self.truevalue: batch_y})
                print("Step " + str(step*batch_size) + ", Minibatch Loss= " + \
                      "{:.6f}".format(loss) + ", Training Accuracy= " + \
                      "{:.5f}".format(acc))
        print("Optimization Finished!")
        # Calculate accuracy. The accuracy tensor only indexes the first
        # batch_size rows, so only that many of the 5000 fed rows are scored.
        test_data, test_label = testset.next_batch_nonseq(5000)
        acc = sess.run(self.accuracy, feed_dict={self.input: test_data, self.truevalue: test_label})
        print("Testing Accuracy:", acc)
        self.saver.save(sess, "./" + model_file_name + ".ckpt")
        print("Model Saved")
        return acc

In [8]:
def main():
    """Train a ForwardModel on the small navigation dataset.

    Loads the train/test pickles through SeqData, prints the number of state
    sub-vectors, then trains inside a fresh graph and session. The checkpoint
    is written under the name "trained_model_1".
    """
    print('Reading Data')
    task_name = 'navigation'
    train_path = task_name + "-data-train-small.pickle"
    test_path = task_name + "-data-test-small.pickle"
    train = SeqData(train_path)
    test = SeqData(test_path)

    print(train.env.stateSubVectors)
    print('Defining Model')

    # Optimisation settings
    learning_rate = 0.0005
    training_steps = 15000
    batch_size = 64
    display_step = 200

    # Network dimensions, taken from the dataset
    n_hidden = 200
    len_state = train.lenOfState
    len_input = train.lenOfInput

    print('Initializing FM')
    # The session becomes the default one, which is what the model's methods look up.
    with tf.Graph().as_default(), tf.Session():
        fm = ForwardModel(len_state, len_input, n_hidden)
        print('FM initialized')
        fm.train(
            train,
            test,
            training_steps,
            batch_size,
            train.env,
            learning_rate,
            display_step,
            "trained_model_1",
        )


In [9]:
# Train the noise-free forward model; main() saves it under the name "trained_model_1".
main()

Reading Data
Reading navigation-data-train-small.pickle
	Built
Reading navigation-data-test-small.pickle
	Built
5
Defining Model
Initializing FM
FM initialized
Entering loss func
Defining optimizer
Running TF initializer
Entering train loop
Step 64, Minibatch Loss= 779.468323, Training Accuracy= 0.08750
Step 12800, Minibatch Loss= 221.971603, Training Accuracy= 0.91562
Step 25600, Minibatch Loss= 93.502754, Training Accuracy= 0.94687
Step 38400, Minibatch Loss= 87.324150, Training Accuracy= 0.94687
Step 51200, Minibatch Loss= 96.650108, Training Accuracy= 0.92500
Step 64000, Minibatch Loss= 81.491425, Training Accuracy= 0.93437
Step 76800, Minibatch Loss= 76.359688, Training Accuracy= 0.93750
Step 89600, Minibatch Loss= 58.662125, Training Accuracy= 0.94687
Step 102400, Minibatch Loss= 89.629333, Training Accuracy= 0.90312
Step 115200, Minibatch Loss= 63.239368, Training Accuracy= 0.94375
Step 128000, Minibatch Loss= 38.974918, Training Accuracy= 0.95625
Step 140800, Minibatch Loss= 51

In [16]:
def run(input_value, model_file='trained_model_1.ckpt'):
    """Restore a trained ForwardModel and print its prediction for one input.

    The input's one-hot fields are printed as argmax indices, followed by the
    argmax of each field of the predicted next state.

    Args:
        input_value: float array of shape (1, 74): 64 state entries followed
            by 10 action entries.
        model_file: checkpoint to restore. Defaults to the file written by
            the baseline training run, so existing calls behave as before.
    """
    # Slice bounds of the one-hot fields inside the 64-wide state vector.
    # NOTE(review): presumably position, heading and goal position of the
    # navigation task; confirm against NavigationTask.
    state_fields = [(0, 15), (15, 30), (30, 34), (34, 49), (49, 64)]
    action_field = (64, 74)

    with tf.Graph().as_default(), tf.Session() as sess:
        fm = ForwardModel(64, 74, 200)
        fm.load_model(model_file)
        # Second copy of the network on the constant input, sharing the restored weights.
        result = fm.build_graph(input_value, reuse=True)

        print("---------------------------")
        print("Inital State: ")
        for lo, hi in state_fields:
            print(np.argmax(input_value[0][lo:hi]))
        print("---------------------------")
        print("Action: ")
        print(np.argmax(input_value[0][action_field[0]:action_field[1]]))
        print("---------------------------")

        result = sess.run(result)
        print("Result: ")
        for lo, hi in state_fields:
            print(np.argmax(result[0][lo:hi]))

In [30]:
# Test cases 1: apply each of the 10 actions to the default start state.
print("Testing actions and  state")
print("-----")
for action_id in range(10):
    env = NavigationTask() #(stochasticity=0.2)
    state_i = env.getStateRep()
    # One-hot encoding of the action under test.
    action_onehot = np.zeros(10)
    action_onehot[action_id] = 1
    # Model input: state followed by action, as a single float32 row.
    input_value = np.concatenate((state_i, action_onehot)).reshape(1, 74).astype(np.float32)
    run(input_value)
    print("-----------------------------------------------------------")

Testing actions and  state
INFO:tensorflow:Restoring parameters from trained_model_1.ckpt
---------------------------
Inital State: 
0
0
0
14
14
---------------------------
Action: 
0
---------------------------
Result: 
0
0
0
14
14
-----------------------------------------------------------
INFO:tensorflow:Restoring parameters from trained_model_1.ckpt
---------------------------
Inital State: 
0
0
0
14
14
---------------------------
Action: 
1
---------------------------
Result: 
0
0
0
14
14
-----------------------------------------------------------
INFO:tensorflow:Restoring parameters from trained_model_1.ckpt
---------------------------
Inital State: 
0
0
0
14
14
---------------------------
Action: 
2
---------------------------
Result: 
0
0
1
14
14
-----------------------------------------------------------
INFO:tensorflow:Restoring parameters from trained_model_1.ckpt
---------------------------
Inital State: 
0
0
0
14
14
---------------------------
Action: 
3
------------------

In [38]:
# Test cases 2: apply each of the 10 actions from a randomly drawn start position.
print("Testing different inital state and actions")
print("-----")
for action_id in range(10):
    # Two independent draws from [0, 14), row first and then column.
    h = np.random.randint(14, size=1)[0]
    w = np.random.randint(14, size=1)[0]
    start = [np.array([h, w]), 'N']
    env = NavigationTask(agent_start_pos=start) #(stochasticity=0.2)
    state_i = env.getStateRep()
    # One-hot encoding of the action under test.
    action_onehot = np.zeros(10)
    action_onehot[action_id] = 1
    # Model input: state followed by action, as a single float32 row.
    input_value = np.concatenate((state_i, action_onehot)).reshape(1, 74).astype(np.float32)
    run(input_value)
    print("-----------------------------------------------------------")

Testing different inital state and actions
-----
INFO:tensorflow:Restoring parameters from trained_model_1.ckpt
---------------------------
Inital State: 
3
0
0
14
14
---------------------------
Action: 
0
---------------------------
Result: 
3
0
0
14
14
-----------------------------------------------------------
INFO:tensorflow:Restoring parameters from trained_model_1.ckpt
---------------------------
Inital State: 
8
12
0
14
14
---------------------------
Action: 
1
---------------------------
Result: 
8
12
0
14
14
-----------------------------------------------------------
INFO:tensorflow:Restoring parameters from trained_model_1.ckpt
---------------------------
Inital State: 
13
6
0
14
14
---------------------------
Action: 
2
---------------------------
Result: 
13
6
1
14
14
-----------------------------------------------------------
INFO:tensorflow:Restoring parameters from trained_model_1.ckpt
---------------------------
Inital State: 
8
1
0
14
14
---------------------------
Act

In [39]:
class ForwardModelWithNoise():
    """Forward model trained on inputs corrupted with Gaussian noise.

    The network, placeholders and variable scope are the same as in
    ForwardModel; only `train` differs, adding zero-mean Gaussian noise to
    every training batch before it is fed. Methods that run the graph use
    ``tf.get_default_session()`` and must be called inside a session context.
    """

    def __init__(self,
                 obs_space,
                 input_space,
                 n_hidden=100
                 ):
        """Create placeholders, the prediction tensor and the checkpoint saver.

        Args:
            obs_space: width of the state vector (and of the network output).
            input_space: width of the network input; ``input_space - obs_space``
                is stored as the action width.
            n_hidden: number of units in each of the two hidden layers.
        """
        self.n_hidden = n_hidden
        self.act_space = input_space - obs_space
        self.obs_space = obs_space
        # Placeholders
        self.input = tf.placeholder("float", [None, input_space])
        self.truevalue = tf.placeholder("float", [None, obs_space])
        self.pred = self.build_graph(self.input)
        # Created before `train` adds the optimizer.
        self.saver = tf.train.Saver()

    @staticmethod
    def _add_gaussian_noise(batch_x, noise_sigma):
        """Return a new float array equal to `batch_x` plus N(0, noise_sigma^2) noise; the input is left untouched."""
        clean = np.array(batch_x, dtype=float)
        return clean + noise_sigma * np.random.randn(*clean.shape)

    def loss_function(self, batch_size, env):
        """Build the loss and the hit-count tensors for the first `batch_size` rows.

        Every row of the prediction and label tensors is split with
        ``env.deconcatenateOneHotStateVector``; each sub-vector pair adds one
        softmax cross-entropy term to the cost and one 0/1 argmax match to
        the hit count.

        Returns:
            Tuple ``(cost, accTotal)`` of tensors.
        """
        accTotal = 0
        cost = 0
        # Unrolled in Python: ops are created per row and per sub-vector.
        for i in range(0, batch_size):
            predVecs = env.deconcatenateOneHotStateVector(self.pred[i, :])
            labelVecs = env.deconcatenateOneHotStateVector(self.truevalue[i, :])
            for pv, lv in zip(predVecs, labelVecs):
                cost += tf.nn.softmax_cross_entropy_with_logits(logits=pv, labels=lv)
                accTotal += tf.cast(tf.equal(tf.argmax(pv, axis=0), tf.argmax(lv, axis=0)), tf.float32)
        return cost, accTotal

    def build_graph(self, inputVec, reuse=None):
        """Build two bias-free ReLU layers and a bias-free linear output in scope "forward-model".

        Args:
            inputVec: tensor (or array) fed to the first layer.
            reuse: forwarded to ``tf.variable_scope``.

        Returns:
            Output tensor of width ``obs_space`` (unnormalised logits).
        """
        with tf.variable_scope("forward-model", reuse=reuse):
            hidden = slim.fully_connected(inputVec, self.n_hidden, biases_initializer=None, activation_fn=tf.nn.relu)
            hidden2 = slim.fully_connected(hidden, self.n_hidden, biases_initializer=None, activation_fn=tf.nn.relu)
            return slim.fully_connected(hidden2, self.obs_space, activation_fn=None, biases_initializer=None)

    def predict(self, x):
        """Fetch ``[self.pred]`` for batch `x` (shape (batch, input_space)); returns a one-element list."""
        sess = tf.get_default_session()
        return sess.run([self.pred], {self.input: x})

    def load_model(self, model_file_name):
        """Restore the model variables from checkpoint `model_file_name` into the default session."""
        sess = tf.get_default_session()
        self.saver.restore(sess, model_file_name)

    def train(self, trainset, testset, training_steps, batch_size, env, learning_rate, display_step, model_file_name=None, noise_sigma=0.3):
        """Train with Adam on noise-corrupted batches, score a test batch and save a checkpoint.

        This runs the global variable initializer, so weights restored earlier
        with `load_model` are overwritten.

        Args:
            trainset: data source providing ``next_batch_nonseq(n)`` and
                ``env.stateSubVectors``.
            testset: data source providing ``next_batch_nonseq(n)``.
            training_steps: number of optimizer steps.
            batch_size: rows per training batch; also the number of rows the
                loss and accuracy tensors look at.
            env: passed to `loss_function` to split state vectors.
            learning_rate: Adam learning rate.
            display_step: print loss/accuracy every this many steps (and at step 1).
            model_file_name: checkpoint base name, saved as
                ``./<model_file_name>.ckpt``. When None, a name of the form
                ``FWR_model_<YYYYmmdd-HHMMSS>`` is generated at call time.
            noise_sigma: standard deviation of the Gaussian noise added to
                every training input (test inputs are fed clean).

        Returns:
            The accuracy measured on the test batch.
        """
        if model_file_name is None:
            # Resolve the timestamp per call instead of once at class definition.
            model_file_name = "FWR_model_" + time.strftime("%Y%m%d-%H%M%S")
        sess = tf.get_default_session()
        print('Entering loss func')
        cost, accTotal = self.loss_function(batch_size, env)
        print('Defining optimizer')
        self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
        # Fraction of correctly predicted sub-vectors among batch_size rows.
        self.accuracy = accTotal / (batch_size * trainset.env.stateSubVectors)
        # Initialize the variables (i.e. assign their default value)
        print('Running TF initializer')
        init = tf.global_variables_initializer()
        sess.run(init)
        print('Entering train loop')
        for step in range(1, training_steps + 1):
            batch_x, batch_y = trainset.next_batch_nonseq(batch_size)
            # Build a fresh noisy copy. An in-place `+=` would write into the
            # buffer returned by the dataset, and on a list it would append
            # the noise rows instead of adding them.
            batch_x = self._add_gaussian_noise(batch_x, noise_sigma)
            sess.run(self.optimizer, feed_dict={self.input: batch_x, self.truevalue: batch_y})
            if step % display_step == 0 or step == 1:
                # Calculate batch accuracy & loss (on the noisy batch)
                acc, loss = sess.run([self.accuracy, cost], feed_dict={self.input: batch_x, self.truevalue: batch_y})
                print("Step " + str(step*batch_size) + ", Minibatch Loss= " + \
                      "{:.6f}".format(loss) + ", Training Accuracy= " + \
                      "{:.5f}".format(acc))
        print("Optimization Finished!")
        # Calculate accuracy. The accuracy tensor only indexes the first
        # batch_size rows, so only that many of the 5000 fed rows are scored.
        test_data, test_label = testset.next_batch_nonseq(5000)
        acc = sess.run(self.accuracy, feed_dict={self.input: test_data, self.truevalue: test_label})
        print("Testing Accuracy:", acc)
        self.saver.save(sess, "./" + model_file_name + ".ckpt")
        print("Model Saved")
        return acc

In [40]:
def main2():
    """Train a ForwardModelWithNoise on the small navigation dataset.

    Same data and hyper-parameters as the noise-free run; the checkpoint is
    written under the name "trained_model_2".
    """
    print('Reading Data')
    prefix = 'navigation'
    train = SeqData(prefix + "-data-train-small.pickle")
    test = SeqData(prefix + "-data-test-small.pickle")

    print(train.env.stateSubVectors)
    print('Defining Model')

    # Optimisation settings
    learning_rate, training_steps = 0.0005, 15000
    batch_size, display_step = 64, 200

    # Network dimensions: hidden width is fixed, the others come from the dataset
    n_hidden = 200
    len_state, len_input = train.lenOfState, train.lenOfInput

    print('Initializing FM')
    # Fresh graph plus default session for the model's methods to pick up.
    with tf.Graph().as_default(), tf.Session():
        fm = ForwardModelWithNoise(len_state, len_input, n_hidden)
        print('FM initialized')
        train_args = (train, test, training_steps, batch_size, train.env,
                      learning_rate, display_step, "trained_model_2")
        fm.train(*train_args)


In [41]:
# Train the noise-augmented forward model; main2() saves it under the name "trained_model_2".
main2()

Reading Data
Reading navigation-data-train-small.pickle
	Built
Reading navigation-data-test-small.pickle
	Built
5
Defining Model
Initializing FM
FM initialized
Entering loss func
Defining optimizer
Running TF initializer
Entering train loop
Step 64, Minibatch Loss= 780.052185, Training Accuracy= 0.12812
Step 12800, Minibatch Loss= 481.009521, Training Accuracy= 0.62187
Step 25600, Minibatch Loss= 320.089294, Training Accuracy= 0.77188
Step 38400, Minibatch Loss= 289.764648, Training Accuracy= 0.78125
Step 51200, Minibatch Loss= 253.788574, Training Accuracy= 0.79688
Step 64000, Minibatch Loss= 280.149353, Training Accuracy= 0.73125
Step 76800, Minibatch Loss= 242.335434, Training Accuracy= 0.80313
Step 89600, Minibatch Loss= 265.590118, Training Accuracy= 0.76562
Step 102400, Minibatch Loss= 228.921127, Training Accuracy= 0.80313
Step 115200, Minibatch Loss= 257.519257, Training Accuracy= 0.79062
Step 128000, Minibatch Loss= 219.384048, Training Accuracy= 0.81563
Step 140800, Minibatch

In [42]:
def run2(input_value, model_file='trained_model_2.ckpt'):
    """Restore the noise-trained model and print its prediction for one input.

    ForwardModel is instantiated here on purpose: ForwardModelWithNoise builds
    the same layers under the same "forward-model" scope, so its checkpoint
    restores into a plain ForwardModel.

    Args:
        input_value: float array of shape (1, 74): 64 state entries followed
            by 10 action entries.
        model_file: checkpoint to restore. Defaults to the file written by
            the noise-augmented training run, so existing calls behave as before.
    """
    # Slice bounds of the one-hot fields inside the 64-wide state vector.
    # NOTE(review): field meanings are not visible here; confirm against NavigationTask.
    state_bounds = ((0, 15), (15, 30), (30, 34), (34, 49), (49, 64))

    with tf.Graph().as_default(), tf.Session() as sess:
        fm = ForwardModel(64, 74, 200)
        fm.load_model(model_file)
        # Second copy of the network on the constant input, sharing the restored weights.
        result = fm.build_graph(input_value, reuse=True)

        print("---------------------------")
        print("Inital State: ")
        for start, stop in state_bounds:
            print(np.argmax(input_value[0][start:stop]))
        print("---------------------------")
        print("Action: ")
        print(np.argmax(input_value[0][64:74]))
        print("---------------------------")

        result = sess.run(result)
        print("Result: ")
        for start, stop in state_bounds:
            print(np.argmax(result[0][start:stop]))

In [44]:
# Test cases 1 (noise-trained model): apply each of the 10 actions to the default start state.
print("Testing actions and  state")
print("-----")
for action_id in range(10):
    env = NavigationTask() #(stochasticity=0.2)
    state_i = env.getStateRep()
    # One-hot encoding of the action under test.
    action_onehot = np.zeros(10)
    action_onehot[action_id] = 1
    # Model input: state followed by action, as a single float32 row.
    joined = np.concatenate((state_i, action_onehot))
    input_value = joined.reshape(1, 74).astype(np.float32)
    run2(input_value)
    print("-----------------------------------------------------------")

Testing actions and  state
-----
INFO:tensorflow:Restoring parameters from trained_model_2.ckpt
---------------------------
Inital State: 
0
0
0
14
14
---------------------------
Action: 
0
---------------------------
Result: 
0
0
0
14
14
-----------------------------------------------------------
INFO:tensorflow:Restoring parameters from trained_model_2.ckpt
---------------------------
Inital State: 
0
0
0
14
14
---------------------------
Action: 
1
---------------------------
Result: 
0
0
0
14
14
-----------------------------------------------------------
INFO:tensorflow:Restoring parameters from trained_model_2.ckpt
---------------------------
Inital State: 
0
0
0
14
14
---------------------------
Action: 
2
---------------------------
Result: 
0
0
1
14
14
-----------------------------------------------------------
INFO:tensorflow:Restoring parameters from trained_model_2.ckpt
---------------------------
Inital State: 
0
0
0
14
14
---------------------------
Action: 
3
------------