In [1]:
#NAVI_HARD_CODE_DOMAIN

In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
from sklearn.metrics import confusion_matrix
import time
from datetime import timedelta
import math
import os
import pandas as pd
#Functional coding
import functools
from functools import partial
from tensorflow.python.ops import array_ops 

In [3]:
# Relative locations of the three navigation data files. They are turned
# into absolute paths (relative to the current working directory) when read.
Datapath, Labelpath, Rewardpath = (
    "DATA/Navigation/Navigation_Data.txt",
    "DATA/Navigation/Navigation_Label.txt",
    "DATA/Navigation/Navigation_Reward.txt",
)

In [4]:
#Given local path, find full path
def PathFinder(path):
    """Resolve ``path`` to an absolute path, echo it, and return it.

    Args:
        path: A path, typically relative to the current working directory.

    Returns:
        The absolute form of ``path`` as produced by ``os.path.abspath``.
    """
    resolved = os.path.abspath(path)
    # Echo the location so the notebook output records which file was used.
    print(resolved)
    return resolved

#Read Data for Deep Learning
def ReadData(path):
    """Load a comma-separated file whose first row is the header.

    Args:
        path: Location of the file; it is resolved (and printed) by
            ``PathFinder`` before being read.

    Returns:
        A ``pandas.DataFrame`` with the file contents.
    """
    return pd.read_csv(PathFinder(path), sep=',', header=0)

In [5]:
# Load the three data files as DataFrames.
S_A_pd = ReadData(Datapath)
SP_pd = ReadData(Labelpath)
R_pd = ReadData(Rewardpath)

# Plain NumPy views of the frames. `DataFrame.as_matrix()` is deprecated and
# was removed in pandas 1.0; `.values` returns the same array and is available
# in every pandas release.
# The cells below slice S_A_matrix as [:, :2] -> actions and [:, 2:] -> states.
S_A_matrix = S_A_pd.values
SP_matrix = SP_pd.values
R_matrix = R_pd.values

/home/wuga/Documents/Notebook/VAE-PLANNING/DATA/Navigation/Navigation_Data.txt
/home/wuga/Documents/Notebook/VAE-PLANNING/DATA/Navigation/Navigation_Label.txt
/home/wuga/Documents/Notebook/VAE-PLANNING/DATA/Navigation/Navigation_Reward.txt


In [6]:
# Parameters of the hard-coded navigation domain.
#   dims            number of coordinates (plain Python int)
#   *_maze_bound    lower / upper limit applied to every state coordinate
#   *_act_bound     lower / upper limit of an action component
#   goal            target value of every coordinate (reward is -|state - goal|)
#   penalty         exposed through NAVI.PENALTY(); not used by the visible code
#   centre          coordinate value of the point around which moves are damped
default_settings = {"dims": 2}
default_settings.update({
    name: tf.constant(value, dtype=tf.float32)
    for name, value in [
        ("min_maze_bound", 0.0),
        ("max_maze_bound", 10.0),
        ("min_act_bound", -1.0),
        ("max_act_bound", 1.0),
        ("goal", 8.0),
        ("penalty", 1000000.0),
        ("centre", 5.0),
    ]
})

In [7]:
class NAVI(object):
    """Hard-coded navigation domain expressed as TensorFlow graph operations.

    Every key of the settings dict handed to the constructor becomes an
    attribute of the instance. The methods below read ``dims``,
    ``min_maze_bound``, ``max_maze_bound``, ``min_act_bound``,
    ``max_act_bound``, ``goal``, ``penalty`` and ``centre``.

    ``tf.stack`` / ``tf.unstack`` are used instead of the deprecated
    ``tf.pack`` / ``tf.unpack`` aliases, which were removed in TensorFlow 1.0.
    """

    def __init__(self, 
                 default_settings):
        # Expose every setting (dims, bounds, goal, ...) as an attribute.
        self.__dict__.update(default_settings)
        # Scalar constants shared by the transition and reward graphs.
        self.zero = tf.constant(0,dtype=tf.float32)
        self.two = tf.constant(2.0,dtype=tf.float32)
        self.one = tf.constant(1.0,dtype=tf.float32)
        self.lessone = tf.constant(0.99,dtype=tf.float32)
    
    # The accessors below take ``dim`` so that per-dimension values could be
    # supplied; at present every dimension shares the same constant.
    def MINMAZEBOUND(self, dim):
        """Return the lower maze bound (``dim`` is ignored)."""
        return self.min_maze_bound
    
    def MAXMAZEBOUND(self, dim):
        """Return the upper maze bound (``dim`` is ignored)."""
        return self.max_maze_bound
    
    def MINACTIONBOUND(self, dim):
        """Return the lower action bound (``dim`` is ignored)."""
        return self.min_act_bound
    
    def MAXACTIONBOUND(self, dim):
        """Return the upper action bound (``dim`` is ignored)."""
        return self.max_act_bound
    
    def GOAL(self, dim):
        """Return the goal coordinate (``dim`` is ignored)."""
        return self.goal
    
    def CENTER(self, dim):
        """Return the centre coordinate (``dim`` is ignored)."""
        return self.centre
    
    def PENALTY(self):
        """Return the penalty constant."""
        return self.penalty
    
    def _transition(self, dim, states_packed, actions_packed):
        """Build the next value of coordinate ``dim`` for a single sample.

        The action component is scaled by ``2 / (1 + exp(-2 * d)) - 0.99``,
        where ``d`` is the Euclidean distance between the state and the
        centre point. The factor is 0.01 at the centre and approaches 1.01
        far away from it. The proposed location is then clamped to the maze
        bounds.

        Args:
            dim: Python int, index of the coordinate to update.
            states_packed: rank-1 tensor holding the state of one sample.
            actions_packed: rank-1 tensor holding the action of one sample.

        Returns:
            Scalar tensor with the new value of coordinate ``dim``.
        """
        # Euclidean distance from the current state to the centre point.
        centre = tf.stack([self.CENTER(i) for i in range(self.dims)])
        distance = tf.sqrt(tf.reduce_sum(tf.square(states_packed - centre)))
        states = tf.unstack(states_packed)
        actions = tf.unstack(actions_packed)
        
        previous_state = states[dim]
        
        # Damping factor: small close to the centre, about 1 far from it.
        scalefactor = self.two/(self.one+tf.exp(-self.two*distance))-self.lessone
        
        # Location the agent would reach without the maze walls.
        proposedLoc = previous_state + actions[dim]*scalefactor
        
        # Clamp to [lower, upper]: keep the proposal when it is inside the
        # maze, otherwise snap to whichever bound was crossed.
        upper = self.MAXMAZEBOUND(dim)
        lower = self.MINMAZEBOUND(dim)
        new_state = tf.cond(
            tf.logical_and(proposedLoc <= upper, proposedLoc >= lower),
            lambda: proposedLoc,
            lambda: tf.cond(proposedLoc > upper, lambda: upper, lambda: lower))
        
        return new_state
    
    def _vector_trans(self, state_size, states_packed, actions_packed):
        """Build the full next state of a single sample.

        Args:
            state_size: number of coordinates to update.
            states_packed: rank-1 state tensor of one sample.
            actions_packed: rank-1 action tensor of one sample.

        Returns:
            Rank-1 tensor with ``state_size`` entries.
        """
        new_states = [self._transition(i, states_packed, actions_packed)
                      for i in range(state_size)]
        return tf.stack(new_states)
    
    def Transition(self, states, actions):
        """Build the next states for a whole batch.

        Args:
            states: rank-2 tensor ``(batch, state_size)``; both dimensions
                must be statically known because the graph is unrolled in
                Python.
            actions: tensor unstacked along its first axis in step with
                ``states`` (presumably ``(batch, state_size)``).

        Returns:
            Tensor of next states, one row per sample.
        """
        # as_list() yields plain Python ints, which is what range() needs.
        batch_size, state_size = states.get_shape().as_list()
        states_list = tf.unstack(states)
        actions_list = tf.unstack(actions)
        new_states = [self._vector_trans(state_size, states_list[i], actions_list[i])
                      for i in range(batch_size)]
        return tf.stack(new_states)
    
    def _reward(self, state_size, states_packed, actions_packed):
        """Build the reward of a single sample.

        The reward is the negated sum over coordinates of
        ``|state[i] - goal|``.

        Args:
            state_size: number of coordinates to sum over.
            states_packed: rank-1 state tensor of one sample.
            actions_packed: accepted for symmetry with ``_transition``; the
                reward does not depend on it.

        Returns:
            Tensor of shape ``(1,)`` holding the reward.
        """
        reward = self.zero
        states = tf.unstack(states_packed)
        
        for i in range(state_size):
            reward -= tf.abs(states[i]-self.GOAL(i))
        return tf.stack([reward])
    
    def Reward(self, states,actions):
        """Build the rewards for a whole batch.

        Args:
            states: rank-2 tensor ``(batch, state_size)`` with a statically
                known shape.
            actions: tensor unstacked along its first axis in step with
                ``states``; the values do not influence the reward.

        Returns:
            Tensor of shape ``(batch, 1)``.
        """
        batch_size, state_size = states.get_shape().as_list()
        states_list = tf.unstack(states)
        actions_list = tf.unstack(actions)
        new_rewards = [self._reward(state_size, states_list[i], actions_list[i])
                       for i in range(batch_size)]
        return tf.stack(new_rewards)

In [8]:
# Graph inputs for a fixed batch of 10 samples with 2 values each:
# `states` receives the state coordinates, `actions` the action components.
states = tf.placeholder(tf.float32, shape=[10, 2], name="States")
actions = tf.placeholder(tf.float32, shape=[10, 2], name="Actions")


In [9]:
# Domain instance used by the sanity-check cells below.
navi_inst = NAVI(default_settings=default_settings)

In [10]:
# Sanity check of the single-sample transition on the first 10 data rows.
# tf.unstack replaces the deprecated tf.unpack alias (removed in TF 1.0).
states_list = tf.unstack(states)
actions_list = tf.unstack(actions)
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
# Columns 0-1 of the data matrix feed the actions, columns 2+ feed the states.
feed_dict = {states: S_A_matrix[:10, 2:], actions: S_A_matrix[:10, :2]}
# Next value of coordinate 1 for the first sample.
new_state = navi_inst._transition(1, states_list[0], actions_list[0])
print(sess.run([new_state], feed_dict=feed_dict))
print(sess.run([states_list[1]], feed_dict=feed_dict))
# NOTE(review): this repeats the previous line exactly; possibly a different
# tensor (e.g. states_list[0] or actions_list[0]) was intended - confirm.
print(sess.run([states_list[1]], feed_dict=feed_dict))

[0.0]
[array([ 0.5490576,  0.       ], dtype=float32)]
[array([ 0.5490576,  0.       ], dtype=float32)]


In [11]:
# Batched reward graph over the placeholder inputs.
new_rewards = navi_inst.Reward(states=states, actions=actions)

In [12]:
# Evaluate the rewards of the first 10 rows: the leading two columns are fed
# as actions, the remaining columns as states.
feed_dict = {
    states: S_A_matrix[:10, 2:],
    actions: S_A_matrix[:10, :2],
}
sess.run(new_rewards, feed_dict=feed_dict)

array([[-16.        ],
       [-15.45094299],
       [-15.34755898],
       [-14.92229462],
       [-13.87710953],
       [-12.88495827],
       [-11.48205948],
       [-13.36318207],
       [-13.89812565],
       [-12.66272354]], dtype=float32)

In [13]:
class NAVICell(tf.nn.rnn_cell.RNNCell):
    """RNN cell whose recurrence is one step of the NAVI domain.

    The cell input is fed to the domain as the action, the recurrent state
    as the domain state, and the emitted output is the reward.
    """

    def __init__(self, default_settings):
        """Create the wrapped domain from ``default_settings``."""
        self.navi = NAVI(default_settings)
        # Width of the recurrent state and of the per-step output.
        self._num_state_units = 2
        self._num_reward_units = 1

    @property
    def state_size(self):
        """Number of state units (2)."""
        return self._num_state_units

    @property
    def output_size(self):
        """Number of output units (1, the reward)."""
        return self._num_reward_units

    def __call__(self, inputs, state, scope=None):
        """Advance the domain by one step.

        Args:
            inputs: tensor passed to the domain as actions.
            state: tensor passed to the domain as the current states.
            scope: unused.

        Returns:
            ``(reward, next_state)``; the reward is computed from the state
            before the transition.
        """
        domain = self.navi
        next_state = domain.Transition(state, inputs)
        reward = domain.Reward(state, inputs)
        return reward, next_state

In [14]:
class ActionOptimizer(object):
    """Plan by gradient ascent on the actions of an unrolled NAVI rollout.

    The action variable is reshaped to ``(-1, num_step, num_act)`` and fed
    through a ``NAVICell`` with ``tf.nn.dynamic_rnn``, starting from the
    cell's zero state. The loss is the negated mean, over the batch, of the
    rewards summed over axis 1 of the RNN outputs. Only the action variable
    is trained.

    The variable passed to the constructor is stored on the instance, so the
    class does not rely on a global named ``a``. The clipping and argmax ops
    are built once rather than on every call, so the graph does not grow
    while optimizing.

    The domain parameters still come from the module-level
    ``default_settings``.
    """

    def __init__(self,
                a, # Action variable (flat); optimized in place
                num_step, # Number of RNN step, this is a fixed step RNN sequence, 12 for navigation
                num_act, # Number of actions
                batch_size, #Batch Size
                learning_rate=0.01): 
        # Keep a handle on the trainable variable itself: the optimizer and
        # the clipping op must act on it, not on the reshaped tensor.
        self._action_var = a
        self.action = tf.reshape(a,[-1,num_step,num_act]) # Reshape actions
        print(self.action)
        self.batch_size = batch_size
        self.num_step = num_step
        self.learning_rate = learning_rate
        self._p_create_rnn_graph()
        self._p_create_loss()
        self.sess = tf.InteractiveSession()
        self.sess.run(tf.global_variables_initializer())
    
    def _p_create_rnn_graph(self):
        """Unroll the domain over the action sequence and sum the rewards."""
        cell = NAVICell(default_settings)
        initial_state = cell.zero_state(self.batch_size, dtype=tf.float32)
        print('action batch size:{0}'.format(array_ops.shape(self.action)[0]))
        print('Initial_state shape:{0}'.format(initial_state))
        rnn_outputs, state = tf.nn.dynamic_rnn(cell, self.action, dtype=tf.float32,initial_state=initial_state)
        # Per-step rewards and the state reached after the last step.
        self.outputs = rnn_outputs
        self.last_state = state
        # Total reward of every rollout.
        self.pred = tf.reduce_sum(self.outputs,1)
        print("self.pred:{0}".format(self.pred))
        # Index of the rollout with the highest total reward.
        self._best_index = tf.argmax(self.pred,0)
            
    def _p_create_loss(self):
        """Create the loss, the training op and the action-clipping op."""
        objective = tf.reduce_mean(self.pred) 
        # Maximizing the reward == minimizing its negation.
        self.loss = -objective
        print(self.loss.get_shape())
        self.optimizer = tf.train.RMSPropOptimizer(self.learning_rate).minimize(
            self.loss, var_list=[self._action_var])
        # Projection of the actions back into [-1, 1], applied after every
        # gradient step.
        self._clip_op = tf.assign(self._action_var,
                                  tf.clip_by_value(self._action_var, -1, 1))
        
    def Optimize(self,epoch=100):
        """Run ``epoch`` projected gradient steps and report the best rollout.

        Prints the loss before training and after every step, then the index
        of the best rollout, its action sequence, its last state and its
        per-step rewards.

        Args:
            epoch: number of gradient steps to take.
        """
        new_loss = self.sess.run([self.loss])
        print('Loss in epoch {0}: {1}'.format("Initial", new_loss)) 
        for step in range(epoch):
            self.sess.run([self.optimizer])
            self.sess.run(self._clip_op)
            new_loss = self.sess.run([self.loss])
            print('Loss in epoch {0}: {1}'.format(step, new_loss))  
        best_index = self.sess.run(self._best_index)
        print(best_index)
        best = best_index[0]
        best_action = np.round(self.sess.run(self.action)[best],4)
        print('Optimal Action Squence:{0}'.format(best_action))
        print('The last state:{0}'.format(self.sess.run(self.last_state)[best]))
        print('Rewards each time step:{0}'.format(self.sess.run(self.outputs)[best]))
        

In [15]:
# Planning problem size. The flat action variable must hold exactly
# batch * steps * action-components values, so its length is derived from the
# three sizes instead of being hard-coded as 240.
plan_batch_size = 10
plan_num_step = 12
plan_num_act = 2
a = tf.Variable(
    tf.truncated_normal(shape=[plan_batch_size * plan_num_step * plan_num_act],
                        mean=0.0, stddev=0.5),
    name="action")
rnn_inst = ActionOptimizer(a, plan_num_step, plan_num_act, plan_batch_size)

Tensor("Reshape:0", shape=(10, 12, 2), dtype=float32)
action batch size:Tensor("strided_slice:0", shape=(), dtype=int32)
Initial_state shape:Tensor("zeros:0", shape=(10, 2), dtype=float32)
self.pred:Tensor("Sum_1:0", shape=(10, 1), dtype=float32)
()


In [16]:
# Run 500 projected gradient steps on the action sequence.
rnn_inst.Optimize(epoch=500)

Loss in epoch Initial: [173.38489]
Loss in epoch 0: [172.84254]
Loss in epoch 1: [172.28981]
Loss in epoch 2: [171.72278]
Loss in epoch 3: [171.10469]
Loss in epoch 4: [170.47356]
Loss in epoch 5: [169.82779]
Loss in epoch 6: [169.14932]
Loss in epoch 7: [168.4619]
Loss in epoch 8: [167.76562]
Loss in epoch 9: [167.05843]
Loss in epoch 10: [166.33661]
Loss in epoch 11: [165.60637]
Loss in epoch 12: [164.86142]
Loss in epoch 13: [164.10628]
Loss in epoch 14: [163.34077]
Loss in epoch 15: [162.56656]
Loss in epoch 16: [161.78735]
Loss in epoch 17: [161.0088]
Loss in epoch 18: [160.23308]
Loss in epoch 19: [159.45383]
Loss in epoch 20: [158.66838]
Loss in epoch 21: [157.88879]
Loss in epoch 22: [157.09119]
Loss in epoch 23: [156.24014]
Loss in epoch 24: [155.39856]
Loss in epoch 25: [154.55693]
Loss in epoch 26: [153.71457]
Loss in epoch 27: [152.88138]
Loss in epoch 28: [152.04764]
Loss in epoch 29: [151.21248]
Loss in epoch 30: [150.37592]
Loss in epoch 31: [149.53806]
Loss in epoch 32: