# NAVI_HARD_CODE_DOMAIN

In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
from sklearn.metrics import confusion_matrix
import time
from datetime import timedelta
import math
import os
import pandas as pd
#Functional coding
import functools
from functools import partial
from tensorflow.python.ops import array_ops 

In [3]:
# Relative locations of the three navigation dataset files.
# They are turned into absolute paths (relative to the current working
# directory) by PathFinder when the files are read.
Datapath = "DATA/Navigation/Navigation_Data.txt"      # loaded below as S_A_pd
Labelpath = "DATA/Navigation/Navigation_Label.txt"    # loaded below as SP_pd
Rewardpath = "DATA/Navigation/Navigation_Reward.txt"  # loaded below as R_pd

In [4]:
#Given local path, find full path
def PathFinder(path):
    """Return the absolute form of ``path`` and echo it to stdout.

    Args:
        path: A file-system path; relative paths are resolved against the
            current working directory by ``os.path.abspath``.

    Returns:
        The absolute path as a string.
    """
    resolved = os.path.abspath(path)
    # Printed so the notebook output records which file was actually used.
    print(resolved)
    return resolved

#Read Data for Deep Learning
def ReadData(path):
    """Load a comma-separated text file into a pandas DataFrame.

    Args:
        path: Location of the file; it is made absolute (and printed) by
            ``PathFinder`` before being read.

    Returns:
        A ``pandas.DataFrame`` whose column names come from the first line of
        the file (``header=0``).
    """
    return pd.read_csv(PathFinder(path), sep=',', header=0)

In [5]:
# Load the three dataset files as DataFrames.
S_A_pd = ReadData(Datapath)
SP_pd = ReadData(Labelpath)
R_pd = ReadData(Rewardpath)

# Plain NumPy views of the same data.
# DataFrame.as_matrix() was deprecated and then removed in pandas 1.0;
# .values returns the same ndarray and is available in old and new pandas.
S_A_matrix = S_A_pd.values
SP_matrix = SP_pd.values
R_matrix = R_pd.values

/home/wuga/Documents/Notebook/VAE-PLANNING/DATA/Navigation/Navigation_Data.txt
/home/wuga/Documents/Notebook/VAE-PLANNING/DATA/Navigation/Navigation_Label.txt
/home/wuga/Documents/Notebook/VAE-PLANNING/DATA/Navigation/Navigation_Reward.txt


In [6]:
# Domain constants for the NAVI model. NAVI.__init__ copies every entry onto
# the instance, so each key doubles as an attribute name there.
default_settings = dict(
    # Number of coordinates used when building the centre vector.
    dims=2,
    # Lower / upper limits a state coordinate is clamped to after a move.
    min_maze_bound=tf.constant(0.0, dtype=tf.float32),
    max_maze_bound=tf.constant(10.0, dtype=tf.float32),
    # Action limits (exposed through NAVI.MINACTIONBOUND / MAXACTIONBOUND).
    min_act_bound=tf.constant(-0.2, dtype=tf.float32),
    max_act_bound=tf.constant(0.2, dtype=tf.float32),
    # Per-coordinate target value used by the reward.
    goal=tf.constant(8.0, dtype=tf.float32),
    # Exposed through NAVI.PENALTY().
    penalty=tf.constant(1000000.0, dtype=tf.float32),
    # Per-coordinate centre used to compute the action scale factor.
    centre=tf.constant(5.0, dtype=tf.float32),
)

In [7]:
class NAVI(object):
    """Hard-coded TensorFlow graph model of a bounded navigation domain.

    A state is a vector of coordinates and an action is a per-coordinate
    displacement. The displacement is multiplied by a scale factor that
    depends on the Euclidean distance between the state and the domain centre,
    and the resulting coordinate is clamped to the maze bounds. The reward is
    the negative sum of absolute differences between the state and the goal.

    All methods build graph operations; nothing is evaluated until the
    returned tensors are run in a session.
    """

    def __init__(self, 
                 default_settings):
        """Copy the domain settings onto the instance and create constants.

        Args:
            default_settings: Mapping whose entries become instance
                attributes. The methods of this class read ``dims``,
                ``min_maze_bound``, ``max_maze_bound``, ``min_act_bound``,
                ``max_act_bound``, ``goal``, ``penalty`` and ``centre``.
        """
        self.__dict__.update(default_settings)
        self.zero = tf.constant(0,dtype=tf.float32)
        self.two = tf.constant(2.0,dtype=tf.float32)
        self.one = tf.constant(1.0,dtype=tf.float32)
        # Offset subtracted from the sigmoid-like term of the scale factor.
        self.lessone = tf.constant(0.99,dtype=tf.float32)

    @staticmethod
    def _stack(values):
        """Stack a list of tensors along a new first axis.

        ``tf.pack`` was renamed ``tf.stack`` in TensorFlow 1.0; the lookup is
        done at call time so the class works with either name.
        """
        stack_op = getattr(tf, "stack", None) or tf.pack
        return stack_op(values)

    @staticmethod
    def _unstack(value):
        """Split a tensor along its first axis into a Python list.

        ``tf.unpack`` was renamed ``tf.unstack`` in TensorFlow 1.0; the lookup
        is done at call time so the class works with either name.
        """
        unstack_op = getattr(tf, "unstack", None) or tf.unpack
        return unstack_op(value)

    # The ``dim`` argument of the accessors below is accepted but ignored:
    # every coordinate currently shares the same setting.
    def MINMAZEBOUND(self, dim):
        """Return the lower maze bound for coordinate ``dim``."""
        return self.min_maze_bound
    
    def MAXMAZEBOUND(self, dim):
        """Return the upper maze bound for coordinate ``dim``."""
        return self.max_maze_bound
    
    def MINACTIONBOUND(self, dim):
        """Return the lower action bound for coordinate ``dim``."""
        return self.min_act_bound
    
    def MAXACTIONBOUND(self, dim):
        """Return the upper action bound for coordinate ``dim``."""
        return self.max_act_bound
    
    def GOAL(self, dim):
        """Return the goal value for coordinate ``dim``."""
        return self.goal
    
    def CENTER(self, dim):
        """Return the centre value for coordinate ``dim``."""
        return self.centre
    
    def PENALTY(self):
        """Return the penalty setting (not used by the methods of this class)."""
        return self.penalty

    def _scale_factor(self, states_packed):
        """Return the scalar multiplier applied to every action component.

        With ``d`` the Euclidean distance from the state to the centre, the
        factor is ``2 / (1 + exp(-2 * d)) - 0.99``: 0.01 at the centre and
        approaching 1.01 far away from it.
        """
        centre = self._stack([self.CENTER(i) for i in range(self.dims)])
        distance = tf.sqrt(tf.reduce_sum(tf.square(states_packed - centre)))
        return self.two/(self.one+tf.exp(-self.two*distance))-self.lessone

    def _clip_to_maze(self, dim, proposedLoc):
        """Clamp a proposed scalar coordinate to the maze bounds of ``dim``."""
        lower = self.MINMAZEBOUND(dim)
        upper = self.MAXMAZEBOUND(dim)
        inside = tf.logical_and(proposedLoc <= upper, proposedLoc >= lower)
        # Nested tf.cond kept as in the original formulation: in-range values
        # pass through, otherwise the violated bound is returned.
        return tf.cond(inside,
                       lambda: proposedLoc,
                       lambda: tf.cond(proposedLoc > upper, lambda: upper, lambda: lower))

    def _transition(self, dim, states_packed, actions_packed):
        """Return the next value of coordinate ``dim`` for one sample.

        Args:
            dim: Index of the coordinate to advance.
            states_packed: 1-D tensor holding the current state.
            actions_packed: 1-D tensor holding the action.

        Returns:
            Scalar tensor: the state coordinate moved by the scaled action
            component and clamped to the maze bounds.
        """
        states = self._unstack(states_packed)
        actions = self._unstack(actions_packed)
        scalefactor = self._scale_factor(states_packed)
        return self._clip_to_maze(dim, states[dim] + actions[dim]*scalefactor)
    
    # For single data point
    def _vector_trans(self, state_size, states_packed, actions_packed):
        """Return the full next-state vector for one sample.

        The scale factor depends only on the whole state, so it is built once
        and shared by all coordinates instead of once per coordinate.
        """
        states = self._unstack(states_packed)
        actions = self._unstack(actions_packed)
        scalefactor = self._scale_factor(states_packed)
        new_states = [self._clip_to_maze(i, states[i] + actions[i]*scalefactor)
                      for i in range(state_size)]
        return self._stack(new_states)
    
    def Transition(self, states, actions):
        """Return the next states for a batch.

        Args:
            states: Tensor with a static 2-D shape ``(batch, state_size)``;
                the Python loops below are unrolled over both sizes.
            actions: Tensor with the same leading batch size as ``states``.

        Returns:
            Tensor of next states, one row per sample.
        """
        batch_size,state_size = states.get_shape()
        states_list = self._unstack(states)
        actions_list = self._unstack(actions)
        new_states = [self._vector_trans(state_size,states_list[i],actions_list[i])
                      for i in range(batch_size)]
        return self._stack(new_states)
    
    def _reward(self, state_size, states_packed, actions_packed):
        """Return the reward of one sample as a one-element tensor.

        The reward is minus the sum over coordinates of
        ``|state[i] - goal|``. ``actions_packed`` is accepted for interface
        symmetry but does not influence the result.
        """
        reward = self.zero
        states = self._unstack(states_packed)
        for i in range(state_size):
            reward -= tf.abs(states[i]-self.GOAL(i))
        return self._stack([reward])
    
    def Reward(self, states,actions):
        """Return the rewards for a batch, one single-element row per sample.

        Args:
            states: Tensor with a static 2-D shape ``(batch, state_size)``.
            actions: Tensor with the same leading batch size as ``states``.
        """
        batch_size,state_size = states.get_shape()
        states_list = self._unstack(states)
        actions_list = self._unstack(actions)
        new_rewards = [self._reward(state_size,states_list[i],actions_list[i])
                       for i in range(batch_size)]
        return self._stack(new_rewards)

In [8]:
# Graph inputs for the domain model: each placeholder holds a single sample
# (batch of 1) with two components.

# Current state
states = tf.placeholder(dtype=tf.float32, shape=[1, 2], name="States")

# Action applied in that state
actions = tf.placeholder(dtype=tf.float32, shape=[1, 2], name="Actions")


In [9]:
# Build the transition and reward graphs on top of the placeholders.
navi_inst = NAVI(default_settings)
next_state = navi_inst.Transition(states,actions)
reward = navi_inst.Reward(states,actions)

# Example inputs matching the [1, 2] placeholder shapes: the agent sits at
# the centre of the maze and takes the largest allowed step. Replace these
# with real samples as needed.
sample_state = np.array([[5.0, 5.0]], dtype=np.float32)
sample_action = np.array([[0.2, -0.2]], dtype=np.float32)
feed_dict = {states: sample_state, actions: sample_action}

# The graph only contains constants and placeholders, so no variable
# initialisation is required. The session is left open for later cells;
# call sess.close() when finished.
sess = tf.Session()
next_state_value, reward_value = sess.run([next_state, reward], feed_dict=feed_dict)

# Last expression of the cell, so the notebook displays both results.
next_state_value, reward_value