# Recurrent Neural Network

This is the third part of our overall neural-network planning architecture.

The data in these experiments is generated by the RDDL simulator ([GitHub](https://github.com/ssanner/rddlsim)), which was written by Prof. Scott Sanner at the University of Toronto.

The objective of this network is to learn the Bellman Q-value equation. However, because the networks are trained separately, this objective cannot be fulfilled so easily. The partial objective of training Q(s,a) = E[R+Q(s',a')] is acceptable, even though a' is not the optimal choice. The later combined training will solve the remaining problem.

Problem list:
1. The Bellman equation is Markovian, which means an LSTM is not necessary; BPTT is sufficient for the task. However, it reminds me of a concept called TD($\lambda$), even though TD($\lambda$) is not the same concept as this one. I am wondering whether the LSTM is an approximation of Q with some backpropagation where $\lambda\geq 1$.


In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
from sklearn.metrics import confusion_matrix
import time
from datetime import timedelta
import math
import os
import pandas as pd
#Functional coding
import functools
from functools import partial

In [2]:
# Relative locations of the three navigation-domain text files. Only
# Rewardpath is read in the cells visible here (via ReadData).
Datapath="DATA/Navigation/Nav_RDDL_Data.txt"
Labelpath="DATA/Navigation/Nav_RDDL_Label.txt"
Rewardpath="DATA/Navigation/Nav_RDDL_Reward.txt"

In [3]:
#Given local path, find full path
def PathFinder(path):
    """Resolve *path* to an absolute path, print it, and return it.

    Resolution is done with ``os.path.abspath``, i.e. relative to the current
    working directory.
    """
    # Python 2 variant, kept for reference:
    #   script_dir = os.path.dirname('__file__')
    #   fullpath = os.path.join(script_dir, path)
    resolved = os.path.abspath(path)
    print(resolved)
    return resolved

#Read Data for Deep Learning
def ReadData(path):
    """Load the comma-separated file at *path* into a pandas DataFrame.

    The path is resolved (and printed) by PathFinder; the first row of the
    file is used as the column header.
    """
    return pd.read_csv(PathFinder(path), sep=',', header=0)

#Won't use this one to normalize
#Input Normalization
def Normalize(features, mean=None, std=None):
    """Standardize each column of a 2-D feature array.

    Args:
        features: 2-D NumPy array with one sample per row.
        mean: Optional per-column means. When ``None`` or empty, both the
            mean and the standard deviation are computed from ``features``
            (any supplied ``std`` is ignored in that case, as before).
        std: Optional per-column standard deviations. When ``mean`` is given
            but ``std`` is ``None`` or empty, it is computed from ``features``.

    Returns:
        ``(new_feature, mean, std)``: the standardized array plus the
        statistics that were used, so they can be passed back in to normalize
        another array the same way.
    """
    def _missing(stat):
        # Test emptiness by size, never with `== []`: comparing a NumPy array
        # against a list is element-wise and warns or raises.
        return stat is None or np.size(stat) == 0

    if _missing(mean):
        mean = np.mean(features, axis=0)
        std = np.std(features, axis=0)
    elif _missing(std):
        std = np.std(features, axis=0)

    # Accept plain lists as well as arrays for the supplied statistics.
    mean = np.asarray(mean)
    std = np.asarray(std)

    # A zero std is expected for constant columns; the resulting 0/0 NaNs are
    # zeroed just below, so the floating-point warnings are only noise.
    with np.errstate(divide='ignore', invalid='ignore'):
        new_feature = ((features.T - mean[:, None]) / std[:, None]).T
    new_feature[np.isnan(new_feature)] = 0
    return new_feature, mean, std

In [4]:
#Weight constructing function
def weight_variable(shape):
    """Create a TensorFlow variable named "weights" with the given shape.

    Initial values are drawn from a truncated normal distribution with a
    standard deviation of 0.005.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.005), name="weights")

#Bias constructing function
def bias_variable(shape):
    """Create a TensorFlow variable named "biases" with the given shape,
    initialised to zeros."""
    return tf.Variable(tf.constant(0., shape=shape), name="biases")

In [5]:
# Load the reward file into a DataFrame (ReadData also prints the resolved path).
r_pd = ReadData(Rewardpath)

/home/wuga/Documents/Notebook/VAE-PLANNING/DATA/Navigation/Nav_RDDL_Reward.txt


In [6]:
# Convert the reward table to a plain NumPy array. `.values` is used because
# `DataFrame.as_matrix()` was deprecated and later removed from pandas.
r_matrix=r_pd.values
# Column count of the reward table; it sizes the "Labels" placeholder.
_,OUTPUT_SIZE = r_matrix.shape

In [7]:
# Input label
# Fed with reward rows at run time: any number of rows, OUTPUT_SIZE columns.
r = tf.placeholder(tf.float32, [None, OUTPUT_SIZE],name="Labels")

# Dropout
# Fed on every run; LSTMRNN passes it to DropoutWrapper as output_keep_prob,
# so feeding 1.0 keeps every output (no dropout).
dropout = tf.placeholder(tf.float32, name="Dropout")

In [8]:
from IPython.display import clear_output, Image, display, HTML

def strip_consts(graph_def, max_const_size=100):
    """Return a copy of ``graph_def`` with large constants stripped.

    Every node is copied into a fresh ``tf.GraphDef``. For ``Const`` nodes
    whose serialized ``tensor_content`` is longer than ``max_const_size``
    bytes, the payload is replaced by a short ``<stripped N bytes>`` marker.

    Args:
        graph_def: The GraphDef whose nodes are copied.
        max_const_size: Largest constant payload, in bytes, kept verbatim.

    Returns:
        The new, stripped GraphDef; ``graph_def`` itself is not modified.
    """
    strip_def = tf.GraphDef()
    for n0 in graph_def.node:
        n = strip_def.node.add()
        n.MergeFrom(n0)
        if n.op == 'Const':
            tensor = n.attr['value'].tensor
            size = len(tensor.tensor_content)
            if size > max_const_size:
                # tensor_content is a protobuf bytes field: under Python 3 it
                # rejects str, so the marker has to be encoded first.
                tensor.tensor_content = ("<stripped %d bytes>" % size).encode("utf-8")
    return strip_def

def show_graph(graph_def, max_const_size=100):
    """Render a TensorFlow graph inline in the notebook.

    Args:
        graph_def: A GraphDef, or any object exposing ``as_graph_def()``
            (for example a ``tf.Graph``), which is converted first.
        max_const_size: Forwarded to ``strip_consts``; larger constants are
            replaced by a marker before the graph is embedded.

    The stripped graph is embedded as text in an HTML snippet that imports
    the ``tf-graph-basic`` component from tensorboard.appspot.com, and the
    snippet is displayed inside an iframe.
    """
    if hasattr(graph_def, 'as_graph_def'):
        graph_def = graph_def.as_graph_def()
    strip_def = strip_consts(graph_def, max_const_size=max_const_size)
    # The element id is randomised so that several rendered graphs can
    # coexist in one notebook. Doubled braces are literal braces for format().
    code = """
        <script>
          function load() {{
            document.getElementById("{id}").pbtxt = {data};
          }}
        </script>
        <link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
        <div style="height:600px">
          <tf-graph-basic id="{id}"></tf-graph-basic>
        </div>
    """.format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))

    # The snippet goes into the srcdoc attribute, so its double quotes must
    # be escaped.
    iframe = """
        <iframe seamless style="width:960px;height:600px;border:0" srcdoc="{}"></iframe>
    """.format(code.replace('"', '&quot;'))
    display(HTML(iframe))

In [9]:
class LSTMRNN(object):
    """Fixed-length recurrent model trained on reward sequences.

    Despite the name, the recurrent cell is a GRU wrapped in a dropout
    wrapper. Its per-step outputs are projected to one scalar per step, and
    the loss pulls each step's output towards ``reward[t] + output[t+1]``,
    with 0 standing in for the step after the last one.

    Constructing an instance builds the graph, opens a
    ``tf.InteractiveSession`` and runs the global variable initializer.

    NOTE(review): the graph code uses ``tf.concat(dim, values)`` and
    ``tf.sub``, which look like the pre-1.0 TensorFlow signatures - confirm
    the installed version before upgrading.
    """
    def __init__(self,
                r, # Reward Value
                num_step, # Number of RNN step, this is a fixed step RNN sequence, 12 for navigation
                dropout,
                learning_rate=0.001, #Learning rate
                l2_lambda = 1E-4,
                batch_size = 120,
                lstm_state_size = 10): #LSTM hidden state size
        """Build the graph and start a session.

        Args:
            r: Placeholder holding the rewards; it is reshaped to
                ``[-1, num_step, 1]``.
            num_step: Number of recurrent steps per sequence.
            dropout: Placeholder used as the cell's output keep probability.
            learning_rate: Step size for the RMSProp optimizer.
            l2_lambda: Multiplier applied to the L2 penalty terms.
            batch_size: Number of reward rows per training batch.
            lstm_state_size: Size of the recurrent hidden state.
        """
        # Keep the raw placeholder: train_model feeds it directly instead of
        # relying on a module-level variable that happens to be called `r`.
        self.r_input = r
        self.r = tf.reshape(r,[-1,num_step,1]) #Reshape rewards
        print(self.r)
        self.scope = "LSTM"
        self.num_step = num_step
        self.learning_rate = learning_rate
        self.lstm_state_size = lstm_state_size
        self.l2_lambda = l2_lambda
        self.batch_size = batch_size
        self.dropout = dropout
        self._p_create_rnn_graph()
        self._p_create_loss()
        self.sess = tf.InteractiveSession()
        self.sess.run(tf.global_variables_initializer())

    def _p_create_rnn_graph(self):
        """Create the recurrent cell, the output projection and ``self.pred``."""
        lstm = tf.nn.rnn_cell.GRUCell(self.lstm_state_size)
        lstm = tf.nn.rnn_cell.DropoutWrapper(cell=lstm, output_keep_prob=self.dropout)
        # The projection variables are created before the variable scope is
        # entered, so their names do not carry the scope prefix.
        self.w = weight_variable([self.lstm_state_size,1])
        self.b = bias_variable([1])
        with tf.variable_scope(self.scope) as vs:
            rnn_outputs, state = tf.nn.dynamic_rnn(lstm, self.r, dtype=tf.float32)
            # Flatten the step outputs, project them to one scalar each, then
            # restore the [batch, num_step, 1] layout.
            self.outputs = tf.reshape(tf.matmul(tf.reshape(rnn_outputs,[-1,self.lstm_state_size]),self.w)+self.b,[-1,self.num_step,1])
            print("self.outputs:{0}".format(self.outputs))
            # Move the step axis to the front and pick the last step.
            output_trans = tf.transpose(self.outputs, [1, 0, 2])
            self.pred = tf.gather(output_trans, int(output_trans.get_shape()[0]) - 1)

    def _p_create_loss(self):
        """Build the target tensor, the regularised loss and the train op."""
        # Step-major view of the outputs, so steps can be gathered by index.
        labels_part_1 = tf.transpose(self.outputs, [1, 0, 2])
        # Zeros shaped like one step: the value after the final step.
        labels_part_2 = tf.reshape(tf.fill(tf.shape(self.pred), 0.0),[-1,1,1])
        indexs=tf.convert_to_tensor(list(range(1,self.num_step)), name="indices")
        # Outputs of steps 1..num_step-1, i.e. the sequence shifted by one.
        labels_part = tf.transpose(tf.gather(labels_part_1, indexs), [1, 0, 2])
        # Target for step t is output[t+1] + reward[t]. No stop_gradient is
        # applied, so gradients also flow through the target.
        self.label = tf.concat(1,[labels_part,labels_part_2])+self.r
        mse = tf.reduce_mean(tf.square(tf.sub(self.label,self.outputs)), reduction_indices=1)
        # NOTE(review): this penalises variables whose name contains "Bias";
        # L2 is more commonly applied to the weight matrices - confirm intent.
        l2_lstm_loss = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables() if "Bias" in v.name])
        l2_weight_loss = tf.nn.l2_loss(self.w)
        self.loss =  tf.reduce_mean(mse)+(l2_lstm_loss+l2_weight_loss)*self.l2_lambda
        self.optimizer = tf.train.RMSPropOptimizer(self.learning_rate).minimize(self.loss)

    def train_model(self,rewards,dropout_value,epoch=100):
        """Train on ``rewards`` and print the full-data loss periodically.

        Args:
            rewards: Reward rows; fed whole for evaluation and in slices of
                ``self.batch_size`` rows for training.
            dropout_value: Keep probability fed during training steps.
            epoch: Number of passes over ``rewards``.

        The loss on the full data (keep probability 1.0) is printed before
        training and after every 10th epoch, starting with epoch 0.
        """
        # The writer is only used to dump the graph; close it right away so
        # the event file is flushed and the handle is not leaked.
        summary_writer = tf.summary.FileWriter('experiment', graph=self.sess.graph)
        summary_writer.close()

        #Training
        feed_whole = {self.r_input: rewards, self.dropout: 1.0}
        new_loss = self.sess.run([self.loss],feed_dict=feed_whole)
        print('Loss in epoch {0}: {1}'.format("Initial", new_loss))
        for epoch_index in range(epoch):
            for batch in self._p_get_batches(rewards,self.batch_size):
                feed_dict = {self.r_input: batch[0], self.dropout: dropout_value}
                self.sess.run([self.optimizer], feed_dict=feed_dict)
            if epoch_index%10 == 0:
                new_loss = self.sess.run([self.loss],feed_dict=feed_whole)
                print('Loss in epoch {0}: {1}'.format(epoch_index, new_loss))

    def _p_get_batches(self,r_matrix,batch_size):
        """Split ``r_matrix`` into consecutive slices of ``batch_size`` rows.

        Returns a list of one-element lists, each wrapping one slice. The
        final slice holds whatever rows remain and may be shorter. An empty
        input yields an empty list.
        """
        assert batch_size%self.num_step == 0, "Batch size {0} should be multiple of step size{1}!".format(batch_size,self.num_step)
        # Slicing past the end simply stops at the last row, so the final
        # partial batch keeps every remaining row.
        return [[r_matrix[start:start+batch_size]]
                for start in range(0, len(r_matrix), batch_size)]

    def _p_scope_variables(self):
        """Return the trainable variables whose name starts with the scope."""
        # NOTE(review): self.w and self.b are created outside the scope and
        # are therefore not in this list - confirm that checkpoints are meant
        # to omit the output projection.
        return [v for v in tf.trainable_variables() if v.name.startswith(self.scope)]

    def save_weights(self, path = "WEIGHTS_FOLDER/LSTM.ckpt"):
        """Save the scope-prefixed trainable variables to ``path``."""
        saver = tf.train.Saver(self._p_scope_variables())
        saver.save(self.sess, PathFinder(path))

    def reload_weights(self, path = "WEIGHTS_FOLDER/LSTM.ckpt"):
        """Restore the scope-prefixed trainable variables from ``path``."""
        saver = tf.train.Saver(self._p_scope_variables())
        saver.restore(self.sess,PathFinder(path))
        
# Build the model on the reward and dropout placeholders with 12 steps per
# sequence; the constructor prints the reshaped reward and output tensors.
rnn_inst = LSTMRNN(r, 12, dropout)        

Tensor("Reshape:0", shape=(?, 12, 1), dtype=float32)
self.outputs:Tensor("LSTM/Reshape_1:0", shape=(?, 12, 1), dtype=float32)


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [10]:
# List the names of all trainable variables currently in the default graph.
names = [v.name for v in tf.trainable_variables()]
names

['weights:0',
 'biases:0',
 'LSTM/RNN/GRUCell/Gates/Linear/Matrix:0',
 'LSTM/RNN/GRUCell/Gates/Linear/Bias:0',
 'LSTM/RNN/GRUCell/Candidate/Linear/Matrix:0',
 'LSTM/RNN/GRUCell/Candidate/Linear/Bias:0']

In [11]:
# Inspect the symbolic training-target tensor built in _p_create_loss.
rnn_inst.label

<tf.Tensor 'add:0' shape=(?, 12, 1) dtype=float32>

In [12]:
# Static shape of the first trainable variable in the graph.
tf.trainable_variables()[0].get_shape()

TensorShape([Dimension(10), Dimension(1)])

In [13]:
# Static shape of the second trainable variable in the graph.
tf.trainable_variables()[1].get_shape()

TensorShape([Dimension(1)])

In [14]:
# Render the default graph inline in the notebook.
show_graph(tf.get_default_graph().as_graph_def())

In [15]:
# Write the scope-prefixed variables to the default checkpoint path.
rnn_inst.save_weights()

/home/wuga/Documents/Notebook/VAE-PLANNING/WEIGHTS_FOLDER/LSTM.ckpt


In [16]:
# Restore from the default checkpoint path, the same one save_weights uses.
rnn_inst.reload_weights()

/home/wuga/Documents/Notebook/VAE-PLANNING/WEIGHTS_FOLDER/LSTM.ckpt


In [17]:
# Train on the full reward matrix for 500 epochs with the dropout placeholder
# fed 1.0; the loss is printed before training and every 10th epoch.
rnn_inst.train_model(r_matrix,1.0,500)

Loss in epoch Initial: [100.83932]
Loss in epoch 0: [96.501022]
Loss in epoch 10: [54.327183]
Loss in epoch 20: [24.428553]
Loss in epoch 30: [7.4370399]
Loss in epoch 40: [2.2887967]
Loss in epoch 50: [1.9452279]
Loss in epoch 60: [1.9010432]
Loss in epoch 70: [1.8540471]
Loss in epoch 80: [1.8089033]
Loss in epoch 90: [1.8038957]
Loss in epoch 100: [1.8608733]
Loss in epoch 110: [1.8559504]
Loss in epoch 120: [1.7830753]
Loss in epoch 130: [1.7121613]
Loss in epoch 140: [1.6768345]
Loss in epoch 150: [1.6578218]
Loss in epoch 160: [1.6432674]
Loss in epoch 170: [1.6334188]
Loss in epoch 180: [1.62637]
Loss in epoch 190: [1.6210225]
Loss in epoch 200: [1.6168293]
Loss in epoch 210: [1.6135349]
Loss in epoch 220: [1.6110047]
Loss in epoch 230: [1.6088994]
Loss in epoch 240: [1.6068351]
Loss in epoch 250: [1.6047717]
Loss in epoch 260: [1.6026168]
Loss in epoch 270: [1.6003888]
Loss in epoch 280: [1.598137]
Loss in epoch 290: [1.5958476]
Loss in epoch 300: [1.5934541]
Loss in epoch 310:

In [18]:
# Write the scope-prefixed variables to the default checkpoint path again,
# this time after the training call.
rnn_inst.save_weights()

/home/wuga/Documents/Notebook/VAE-PLANNING/WEIGHTS_FOLDER/LSTM.ckpt
