In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
from sklearn.metrics import confusion_matrix
import time
from datetime import timedelta
import math
import os
import pandas as pd
#Functional coding
import functools
from functools import partial
from tensorflow.python.ops import array_ops 

In [2]:
# Relative locations of the reservoir data files.
Datapath = "DATA/Reservoir/Reservoir_Data.txt"      # loaded below as the state/action table
Labelpath = "DATA/Reservoir/Reservoir_Label.txt"    # presumably the next-state labels -- TODO confirm
Rewardpath = "DATA/Reservoir/Reservoir_Reward.txt"  # loaded below as the reward table

In [3]:
# Given a local path, find the full path
def PathFinder(path):
    """Return the absolute form of ``path`` and echo it to stdout.

    The path is resolved with ``os.path.abspath``, so a relative path is
    interpreted against the current working directory.

    Args:
        path: Relative or absolute filesystem path.

    Returns:
        The absolute path as a string.
    """
    resolved = os.path.abspath(path)
    print(resolved)
    return resolved

# Read Data for Deep Learning
def ReadData(path):
    """Load a comma-separated file into a pandas DataFrame.

    The path is first resolved (and printed) by ``PathFinder``; the first
    row of the file is used as the column header.

    Args:
        path: Relative or absolute path of the file to read.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    return pd.read_csv(PathFinder(path), sep=',', header=0)

In [4]:
# Load the three tables and expose each of them as a plain NumPy array.
# ``DataFrame.as_matrix()`` was deprecated in pandas 0.23 and removed in 1.0;
# ``.values`` yields the same ndarray on both old and current pandas versions.
S_A_pd = ReadData(Datapath)
SP_pd = ReadData(Labelpath)
R_pd = ReadData(Rewardpath)
S_A_matrix = S_A_pd.values
SP_matrix = SP_pd.values
R_matrix = R_pd.values

/home/wuga/Documents/Notebook/VAE-PLANNING/DATA/Reservoir/Reservoir_Data.txt
/home/wuga/Documents/Notebook/VAE-PLANNING/DATA/Reservoir/Reservoir_Label.txt
/home/wuga/Documents/Notebook/VAE-PLANNING/DATA/Reservoir/Reservoir_Reward.txt


In [5]:
# Configuration of the reservoir network; the per-reservoir lists are indexed by reservoir id.
default_settings = {
    # Capacity per reservoir (exposed through RESERVOIR.MAXCAP).
    "max_cap": [100, 200, 400, 500],
    # Water levels above this bound are penalised by the reward.
    "high_bound": [80, 180, 380, 480],
    # Water levels below this bound are penalised by the reward.
    "low_bound": [20, 30, 40, 60],
    # Amount added to each reservoir on every transition.
    "rain": [5, 10, 20, 30],
    # [i, j] pairs: water released from reservoir i flows into reservoir j.
    "downstream": [[0, 1], [1, 2], [2, 3]],
    # Reservoir ids reported as draining to the sea (see RESERVOIR.DOWNTOSEA).
    "downtosea": [3],
    # Divisor used inside the sine term of the evaporation formula.
    "biggestmaxcap": 1000,
    # Number of reservoirs; sizes the RNN cell state and the unpacked outputs.
    "num_reservoir": 4,
}


In [6]:
class RESERVOIR(object):
    """Reservoir-network dynamics and reward expressed as TensorFlow graph ops.

    The settings dictionary supplies per-reservoir lists (``max_cap``,
    ``high_bound``, ``low_bound``, ``rain``), the ``downstream`` topology as
    ``[source, target]`` pairs, the ``downtosea`` ids and the scalar
    ``biggestmaxcap``.  All numeric settings are turned into float32 constants.
    """

    def __init__(self,
                 default_settings):
        # Topology is kept as plain Python data; it is only consulted while the graph is built.
        self.downstream = default_settings["downstream"]
        self.downtosea = default_settings["downtosea"]
        self.biggestmaxcap = tf.constant(default_settings["biggestmaxcap"], dtype=tf.float32)
        # Scalar constants shared by the transition and reward formulas.
        self.zero = tf.constant(0, dtype=tf.float32)
        self.two = tf.constant(2.0, dtype=tf.float32)
        self.one = tf.constant(1.0, dtype=tf.float32)
        self.nfive = tf.constant(-5.0, dtype=tf.float32)    # slope of the below-low-bound penalty
        self.nhund = tf.constant(-100.0, dtype=tf.float32)  # slope of the above-high-bound penalty
        self.npone = tf.constant(-0.1, dtype=tf.float32)    # slope of the distance-to-midpoint penalty
        self._maxcaps(default_settings["max_cap"])
        self._high_bounds(default_settings["high_bound"])
        self._low_bounds(default_settings["low_bound"])
        self._rains(default_settings["rain"])

    @staticmethod
    def _as_constants(values):
        """Wrap every entry of ``values`` in a float32 ``tf.constant``."""
        return [tf.constant(value, dtype=tf.float32) for value in values]

    def _maxcaps(self, max_cap_list):
        """Store the capacities as a list of float32 constants."""
        self.max_cap = self._as_constants(max_cap_list)

    def _high_bounds(self, high_bound_list):
        """Store the upper level bounds as a list of float32 constants."""
        self.high_bound = self._as_constants(high_bound_list)

    def _low_bounds(self, low_bound_list):
        """Store the lower level bounds as a list of float32 constants."""
        self.low_bound = self._as_constants(low_bound_list)

    def _rains(self, rain_list):
        """Store the per-step rain amounts as a list of float32 constants."""
        self.rain = self._as_constants(rain_list)

    def MAXCAP(self, reservoir_id):
        """Capacity constant of the given reservoir."""
        return self.max_cap[reservoir_id]

    def HIGH_BOUND(self, reservoir_id):
        """Upper level bound of the given reservoir."""
        return self.high_bound[reservoir_id]

    def LOW_BOUND(self, reservoir_id):
        """Lower level bound of the given reservoir."""
        return self.low_bound[reservoir_id]

    def RAIN(self, reservoir_id):
        """Rain amount added to the given reservoir on every step."""
        return self.rain[reservoir_id]

    def DOWNSTREAM(self, reservoir_id1, reservoir_id2):
        """True when ``[reservoir_id1, reservoir_id2]`` is a configured downstream pair."""
        return any(reservoir_id1 == pair[0] and reservoir_id2 == pair[1]
                   for pair in self.downstream)

    def DOWNTOSEA(self, reservoir_id):
        """True when the reservoir id is listed in ``downtosea``."""
        return reservoir_id in self.downtosea

    def BIGGESTMAXCAP(self):
        """The ``biggestmaxcap`` constant."""
        return self.biggestmaxcap

    def _transition(self, reservoir_id, states, actions):
        """Next level of one reservoir.

        ``states`` and ``actions`` are sequences of per-reservoir scalars.
        next = level + rain - 0.5*sin(level/biggestmaxcap)*level
               - own release + releases of all reservoirs directly upstream.
        """
        level = states[reservoir_id]
        vaporated = (self.one/self.two)*tf.sin(level/self.BIGGESTMAXCAP())*level
        inflow = self.zero
        for source_id in range(len(states)):
            if self.DOWNSTREAM(source_id, reservoir_id):
                inflow += actions[source_id]
        return level + self.RAIN(reservoir_id)-vaporated-actions[reservoir_id]+inflow

    # For single data point
    def _vector_trans(self, state_size, states_packed, actions_packed):
        """Apply ``_transition`` to every reservoir of one sample and re-pack the result."""
        levels = tf.unpack(states_packed)
        releases = tf.unpack(actions_packed)
        return tf.pack([self._transition(reservoir_id, levels, releases)
                        for reservoir_id in range(state_size)])

    def Transition(self, states, actions):
        """Next states for a whole batch; rows of ``states``/``actions`` are processed one by one.

        The static shape of ``states`` must be fully known because the batch
        is unrolled in Python.
        """
        batch_size, state_size = states.get_shape()
        sample_states = tf.unpack(states)
        sample_actions = tf.unpack(actions)
        return tf.pack([self._vector_trans(state_size, sample_states[row], sample_actions[row])
                        for row in range(batch_size)])

    def _reward(self, states_packed):
        """Reward of one sample, returned as a length-1 tensor.

        Per reservoir: 0 inside [low, high], -5*(low-level) below the lower
        bound, -100*(level-high) above the upper bound, plus
        -0.1*|midpoint-level| in every case.
        """
        total = self.zero
        levels = tf.unpack(states_packed)
        for reservoir_id in range(len(levels)):
            level = levels[reservoir_id]
            low = self.LOW_BOUND(reservoir_id)
            high = self.HIGH_BOUND(reservoir_id)
            in_range = tf.logical_and(level >= low, level <= high)
            # The branch callables are invoked while tf.cond builds the graph,
            # so closing over the loop locals is safe here.
            total += tf.cond(in_range,
                             lambda: self.zero,
                             lambda: tf.cond(level < low,
                                             lambda: self.nfive*(low-level),
                                             lambda: self.nhund*(level-high)))
            total += tf.abs(((high+low)/self.two)-level)*self.npone
        return tf.pack([total])

    #Reward for Reservoir is computed on 'Next State'
    def Reward(self, states):
        """Rewards for a whole batch; one ``_reward`` value per row of ``states``."""
        batch_size, _ = states.get_shape()
        sample_states = tf.unpack(states)
        return tf.pack([self._reward(sample_states[row]) for row in range(batch_size)])

In [7]:
# States: a fixed batch of 10 rows with 4 values each
states = tf.placeholder(dtype=tf.float32, shape=[10, 4], name="States")

# Actions: same layout as the states
actions = tf.placeholder(dtype=tf.float32, shape=[10, 4], name="Actions")

In [8]:
# Build the reservoir model from the shared configuration dictionary.
reservoir_inst = RESERVOIR(default_settings)

In [9]:
# Smoke test of the single-reservoir transition on the first ten samples.
states_list = tf.unpack(states)
actions_list = tf.unpack(actions)
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
feed_dict = {
    states: S_A_matrix[:10, 4:],   # columns from index 4 on are fed as states
    actions: S_A_matrix[:10, :4],  # the first four columns are fed as actions
}
# Next level of reservoir 0 computed from the first sample ...
new_state = reservoir_inst._transition(
    0, tf.unpack(states_list[0]), tf.unpack(actions_list[0]))
print(sess.run([new_state], feed_dict=feed_dict))
# ... printed next to the state row of the second sample (shown twice, as before).
print(sess.run([states_list[1]], feed_dict=feed_dict))
print(sess.run([states_list[1]], feed_dict=feed_dict))

[34.210938]
[array([ 34.21093369,  98.94076538,  70.70948792,  79.58050537], dtype=float32)]
[array([ 34.21093369,  98.94076538,  70.70948792,  79.58050537], dtype=float32)]


In [10]:
# Graph node holding one reward per row of the `states` placeholder.
new_rewards = reservoir_inst.Reward(states)

In [11]:
# Feed the first ten samples (columns 4+ as states, columns 0-3 as actions)
# and evaluate the reward node; the result is displayed as the cell output.
feed_dict={states:S_A_matrix[:10,4:], actions:S_A_matrix[:10,:4]}
sess.run(new_rewards,feed_dict=feed_dict )

array([[-96.        ],
       [-35.15583038],
       [-31.02294159],
       [-29.95938873],
       [-25.39591026],
       [-22.01288414],
       [-22.55617714],
       [-26.20808411],
       [-27.96719742],
       [-26.70908737]], dtype=float32)

In [12]:
class RESERVOIRCell(tf.nn.rnn_cell.RNNCell):
    """RNN cell whose step function is the RESERVOIR transition.

    The cell input is the action, the cell state is the reservoir levels, and
    the cell output is the reward concatenated in front of the next state.
    """

    def __init__(self, default_settings):
        reservoir_count = default_settings["num_reservoir"]
        self._num_state_units = reservoir_count
        # One extra output unit: the reward is placed before the next state.
        self._num_reward_units = reservoir_count + 1
        self.reservoir = RESERVOIR(default_settings)

    @property
    def state_size(self):
        """Width of the cell state (``num_reservoir``)."""
        return self._num_state_units

    @property
    def output_size(self):
        """Width of the cell output (``num_reservoir`` + 1)."""
        return self._num_reward_units

    def __call__(self, inputs, state, scope=None):
        """Advance one step; returns ``(concat([reward, next_state]), next_state)``."""
        next_state = self.reservoir.Transition(state, inputs)
        reward = self.reservoir.Reward(next_state)
        step_output = tf.concat(1, [reward, next_state])
        return step_output, next_state

In [13]:
class ActionOptimizer(object):
    """Optimises a batch of action sequences against the reservoir simulator.

    The action variable is unrolled through ``RESERVOIRCell`` with
    ``tf.nn.dynamic_rnn``; the loss is the negated mean over the batch of the
    summed per-step rewards, minimised with RMSProp with respect to the
    action variable only.  After every update the actions are clipped to
    ``[0, intern_states]`` element-wise.

    Args:
        a: ``tf.Variable`` holding the actions; it is fed to ``dynamic_rnn``
            and must have the same shape as ``intern_states``
            (presumably [batch, step, reservoir] -- confirm with the caller).
        num_step: Number of RNN steps (fixed-length sequence).
        num_act: Number of actions; accepted for interface compatibility,
            not used by this class.
        batch_size: Number of action sequences optimised in parallel.
        learning_rate: RMSProp learning rate.

    Note: the cell is still configured from the module-level
    ``default_settings`` dictionary.
    """

    def __init__(self,
                a, # Actions
                num_step, # Number of RNN step, this is a fixed step RNN sequence, 12 for navigation
                num_act, # Number of actions
                batch_size, #Batch Size
                learning_rate=1):
        self.action = a
        print(self.action)
        self.batch_size = batch_size
        self.num_step = num_step
        self.learning_rate = learning_rate
        self._p_create_rnn_graph()
        self._p_create_loss()
        self._p_create_projection()
        # All ops (including the optimizer slot variables) exist at this point,
        # so a single initializer run covers everything.
        self.sess = tf.InteractiveSession()
        self.sess.run(tf.global_variables_initializer())

    def _p_create_rnn_graph(self):
        """Unroll the reservoir cell over the action sequence and split its outputs."""
        cell = RESERVOIRCell(default_settings)
        # Every sequence starts from the same hard-coded reservoir levels.
        initial_state = cell.zero_state(self.batch_size, dtype=tf.float32)+tf.constant([[75,50,50,50]],dtype=tf.float32)
        print('action batch size:{0}'.format(array_ops.shape(self.action)[0]))
        print('Initial_state shape:{0}'.format(initial_state))
        rnn_outputs, state = tf.nn.dynamic_rnn(cell, self.action, dtype=tf.float32,initial_state=initial_state)
        #need output intermediate states as well
        concated = tf.concat(0,rnn_outputs)
        print('concated shape:{0}'.format(concated.get_shape()))
        # Channel 0 of the cell output is the reward, the remaining channels are the states.
        something_unpacked =  tf.unpack(concated, axis=2)
        self.outputs = tf.reshape(something_unpacked[0],[-1,self.num_step,1])
        print(' self.outputs:{0}'.format(self.outputs.get_shape()))
        self.intern_states = tf.pack([something_unpacked[i+1] for i in range(default_settings["num_reservoir"])], axis=2)
        self.last_state = state
        # Total reward of each sequence (sum over the step axis).
        self.pred = tf.reduce_sum(self.outputs,1)
        print("self.pred:{0}".format(self.pred))
        # Index of the best sequence; built once so Optimize() adds no new graph nodes.
        self.best_index = tf.argmax(self.pred,0)

    def _p_create_loss(self):
        """Define the loss (negative mean total reward) and the RMSProp update."""
        objective = tf.reduce_mean(self.pred)
        self.loss = -objective
        print(self.loss.get_shape())
        # Optimise the variable handed to the constructor rather than a
        # notebook global that merely happens to be called `a`.
        self.optimizer = tf.train.RMSPropOptimizer(self.learning_rate).minimize(self.loss, var_list=[self.action])

    def _p_create_projection(self):
        """Build, once, the op that clips the actions to [0, intern_states].

        Creating this op inside the training loop would add new nodes to the
        graph on every iteration.  The assign depends on ``intern_states``, so
        the states are computed from the current actions before the variable
        is overwritten -- the same values the former fetch-then-assign
        sequence produced.
        """
        self.clip_action = tf.assign(self.action, tf.clip_by_value(self.action, 0, self.intern_states))

    def Optimize(self,epoch=100):
        """Run ``epoch`` update/clip iterations, then print the best sequence.

        The loss is printed before the first update and after every
        iteration.  Afterwards the sequence with the highest total reward is
        selected and its actions, total reward, final state, per-step rewards
        and intermediate states are printed.
        """
        new_loss = self.sess.run([self.loss])
        print('Loss in epoch {0}: {1}'.format("Initial", new_loss))
        for step in range(epoch):
            self.sess.run([self.optimizer])
            # Project the updated actions back into the allowed range.
            self.sess.run(self.clip_action)
            new_loss = self.sess.run([self.loss])
            print('Loss in epoch {0}: {1}'.format(step, new_loss))
        # Nothing changes between these fetches, so one run is equivalent to
        # the former sequence of separate runs.
        minimum_costs_id, action_values, pred_values, last_states, rewards, states_seq = self.sess.run(
            [self.best_index, self.action, self.pred, self.last_state, self.outputs, self.intern_states])
        print(minimum_costs_id)
        best = minimum_costs_id[0]
        best_action = np.round(action_values[best],4)
        print('Optimal Action Squence:{0}'.format(best_action))
        print('Best Cost: {0}'.format(pred_values[best]))
        print('The last state:{0}'.format(last_states[best]))
        print('Rewards each time step:{0}'.format(rewards[best]))
        print('Intermediate states:{0}'.format(states_seq[best]))

In [14]:
# Randomly initialised actions of shape [10, 10, 4]; no seed is set, so every run starts differently.
a = tf.Variable(
    tf.truncated_normal(shape=[10, 10, 4], mean=0.0, stddev=0.5),
    name="action")
rnn_inst = ActionOptimizer(a, num_step=10, num_act=4, batch_size=10)

Tensor("action/read:0", shape=(10, 10, 4), dtype=float32)
action batch size:Tensor("strided_slice:0", shape=(), dtype=int32)
Initial_state shape:Tensor("add_122:0", shape=(10, 4), dtype=float32)
concated shape:(10, 10, 5)
 self.outputs:(10, 10, 1)
self.pred:Tensor("Sum:0", shape=(10, 1), dtype=float32)
()


In [15]:
# Run 500 optimisation iterations; the loss is printed after every one of them.
rnn_inst.Optimize(500)

Loss in epoch Initial: [5235.8481]
Loss in epoch 0: [223.2253]
Loss in epoch 1: [222.78189]
Loss in epoch 2: [222.31973]
Loss in epoch 3: [221.84167]
Loss in epoch 4: [221.34448]
Loss in epoch 5: [220.83025]
Loss in epoch 6: [220.29912]
Loss in epoch 7: [219.76834]
Loss in epoch 8: [219.22433]
Loss in epoch 9: [218.67358]
Loss in epoch 10: [218.14795]
Loss in epoch 11: [217.64226]
Loss in epoch 12: [217.15059]
Loss in epoch 13: [216.66216]
Loss in epoch 14: [216.16983]
Loss in epoch 15: [215.65247]
Loss in epoch 16: [215.10881]
Loss in epoch 17: [214.54033]
Loss in epoch 18: [213.94504]
Loss in epoch 19: [213.31924]
Loss in epoch 20: [212.67233]
Loss in epoch 21: [212.0535]
Loss in epoch 22: [211.51218]
Loss in epoch 23: [211.00525]
Loss in epoch 24: [210.48882]
Loss in epoch 25: [209.9458]
Loss in epoch 26: [209.37447]
Loss in epoch 27: [208.77542]
Loss in epoch 28: [208.19836]
Loss in epoch 29: [207.72787]
Loss in epoch 30: [207.29927]
Loss in epoch 31: [206.85855]
Loss in epoch 32: 