In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
from sklearn.metrics import confusion_matrix
import time
from datetime import timedelta
import math
import os
import pandas as pd
#Functional coding
import functools
from functools import partial
from tensorflow.python.ops import array_ops 

In [2]:
# Scratch check of list membership; the result is only displayed, not assigned.
'a' in ['a','b','c']

True

In [3]:
# Relative paths of the three HVAC input files. Each one is handed to
# ReadData, which loads it with pd.read_csv (comma separated, first row as header).
Datapath="DATA/HVAC/HVAC_Data.txt"
Labelpath="DATA/HVAC/HVAC_Label.txt"
Rewardpath="DATA/HVAC/HVAC_Reward.txt"

In [4]:
# Turn a (possibly relative) path into an absolute one
def PathFinder(path):
    """Return the absolute form of ``path`` and echo it to stdout.

    Args:
        path: File system path; relative paths are resolved by
            ``os.path.abspath``.

    Returns:
        The absolute path as a string.
    """
    resolved = os.path.abspath(path)
    # The echo lets the reader of the notebook see which file is being used.
    print(resolved)
    return resolved

# Load one of the Deep Learning input tables
def ReadData(path):
    """Read the comma-separated file at ``path`` into a DataFrame.

    The path is first resolved (and printed) by ``PathFinder``; the first
    row of the file is used as the column header.

    Args:
        path: Location of the file to read.

    Returns:
        A ``pandas.DataFrame`` with the file contents.
    """
    return pd.read_csv(PathFinder(path), sep=',', header=0)

In [5]:
# Load the three input tables as DataFrames.
S_A_pd = ReadData(Datapath)
SP_pd = ReadData(Labelpath)
R_pd = ReadData(Rewardpath)
# Plain NumPy views of the tables. DataFrame.as_matrix() is deprecated and
# removed in pandas 1.0; .values returns the same ndarray on every version.
# Later cells feed columns 6+ of S_A_matrix as states and columns 0-5 as actions.
S_A_matrix=S_A_pd.values
SP_matrix=SP_pd.values
R_matrix=R_pd.values

/home/wuga/Documents/Notebook/VAE-PLANNING/DATA/HVAC/HVAC_Data.txt
/home/wuga/Documents/Notebook/VAE-PLANNING/DATA/HVAC/HVAC_Label.txt
/home/wuga/Documents/Notebook/VAE-PLANNING/DATA/HVAC/HVAC_Reward.txt


In [6]:
# Model parameters shared by all rooms, each stored as a scalar float32
# tf.constant. HVAC.__init__ copies every key onto the instance, where the
# HVAC accessor methods return them.
default_settings = {
    name: tf.constant(value, dtype=tf.float32)
    for name, value in (
        ("cap", 80.0),
        ("outside_resist", 4.0),
        ("hall_resist", 2.0),
        ("wall_resist", 1.5),
        ("cap_air", 1.006),
        ("cost_air", 1.0),
        ("time_delta", 1.0),
        ("temp_air", 40.0),
        ("temp_up", 23.5),
        ("temp_low", 20.0),
        ("temp_outside", 6.0),
        ("temp_hall", 10.0),
        ("penalty", 20000.0),
        ("air_max", 10.0),
    )
}

In [7]:
class HVAC(object):
    """Builds TensorFlow graph ops for a multi-room HVAC transition and reward model.

    Room ids come from ``rooms``. They are also used as indices into the
    unstacked state and action tensors, so they are expected to be
    ``0 .. state_size - 1``.

    Every entry of ``default_settings`` is copied onto the instance as an
    attribute and read back through the upper-case accessor methods. The
    accessors that take a ``space`` argument ignore it, so all rooms share
    the same value.
    """

    def __init__(self, 
                 adj_outside, #Adjacent to outside 
                 adj_hall, #Adjacent to hall
                 adj, #Adjacent between rooms
                 rooms, #Room names
                 default_settings):
        """Store the room topology and the model parameters.

        Args:
            adj_outside: Collection of room ids adjacent to the outside.
            adj_hall: Collection of room ids adjacent to the hall.
            adj: Iterable of room-id pairs that share a wall.
            rooms: Iterable of all room ids.
            default_settings: Mapping of parameter name to value; each key
                becomes an instance attribute of the same name.
        """
        self.__dict__.update(default_settings)
        self.adj_outside = adj_outside
        self.adj_hall = adj_hall
        self.adj = adj
        self.rooms = rooms
        self.zero = tf.constant(0,dtype=tf.float32)

    def ADJ(self, space1, space2):
        """Return True when two *different* rooms are listed together in ``adj``."""
        # A room is never its own neighbour. Without this guard ADJ(x, x) was
        # True for any x that appears in some pair.
        if space1 == space2:
            return False
        return any(space1 in pair and space2 in pair for pair in self.adj)

    def ADJ_OUTSIDE(self,  space):
        """Return True when ``space`` is listed in ``adj_outside``."""
        return space in self.adj_outside

    def ADJ_HALL(self, space):
        """Return True when ``space`` is listed in ``adj_hall``."""
        return space in self.adj_hall

    # --- Parameter accessors: each returns the matching default_settings value.

    def R_OUTSIDE(self, space):
        return self.outside_resist

    def R_HALL(self, space):
        return self.hall_resist

    def R_WALL(self, space1, space2):
        return self.wall_resist

    def CAP(self, space):
        return self.cap

    def CAP_AIR(self):
        return self.cap_air

    def COST_AIR(self):
        return self.cost_air

    def TIME_DELTA(self):
        return self.time_delta

    def TEMP_AIR(self):
        return self.temp_air

    def TEMP_UP(self, space):
        return self.temp_up

    def TEMP_LOW(self, space):
        return self.temp_low

    def TEMP_OUTSIDE(self, space):
        return self.temp_outside

    def TEMP_HALL(self, space):
        return self.temp_hall

    def PENALTY(self):
        return self.penalty

    def AIR_MAX(self, space):
        return self.air_max

    @staticmethod
    def _static_dims(tensor):
        """Return the static ``(batch_size, state_size)`` of a rank-2 tensor as ints.

        The graph is unrolled with Python loops, so both dimensions must be
        known when the graph is built.

        Raises:
            ValueError: If either dimension is not statically defined.
        """
        batch_size, state_size = tensor.get_shape().as_list()
        if batch_size is None or state_size is None:
            raise ValueError(
                "HVAC needs a fully defined static shape, got {0}".format(tensor.get_shape()))
        return batch_size, state_size

    # Single state function, need map to matrix later
    def _transition(self, space, states, actions):
        """Build the next-state op for one room.

        Computes ``prev + TIME_DELTA / CAP * (heating + neighbours + outside + hall)``
        where each term is driven by the difference to the room's current state.

        Args:
            space: Room id, used as index into ``states`` and ``actions``.
            states: Indexable of per-room state scalars (list or 1-D tensor).
            actions: Indexable of per-room action scalars.

        Returns:
            Scalar tensor with the room's next state.
        """
        previous_state = states[space]
        heating_info = actions[space]*self.CAP_AIR()*(self.TEMP_AIR()-previous_state)
        neighbor_info = self.zero
        for p in self.rooms:
            if self.ADJ(space,p):
                neighbor_info += (states[p]-previous_state)/self.R_WALL(space,p)
        outside_info = self.zero
        if self.ADJ_OUTSIDE(space):
            outside_info=(self.TEMP_OUTSIDE(space)-previous_state)/self.R_OUTSIDE(space)
        wall_info = self.zero
        if self.ADJ_HALL(space):
            wall_info=(self.TEMP_HALL(space)-previous_state)/self.R_HALL(space)

        new_state = previous_state + self.TIME_DELTA()/self.CAP(space)*(heating_info + neighbor_info + outside_info + wall_info)
        return new_state

    # For single data point
    def _vector_trans(self, state_size, states_packed, actions_packed):
        """Build the next-state vector for one data point.

        Args:
            state_size: Number of rooms in the packed vectors.
            states_packed: 1-D tensor of per-room states.
            actions_packed: 1-D tensor of per-room actions.

        Returns:
            1-D tensor holding the next state of every room.
        """
        # tf.stack / tf.unstack replace the deprecated tf.pack / tf.unpack.
        states = tf.unstack(states_packed)
        actions = tf.unstack(actions_packed)
        new_states = [self._transition(i,states,actions) for i in range(state_size)]
        return tf.stack(new_states)

    def Transition(self, states, actions):
        """Build the batched next-state op.

        Args:
            states: Tensor of shape ``[batch_size, state_size]`` with a fully
                defined static shape.
            actions: Tensor with the same layout as ``states``.

        Returns:
            Tensor of shape ``[batch_size, state_size]`` with the next states.
        """
        batch_size,state_size = self._static_dims(states)
        states_list = tf.unstack(states)
        actions_list = tf.unstack(actions)
        new_states = []
        for i in range(batch_size):
            new_states.append(self._vector_trans(state_size,states_list[i],actions_list[i]))
        return tf.stack(new_states)

    # For single data point
    def _reward(self, state_size, states_packed, actions_packed):
        """Build the reward op for one data point.

        For every room the reward is reduced by: ``PENALTY`` when the state is
        outside ``[TEMP_LOW, TEMP_UP]``, ten times the distance to the middle
        of that interval, and ``action * COST_AIR``.

        Returns:
            Tensor of shape ``[1]`` holding the (non-positive) reward.
        """
        reward = self.zero
        states = tf.unstack(states_packed)
        actions = tf.unstack(actions_packed)

        #For each room
        for i in range(state_size):

            #Penalty for breaking upper or lower bound constraints
            out_of_bounds = tf.logical_or(states[i] <self.TEMP_LOW(i), states[i] > self.TEMP_UP(i))
            break_penalty = tf.cond(out_of_bounds, lambda: self.PENALTY(), lambda: self.zero)

            #Penalty for distance to centre
            dist_penalty = tf.abs(((self.TEMP_UP(i)+self.TEMP_LOW(i))/tf.constant(2.0, dtype=tf.float32))-states[i])

            #Penalty for energy cost
            ener_penalty = actions[i]*self.COST_AIR()

            reward -= (break_penalty+tf.constant(10.0, tf.float32)*dist_penalty+ener_penalty)

        return tf.stack([reward])

    def Reward(self, states,actions):
        """Build the batched reward op.

        Args:
            states: Tensor of shape ``[batch_size, state_size]`` with a fully
                defined static shape.
            actions: Tensor with the same layout as ``states``.

        Returns:
            Tensor of shape ``[batch_size, 1]`` with one reward per data point.
        """
        batch_size,state_size = self._static_dims(states)
        states_list = tf.unstack(states)
        actions_list = tf.unstack(actions)
        new_rewards = []
        for i in range(batch_size):
            new_rewards.append(self._reward(state_size,states_list[i],actions_list[i]))
        return tf.stack(new_rewards)
            
    
                

In [8]:
# Six rooms, identified by the integers 0..5.
rooms = list(range(6))
# Every room is listed as adjacent to the hall (separate list object from rooms).
adj_hall = list(rooms)
# Rooms listed as adjacent to the outside.
adj_outside = [0, 2, 3, 5]
# Pairs of rooms that share a wall.
adj = [
    [0, 1], [0, 3],
    [1, 2], [1, 4],
    [2, 5],
    [3, 4],
    [4, 5],
]

In [9]:
# States: one row per data point, one column per room. The shape is fully
# static (10 x 6) because HVAC.Transition / HVAC.Reward loop over the batch
# and room dimensions in Python while building the graph.
states = tf.placeholder(tf.float32,[10, 6],name="States")

# Actions: same layout as States.
actions = tf.placeholder(tf.float32,[10, 6],name="Actions")

In [10]:
# Model instance built from the room topology and default_settings defined above.
hvac_inst = HVAC(adj_outside,adj_hall,adj,rooms,default_settings)

In [11]:
# Split the placeholders into per-row tensors.
# tf.unstack replaces the deprecated tf.unpack (same behaviour).
states_list=tf.unstack(states)
actions_list = tf.unstack(actions)
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
# First 10 rows of the data table: columns 6+ are fed as states, 0-5 as actions.
feed_dict={states:S_A_matrix[:10,6:], actions:S_A_matrix[:10,:6]}
# Next state of room 0 for the first data point.
new_state = hvac_inst._transition(0,states_list[0],actions_list[0])
print(sess.run([new_state], feed_dict=feed_dict))
# Second row of the fed states, for comparison with the value above.
# NOTE(review): the same tensor is printed twice; kept so the recorded output still matches.
print(sess.run([states_list[1]], feed_dict=feed_dict))
print(sess.run([states_list[1]], feed_dict=feed_dict))


[13.76]
[array([ 13.76000023,  13.77250004,  13.76000023,  13.76000023,
        13.77250004,  13.76000023], dtype=float32)]
[array([ 13.76000023,  13.77250004,  13.76000023,  13.76000023,
        13.77250004,  13.76000023], dtype=float32)]


In [12]:
# Spot check: rooms 4 and 5 are listed together in adj.
hvac_inst.ADJ(4,5)

True

In [13]:
# Build the batched next-state op for the States / Actions placeholders.
new_states=hvac_inst.Transition(states,actions)

In [14]:
# Display the static shape of the States placeholder.
states.get_shape()

TensorShape([Dimension(10), Dimension(6)])

In [15]:
# Feed the first 10 rows (columns 6+ as states, columns 0-5 as actions)
# and evaluate the next-state op.
feed_dict={states:S_A_matrix[:10,6:], actions:S_A_matrix[:10,:6]}
state_prim = sess.run(new_states,feed_dict=feed_dict )

In [16]:
# First three rows of columns 0-5, the slice that is fed as actions.
print(S_A_matrix[:3,:6])

[[ 10.  10.  10.  10.  10.  10.]
 [ 10.  10.  10.  10.  10.  10.]
 [ 10.  10.  10.  10.  10.  10.]]


In [17]:
# First three rows of columns 6 onward, the slice that is fed as states.
print(S_A_matrix[:3,6:])

[[ 10.          10.          10.          10.          10.          10.        ]
 [ 13.76        13.7725      13.76        13.76        13.7725      13.76      ]
 [ 17.01203417  17.04682167  17.01203417  17.01203417  17.04682167
   17.01203417]]


In [18]:
# Build the batched reward op for the States / Actions placeholders.
new_rewards = hvac_inst.Reward(states,actions)

In [19]:
# Feed the first 10 rows (columns 6+ as states, columns 0-5 as actions)
# and display one reward per row.
feed_dict={states:S_A_matrix[:10,6:], actions:S_A_matrix[:10,:6]}
sess.run(new_rewards,feed_dict=feed_dict )

array([[ -1.20765000e+05],
       [ -1.20539148e+05],
       [ -1.20343578e+05],
       [ -1.20174234e+05],
       [ -6.58814545e+01],
       [ -6.01172969e+04],
       [ -1.00133281e+05],
       [ -8.01341719e+04],
       [ -1.00139266e+05],
       [ -1.20141102e+05]], dtype=float32)

In [20]:
class HVACCell(tf.nn.rnn_cell.RNNCell):
    """RNN cell that advances an HVAC model by one step.

    The cell input is the action vector, the cell state is the per-room
    state vector, and the cell output is the reward of that step.
    """

    def __init__(self, adj_outside,adj_hall,adj,rooms,default_settings):
        """Create the wrapped HVAC model.

        All arguments are forwarded unchanged to ``HVAC``; the state size is
        the number of entries in ``rooms`` and the output size is 1.
        """
        # Let the RNNCell base class set itself up before adding attributes.
        super(HVACCell, self).__init__()
        self._num_state_units = len(rooms)
        self._num_reward_units = 1
        self.hvac = HVAC(adj_outside,adj_hall,adj,rooms,default_settings)

    @property
    def state_size(self):
        """Width of the state vector (one entry per room)."""
        return self._num_state_units

    @property
    def output_size(self):
        """Width of the output vector (a single reward)."""
        return self._num_reward_units

    def __call__(self, inputs, state, scope=None):
        """Run one step.

        Args:
            inputs: Action tensor of shape ``[batch_size, state_size]``.
            state: State tensor of shape ``[batch_size, state_size]``.
            scope: Unused; accepted for compatibility with the RNNCell call signature.

        Returns:
            ``(reward, next_state)``. The reward is evaluated on the state
            the step starts from, not on ``next_state``.
        """
        next_state =  self.hvac.Transition(state, inputs)
        reward = self.hvac.Reward(state, inputs)      
        return reward, next_state
    

In [21]:
# Cell instance used for the single-step check in the next cell.
hvac_inst_cell = HVACCell(adj_outside,adj_hall,adj,rooms,default_settings)

In [22]:
# Single cell step outside of dynamic_rnn: a batch of two all-zero action
# rows and a batch of two initial states with every room set to 10.
# NOTE: the name `a` is rebound to a different variable in a later cell.
a = tf.Variable(tf.constant(0.0, dtype=tf.float32,shape=[2,6]),name="action")
initial_state = hvac_inst_cell.zero_state(2, dtype=tf.float32)+tf.constant([[10,10,10,10,10,10]],dtype=tf.float32)
# Displays the (reward, next_state) tensors; the graph is built but not run here.
hvac_inst_cell(a,initial_state )
#print(initial_state.get_shape())

(<tf.Tensor 'pack_28:0' shape=(2, 1) dtype=float32>,
 <tf.Tensor 'pack_25:0' shape=(2, 6) dtype=float32>)

In [23]:
class ActionOptimizer(object):
    """Optimizes an action sequence by gradient descent through an HVACCell rollout.

    The action variable is reshaped to ``[-1, num_step, 6]`` and fed through
    ``tf.nn.dynamic_rnn``. The loss is the negated mean, over sequences, of
    the per-sequence reward sum. After every step the variable is clipped
    to ``[0, 10]``.

    NOTE(review): the cell is still built from the notebook-level
    ``adj_outside`` / ``adj_hall`` / ``adj`` / ``rooms`` / ``default_settings``,
    and the room count (6) and the initial state (10 per room, batch of 1)
    are hard-coded.
    """

    def __init__(self,
                a, # Actions
                num_step, # Number of RNN step, this is a fixed step RNN sequence, 12 for navigation
                learning_rate=0.1): 
        """Build the rollout graph, the loss and the training ops, then open a session.

        Args:
            a: ``tf.Variable`` holding the actions; its size must be a
                multiple of ``num_step * 6``.
            num_step: Number of RNN steps in each action sequence.
            learning_rate: Learning rate passed to ``RMSPropOptimizer``.
        """
        # Keep the variable that was passed in, so the training and clipping
        # ops act on it instead of on whatever the global name `a` points to.
        self._action_var = a
        self.action = tf.reshape(a,[-1,num_step,6]) #Reshape rewards
        print(self.action)
        self.num_step = num_step
        self.learning_rate = learning_rate
        self._p_create_rnn_graph()
        self._p_create_loss()
        self.sess = tf.InteractiveSession()
        self.sess.run(tf.global_variables_initializer())

    def _p_create_rnn_graph(self):
        """Unroll the HVAC cell over the action sequence."""
        cell = HVACCell(adj_outside,adj_hall,adj,rooms,default_settings)
        initial_state = cell.zero_state(1, dtype=tf.float32)+tf.constant([[10,10,10,10,10,10]],dtype=tf.float32)
        print('action batch size:{0}'.format(array_ops.shape(self.action)[0]))
        print('Initial_state shape:{0}'.format(initial_state))
        rnn_outputs, state = tf.nn.dynamic_rnn(cell, self.action, dtype=tf.float32,initial_state=initial_state)
        #need output intermediate states as well
        self.outputs = rnn_outputs
        self.last_state = state
        # Sum of the step rewards of each sequence (axis 1 is the step axis).
        self.pred = tf.reduce_sum(self.outputs,1)
        print("self.pred:{0}".format(self.pred))

    def _p_create_loss(self):
        """Create the loss, the training op and the reusable helper ops."""
        objective = tf.reduce_mean(self.pred) 
        self.loss = -objective
        print(self.loss.get_shape())
        self.optimizer = tf.train.RMSPropOptimizer(self.learning_rate).minimize(
            self.loss, var_list=[self._action_var])
        # Built once here rather than inside the training loop, so that the
        # graph does not grow by new assign/clip/argmax nodes on every step.
        self._clip_op = tf.assign(self._action_var, tf.clip_by_value(self._action_var, 0, 10))
        self._best_index = tf.argmax(self.pred,0)

    def Optimize(self,epoch=100):
        """Run ``epoch`` training steps and print the result.

        Prints the loss before training and after every step, then the index
        of the sequence with the highest reward sum, that action sequence,
        the final state and the per-step rewards.

        Args:
            epoch: Number of optimization steps to run.
        """
        new_loss = self.sess.run([self.loss])
        print('Loss in epoch {0}: {1}'.format("Initial", new_loss)) 
        for step in range(epoch):
            self.sess.run([self.optimizer])
            # Project the actions back into the allowed range.
            self.sess.run(self._clip_op)
            new_loss = self.sess.run([self.loss])
            print('Loss in epoch {0}: {1}'.format(step, new_loss))  
        minimum_costs_id=self.sess.run(self._best_index)
        print(minimum_costs_id)
        print('Optimal Action Squence:{0}'.format(self.sess.run(self.action)[minimum_costs_id[0]]))
        print('The last state:{0}'.format(self.sess.run(self.last_state)))
        print('Rewards each time step:{0}'.format(self.sess.run(self.outputs)))

In [24]:
# 60 action values, all starting at 5.0. ActionOptimizer reshapes them to
# [-1, num_step, 6], which here gives a single sequence of 10 steps.
a = tf.Variable(tf.constant(5.0, dtype=tf.float32,shape=[60]),name="action")
rnn_inst = ActionOptimizer(a, 10)  

Tensor("Reshape:0", shape=(1, 10, 6), dtype=float32)
action batch size:Tensor("strided_slice_5:0", shape=(), dtype=int32)
Initial_state shape:Tensor("add_752:0", shape=(1, 6), dtype=float32)
self.pred:Tensor("Sum:0", shape=(1, 1), dtype=float32)
()


In [25]:
# Run 200 optimization steps; the loss is printed after every step.
rnn_inst.Optimize(200)

Loss in epoch Initial: [843264.75]
Loss in epoch 0: [843168.19]
Loss in epoch 1: [723099.25]
Loss in epoch 2: [723040.06]
Loss in epoch 3: [722986.75]
Loss in epoch 4: [722937.62]
Loss in epoch 5: [722893.25]
Loss in epoch 6: [722854.44]
Loss in epoch 7: [722821.88]
Loss in epoch 8: [682786.5]
Loss in epoch 9: [682755.94]
Loss in epoch 10: [602726.06]
Loss in epoch 11: [602693.69]
Loss in epoch 12: [602665.12]
Loss in epoch 13: [602638.25]
Loss in epoch 14: [602608.25]
Loss in epoch 15: [602579.06]
Loss in epoch 16: [602553.06]
Loss in epoch 17: [602526.88]
Loss in epoch 18: [602498.69]
Loss in epoch 19: [602470.88]
Loss in epoch 20: [602444.19]
Loss in epoch 21: [602419.0]
Loss in epoch 22: [602393.25]
Loss in epoch 23: [602366.31]
Loss in epoch 24: [602339.5]
Loss in epoch 25: [602313.38]
Loss in epoch 26: [562288.94]
Loss in epoch 27: [562262.88]
Loss in epoch 28: [482237.59]
Loss in epoch 29: [482211.41]
Loss in epoch 30: [482185.28]
Loss in epoch 31: [482160.31]
Loss in epoch 32: 