<a href="https://colab.research.google.com/github/turnippy/cisc856project/blob/main/CISC856Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

CISC856 Group 6 Project Winter 2022

Contributors:
* P Saha
* T Liu

In [6]:
import random
import numpy as np
import matplotlib.pyplot as plt

from itertools import product

In [None]:
class Environment:
  '''
  Resource-allocation environment over n affected areas.

  The state is an integer vector of length n that is updated once per
  planning period by take_action.
  '''

  def __init__(self, n, c, t, ac, dca, dcb):
    '''
    initialize with:
      n = number of affected areas
      c = capacities of response centers
      t = number of planning periods
      ac = accessibility cost parameter
      dca = deprivation cost parameter 'a'
      dcb = deprivation cost parameter 'b'
      initial state = [0] * n (zero resources allocated initially)
      initializes time step = 1
    '''
    self.n = n
    self.c = c
    self.t = t #time horizon
    #cost parameters are kept on the instance so the cost calculation can
    #read them; no method implemented so far uses them
    self.ac = ac
    self.dca = dca
    self.dcb = dcb
    self.state = np.zeros(n, dtype=int)
    self.curr_time = 1

  def take_action(self, action, demand):
    '''
    action is a vector (a_1, a_2, ..., a_n),
    where each a_i represents the resource allocation to the
    i-th affected area, given a total of 'n' affected areas

    demand is a vector (d_1, d_2, ..., d_n),
    where each d_i represents the demand for resources of the
    i-th affected area, given a total of 'n' affected areas

    Both may be numpy arrays or plain sequences (tuple/list).

    Following figure (1):
    S_(i,t+1) = S_(i,t) - Y_(i,t) + D_(i,t)
    where Y_(i,t) denotes the resource allocation at time t for the i-th area,
    where D_(i,t) denotes the demand at time t for the i-th area.

    Raises ValueError (from check_action) when the inputs are illegal; in that
    case neither the state nor the time step is modified.
    '''
    action = np.asarray(action)
    demand = np.asarray(demand)
    #validate before mutating so a rejected action leaves the state untouched
    self.check_action(action, demand)
    self.state = self.state - action + demand
    self.curr_time += 1

  def check_action(self, action, demand):
    '''
    checks if action and demand are legal under the given environment parameters

    Raises ValueError when:
      - action does not have exactly n entries
      - the total allocation exceeds the capacity c
      - demand does not have exactly n entries
    '''
    #accept tuples/lists as well as arrays, as the error messages imply
    action = np.asarray(action)
    demand = np.asarray(demand)
    if action.size != self.n:
      raise ValueError('length of action tuple must equal number of affected areas')
    if action.sum() > self.c:
      raise ValueError('resource allocation must not exceed capacity')
    if demand.size != self.n:
      raise ValueError('length of demand tuple must equal number of affected areas')

  def calc_cost(self, action, state, time):
    '''
    calculates the reward for the chosen action in the given state and time step
    action must be a vector with size = n
    state must be a vector with size = n
    time must be an integer value, where 0 < time <= horizon+1

    NOTE: none of the three cost equations is implemented yet, so this
    currently returns None for every input.
    '''
    if time == 1:
      #following equation (7)
      pass
    elif time == (self.t + 1):
      #following equation (9)
      pass
    else:
      #following equation (8)
      pass

In [None]:
class Agent:
  def __init__(self, k=2000, epsilon=0.5, alpha=0.8, gamma=0.2):
    '''
    initialize with:
      k = number of epochs
      epsilon
      alpha
      gamma
    '''
    self.env = Environment()
    self.k = k
    self.epsilon = epsilon
    self.alpha = alpha
    self.gamma = gamma
    self.initialize_q_table(self.env.n, self.env.c, self.env.t)
  
  def initialize_q_table(self, n, c, t):
    #q-table is maintained as a double hash table
    #the keys of the outer hash is the state tuple
    #the keys of the inner hash the action tuple in the given state
    self.q_table = {}
    #first, generate all possible states. by the problem descriptions, we assume
    #that S_(i,t) >= 0 for all i <= N, for all t <= T
    
    #all_states = list(permutations(range(0, t+1), n))
    #all_actions = list(permutations(range()))

    all_states = [i for i in product(range(0, t+1), repeat=n)]
    all_actions = [i for i in product(range(0, c*n + 1), repeat=n)]

    for tup in all_states:
      if sum(tup) > (t+1) * n:
        #invalid state, as the total demand of the environment cannot exceed
        #(max epoch number) * (number of AA's)
        continue
      self.q_table[tup] = {}

  def update_qtable(self, curr, action, reward, next):
    temp = []
    for a in self.q_table[next].keys():
      temp.append[self.q_table[next][a]]
    self.q_table[curr][action] += self.alpha * (reward + self.gamma * max(temp) - self.q_table[curr][action])

  def train_episode(self, t):
    state = (0,) * self.n
    