In [824]:
import pandas as pd
import numpy as np
import random

In [825]:
MAZE=[[0,0,1,1,1],
      [0,1,1,0,1],
      [1,1,1,0,1],
      [1,0,1,0,1],
      [1,0,1,1,1]]
REWARDS_TABLE = [[0,0,1,1,1],
                 [0,1,1,0,1],
                 [1,1,1,0,1],
                 [1,0,1,0,1],
                 [100,0,1,1,1]]

In [826]:
def init_state_table():
    qtable = []
    for row in range(len(MAZE)):
        qtable_row = []
        for column in range(len(MAZE[row])):
            qtable_row.append(QEntry((row, column)))
        qtable.append(qtable_row)
    return qtable
            

class QEntry:
    def __init__(self, state):
        self.state = state
        self.up = 0
        self.down = 0
        self.left = 0
        self.right = 0
        # self.q_score = 0
    
    def get_state(self):
        return self.state
    def get_positions(self):
        return {"up": self.up, "down": self.down, "left": self.left, "right": self.right}
    # def get_qscore(self):
    #     return self.q_score

    def add_to_positions(self, increment, position):
        match position:
            case "up":
                self.up += increment
            case "down":
                self.down += increment
            case "right":
                self.right += increment
            case "left":
                self.left += increment
            case _:
                raise Exception("No possible moves at ${current_position}")
    # def add_to_qscore(self, increment):
    #     self.q_score += increment
            

# Init Q-Table
q_table = init_state_table()

In [827]:
q_table

[[<__main__.QEntry at 0x22f60da5990>,
  <__main__.QEntry at 0x22f60da5d20>,
  <__main__.QEntry at 0x22f60da5c30>,
  <__main__.QEntry at 0x22f60da5b40>,
  <__main__.QEntry at 0x22f60da5c00>],
 [<__main__.QEntry at 0x22f60da5ae0>,
  <__main__.QEntry at 0x22f60da4550>,
  <__main__.QEntry at 0x22f60da64d0>,
  <__main__.QEntry at 0x22f60da6cb0>,
  <__main__.QEntry at 0x22f60da4220>],
 [<__main__.QEntry at 0x22f60da6530>,
  <__main__.QEntry at 0x22f60da43a0>,
  <__main__.QEntry at 0x22f60da4970>,
  <__main__.QEntry at 0x22f60da6980>,
  <__main__.QEntry at 0x22f60da6ad0>],
 [<__main__.QEntry at 0x22f60da6c20>,
  <__main__.QEntry at 0x22f60da5f60>,
  <__main__.QEntry at 0x22f60da6d70>,
  <__main__.QEntry at 0x22f60da5fc0>,
  <__main__.QEntry at 0x22f60da5ea0>],
 [<__main__.QEntry at 0x22f60da6080>,
  <__main__.QEntry at 0x22f60da6260>,
  <__main__.QEntry at 0x22f60da6a70>,
  <__main__.QEntry at 0x22f60da69b0>,
  <__main__.QEntry at 0x22f60da6470>]]

In [828]:
def get_possible_actions(row_index, column_index):    
    possible = {"up": False, "down": False, "right": False, "left": False}
    #possible = {"up": row_index != 0, "down": row_index != len(MAZE)-1, "right": column_index != 0, "left": column_index != len(MAZE[0])-1}    
    dir_functions = {"up": (lambda: True if MAZE[row_index-1][column_index] == 1 and not row_index == 0 else False),
                     "down": (lambda: True if MAZE[row_index+1][column_index] == 1 and not row_index == len(MAZE)-1 else False),
                     "right": (lambda: True if MAZE[row_index][column_index+1] == 1 and not column_index == len(MAZE[row_index])-1 else False),
                     "left": (lambda: True if MAZE[row_index][column_index-1] == 1 and not column_index == 0 else False)}
    for dir,funct in dir_functions.items():
        try:
            possible[dir] = funct()
        except: pass

    return possible

In [829]:
def get_next_move(dictionary):
    true_keys = [key for key, value in dictionary.items() if value]
    if not true_keys:
        return None
    return random.choice(true_keys)

In [830]:
def iterate_to_next_move(current_position, next_move):
    '''
    current_position: tuple (row_index, column_index)
    '''
    new_pos = [current_position[0], current_position[1]]
    match next_move:
        case "up":
            new_pos[0] -= 1
        case "down":
            new_pos[0] += 1
        case "right":
            new_pos[1] += 1
        case "left":
            new_pos[1] -= 1
        case _:
            raise Exception("No possible moves at ${current_position}")
    
    return tuple(new_pos)

def get_next_move_qtable(current_state, dir):
    '''
    current_state: A tuple containing the coordinates to the old state, (row_index, column_index)
    '''

    next_state = list(current_state)
    match dir:
        case "up":
            next_state[0] = current_state[0] - 1
            next_state[1] = current_state[1]
        case "down":
            next_state[0] = current_state[0] + 1
            next_state[1] = current_state[1]
        case "left":
            next_state[0] = current_state[0]
            next_state[1] = current_state[1] - 1
        case "right":
            next_state[0] = current_state[0]
            next_state[1] = current_state[1] + 1
        case _:
            raise Exception("Direction is invalid")
    next_state = tuple(next_state)
        
    # Find the entry in the q_table that contains the new state
    return q_table[next_state[0]][next_state[1]]
        

In [831]:
# Bellman Equation
alpha = 0.6
gamma = 0.7

bellman_equation = lambda state, old, next : (1 - alpha) * old + alpha * (REWARDS_TABLE[state[0]][state[1]] + gamma * next)

In [862]:
# Moving through the maze
start_row_index, start_column_index = 0,4
end_row_index, end_column_index = 4,0

current_row_index, current_column_index = start_row_index, start_column_index

# while True:
# Get current state
entry = q_table[current_row_index][current_column_index]
state = entry.get_state()
state_q_positions = entry.get_positions()
possible_movements = get_possible_actions(state[0], state[1])

# Get next move
next_move = get_next_move(possible_movements)
display(str(state) + str(state_q_positions) + str(possible_movements) + " " + str(next_move))

display(state_q_positions)
# Using Bellman equation to find the Q-values
old = state_q_positions[next_move]
next_q_obj = get_next_move_qtable(state, next_move)
next_q = max(list(next_q_obj.get_positions().values()))
print(str(next_q_obj.get_state()) + " old: {} next_q: {}".format(old, next_q))
display(bellman_equation(state, old, next_q))
# Update the Q-value in the Q-table
q_table[state[0]][state[1]].add_to_positions(bellman_equation(state, old, next_q), next_move)


"(0, 4){'up': 0, 'down': 165.18010238337024, 'left': 117.5572159881216, 'right': 0}{'up': False, 'down': True, 'right': False, 'left': True} left"

{'up': 0, 'down': 165.18010238337024, 'left': 117.5572159881216, 'right': 0}

(0, 3) old: 117.5572159881216 next_q: 0


47.62288639524864

In [833]:
q_table[current_row_index][current_column_index].get_positions()

{'up': 0, 'down': 0, 'left': 0.6, 'right': 0}

In [834]:
# MAZE=[[0,0,1,1,1],
#       [0,1,1,0,1],
#       [1,1,1,0,1],
#       [1,0,1,0,1],
#       [1,0,1,1,1]]
#REWARDS_TABLE = [[0,0,1,1,1],
#                 [0,1,1,0,1],
#                 [1,1,1,0,1],
#                 [1,0,1,0,1],
#                 [100,0,1,1,1]]

# for entry in q_table:
#     state = entry.get_state()
#     state_movements = entry.get_positions()
    
#     possible_movements = get_possible_actions(state[0], state[1])
#     display(str(possible_movements) + str(next_move(possible_movements)))
    # for key, value in possible_movements.items():
    #     if value == True:
        # old = state_movements[key]
        # get_possible_actions(state, key)
        # next_q_obj = get_next_move_qtable(state, key)
        # next_q = max(list(next_q_obj.get_positions().values()))
        # state_movements[key] = bellman_equation(state, old, next_q)

#     display(str(state) + str(state_movements))

In [835]:
# get_possible_actions(2,1)
# 

# bellman_equation = lambda alpha, gamma, reward : (1 - alpha) * q(s,a) + alpha * (reward + gamma * max(q'))

# bellman_equation = lambda alpha, gamma, reward : (1 - alpha) * q(s,a) + alpha * (reward + gamma * max(q'))