In [559]:
import pandas as pd
import numpy as np
import random as r

In [560]:
MAZE=[[0,0,1,1,1],
      [0,1,1,0,1],
      [1,1,1,0,1],
      [1,0,1,0,1],
      [1,0,1,1,1]]
REWARDS_TABLE = [[0,0,1,1,1],
                 [0,1,1,0,1],
                 [1,1,1,0,1],
                 [1,0,1,0,1],
                 [100,0,1,1,1]]

In [561]:
def init_state_table():
    qtable = []
    for row in range(len(MAZE)):
        for column in range(len(MAZE[row])):
            qtable.append(QEntry((row, column)))
    return qtable
            

class QEntry:
    def __init__(self, state):
        self.state = state
        self.up = 0
        self.down = 0
        self.left = 0
        self.right = 0
    
    def get_state(self):
        return self.state
    def get_positions(self):
        return {"up": self.up, "down": self.down, "left": self.left, "right": self.right}
    
    def update_positions(self, new_positions):
        for key, value in new_positions.values():
            pass
            

# Init Q-Table
q_table = init_state_table()

In [562]:
def get_possible_actions(row_index, column_index):    
    possible = {"up": False, "down": False, "right": False, "left": False}
    #possible = {"up": row_index != 0, "down": row_index != len(MAZE)-1, "right": column_index != 0, "left": column_index != len(MAZE[0])-1}    
    dir_functions = {"up": (lambda: True if MAZE[row_index-1][column_index] == 1 and not row_index == 0 else False),
                     "down": (lambda: True if MAZE[row_index+1][column_index] == 1 and not row_index == len(MAZE)-1 else False),
                     "right": (lambda: True if MAZE[row_index][column_index+1] == 1 and not column_index == len(MAZE[row_index])-1 else False),
                     "left": (lambda: True if MAZE[row_index][column_index-1] == 1 and not column_index == 0 else False)}
    for dir,funct in dir_functions.items():
        try:
            possible[dir] = funct()
        except: pass

    return possible

In [563]:
def pick_random_true_key(dictionary):
    true_keys = [key for key, value in dictionary.items() if value]
    if not true_keys:
        return None
    return r.choice(true_keys)

In [564]:
# def get_q_entry()

def get_next_move_qtable(current_state, dir):
    '''
    current_state: A tuple containing the coordinates to the old state, (row_index, column_index)
    '''

    next_state = list(current_state)
    match dir:
        case "up":
            next_state[0] = current_state[0] - 1
            next_state[1] = current_state[1]
        case "down":
            next_state[0] = current_state[0] + 1
            next_state[1] = current_state[1]
        case "left":
            next_state[0] = current_state[0]
            next_state[1] = current_state[1] - 1
        case "right":
            next_state[0] = current_state[0]
            next_state[1] = current_state[1] + 1
        case _:
            raise Exception("Direction is invalid")
    next_state = tuple(next_state)
        
    # Find the entry in the q_table that contains the new state
    for i in range(len(q_table)):
        # print(str(q_table[i].get_state()) + str(next_state))
        if q_table[i].get_state() == next_state:
            return q_table[i]
        

In [565]:
# Bellman Equation
alpha = 0.6
gamma = 0.7

bellman_equation = lambda state, old, next : (1 - alpha) * old + alpha * (REWARDS_TABLE[state[0]][state[1]] + gamma * next)

In [566]:
# MAZE=[[0,0,1,1,1],
#       [0,1,1,0,1],
#       [1,1,1,0,1],
#       [1,0,1,0,1],
#       [1,0,1,1,1]]
#REWARDS_TABLE = [[0,0,1,1,1],
       #          [0,1,1,0,1],
             #    [1,1,1,0,1],
             #    [1,0,1,0,1],
            #     [100,0,1,1,1]]

for entry in q_table:
    state = entry.get_state()
    state_movements = entry.get_positions()
    
    possible_movements = get_possible_actions(state[0], state[1])
    for key, value in possible_movements.items():
        if value == True:
            old = state_movements[key]
            get_possible_actions(state, key)
            next_q_obj = get_next_move_qtable(state, key)
            next_q = max(list(next_q_obj.get_positions().values()))
            state_movements[key] = bellman_equation(state, old, next_q)

    display(str(state) + str(state_movements))

"(0, 0){'up': 0, 'down': 0, 'left': 0, 'right': 0}"

"(0, 1){'up': 0, 'down': 0.0, 'left': 0, 'right': 0.0}"

"(0, 2){'up': 0, 'down': 0.6, 'left': 0, 'right': 0.6}"

"(0, 3){'up': 0, 'down': 0, 'left': 0.6, 'right': 0.6}"

"(0, 4){'up': 0, 'down': 0.6, 'left': 0.6, 'right': 0}"

"(1, 0){'up': 0, 'down': 0.0, 'left': 0, 'right': 0.0}"

"(1, 1){'up': 0, 'down': 0.6, 'left': 0, 'right': 0.6}"

"(1, 2){'up': 0.6, 'down': 0.6, 'left': 0.6, 'right': 0}"

"(1, 3){'up': 0.0, 'down': 0, 'left': 0.0, 'right': 0.0}"

"(1, 4){'up': 0.6, 'down': 0.6, 'left': 0, 'right': 0}"

"(2, 0){'up': 0, 'down': 0.6, 'left': 0, 'right': 0.6}"

"(2, 1){'up': 0.6, 'down': 0, 'left': 0.6, 'right': 0.6}"

"(2, 2){'up': 0.6, 'down': 0.6, 'left': 0.6, 'right': 0}"

"(2, 3){'up': 0, 'down': 0, 'left': 0.0, 'right': 0.0}"

"(2, 4){'up': 0.6, 'down': 0.6, 'left': 0, 'right': 0}"

"(3, 0){'up': 0.6, 'down': 0.6, 'left': 0, 'right': 0}"

"(3, 1){'up': 0.0, 'down': 0, 'left': 0.0, 'right': 0.0}"

"(3, 2){'up': 0.6, 'down': 0.6, 'left': 0, 'right': 0}"

"(3, 3){'up': 0, 'down': 0.0, 'left': 0.0, 'right': 0.0}"

"(3, 4){'up': 0.6, 'down': 0.6, 'left': 0, 'right': 0}"

"(4, 0){'up': 60.0, 'down': 0, 'left': 0, 'right': 0}"

"(4, 1){'up': 0, 'down': 0, 'left': 0.0, 'right': 0.0}"

"(4, 2){'up': 0.6, 'down': 0, 'left': 0, 'right': 0.6}"

"(4, 3){'up': 0, 'down': 0, 'left': 0.6, 'right': 0.6}"

"(4, 4){'up': 0.6, 'down': 0, 'left': 0.6, 'right': 0}"

In [567]:
get_possible_actions(2,1)

{'up': True, 'down': False, 'right': True, 'left': True}

In [568]:


# bellman_equation = lambda alpha, gamma, reward : (1 - alpha) * q(s,a) + alpha * (reward + gamma * max(q'))

# bellman_equation = lambda alpha, gamma, reward : (1 - alpha) * q(s,a) + alpha * (reward + gamma * max(q'))