In [1]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import colors

In [None]:
# Gray levels used to paint cells.
visited_mark = 0.8  # cells already visited by the rat are painted gray 0.8
rat_mark = 0.5      # the cell currently holding the rat is painted gray 0.5

# Integer codes of the four moves.
LEFT, UP, RIGHT, DOWN = range(4)

# Human-readable name of every action code.
actions_dict = dict(zip(
    (LEFT, UP, RIGHT, DOWN),
    ('left', 'up', 'right', 'down'),
))

num_actions = len(actions_dict)

# Exploration factor
epsilon = 0.1

In [None]:
class Qmaze(object):
    """Random grid maze in which a rat has to reach the cheese.

    The maze is a 2-D float array. The value 0 is a free (white) cell; the
    start cell (0, 0) and the target cell (X-1, Y-1) are always free.
    Fractional values below 1 (such as ``rat_mark``, which ``reset`` writes
    into the start cell) are paint only and never block movement.

    NOTE(review): every object code >= 1 is treated as an obstacle. Confirm
    this if non-blocking objects are ever added to ``objects``.
    """

    def __init__(self, X, Y, objects=(0, 1), probability=(0.8, 0.2),
                 show_maze_img=1, rat=(0, 0)):
        """Generate a random X-by-Y maze and place the rat on it.

        Args:
            X: number of maze rows.
            Y: number of maze columns.
            objects: cell values to draw from; 0 is a free cell.
            probability: probability of each entry of ``objects``.
            show_maze_img: when truthy, plot the generated maze.
            rat: (row, col) cell on which the rat starts.

        Raises:
            ValueError: if the maze parameters are inconsistent.
        """
        self._maze = self.generate_maze(X, Y, objects, probability, show_maze_img)
        self.target = (X - 1, Y - 1)   # target cell where the "cheese" is
        self.reset(rat)

    def generate_maze(self, X, Y, objects, probability, show_img):
        """Return a random X-by-Y maze as a float array.

        Every cell is drawn from ``objects`` with the matching
        ``probability``. The start (0, 0) and end (X-1, Y-1) cells are
        always forced to 0 (white, walkable).

        Args:
            X: number of rows.
            Y: number of columns.
            objects: cell values to draw from.
            probability: probability of each entry of ``objects``.
            show_img: when truthy, plot the maze with matplotlib.

        Raises:
            ValueError: if a dimension is smaller than 1, if ``objects`` and
                ``probability`` differ in length, or if the probabilities do
                not sum to 1.
        """
        if X < 1 or Y < 1:
            raise ValueError('[Error] Maze dimensions must be at least 1x1.')
        if len(objects) != len(probability):
            raise ValueError('[Error] Objects and probability have different number of items.')
        # Tolerant comparison: sums such as 0.7 + 0.2 + 0.1 are not exactly 1.0.
        if not np.isclose(np.sum(probability), 1.0):
            raise ValueError('[Error] Sum of probabilities does not equal 1.0.')

        # Float cells are required so that fractional paint marks can be stored.
        maze = np.random.choice(objects, (X, Y), p=probability).astype(float)
        maze[0, 0] = 0.0
        maze[X - 1, Y - 1] = 0.0
        if show_img:
            self._show_maze(maze)
        return maze

    @staticmethod
    def _show_maze(maze):
        """Plot ``maze`` as a coloured grid with 1-based tick labels."""
        nrows, ncols = maze.shape
        cmap = colors.ListedColormap(['white', 'black', 'grey', 'red', 'green', 'orange'])
        bounds = [0, 0.5, 0.8, 1, 2, 3, 4]
        norm = colors.BoundaryNorm(bounds, cmap.N)
        plt.figure()
        plt.imshow(maze, interpolation='none', aspect='equal', cmap=cmap, norm=norm)
        ax = plt.gca()
        # Major ticks sit on cell centres; columns run along x, rows along y.
        ax.set_xticks(np.arange(0, ncols, 1))
        ax.set_yticks(np.arange(0, nrows, 1))
        # Labels for major ticks
        ax.set_xticklabels(np.arange(1, ncols + 1, 1))
        ax.set_yticklabels(np.arange(1, nrows + 1, 1))
        # Minor ticks sit on cell borders and carry the grid lines.
        ax.set_xticks(np.arange(-.5, ncols, 1), minor=True)
        ax.set_yticks(np.arange(-.5, nrows, 1), minor=True)
        ax.grid(which='minor', color='black', linestyle='-', linewidth=1)
        # Remove minor ticks
        ax.tick_params(which='minor', bottom=False, left=False)

    def reset(self, rat):
        """Restore the generated maze and put the rat on cell ``rat``."""
        self.rat = rat
        self.maze = np.copy(self._maze)
        row, col = rat
        self.maze[row, col] = rat_mark
        self.state = (row, col, 'start')
        # The game is lost once the accumulated reward drops below this bound.
        self.min_reward = -0.5 * self.maze.size
        self.total_reward = 0
        self.visited = set()

    def _is_free(self, row, col):
        """Return True when cell (row, col) does not hold an obstacle."""
        return self.maze[row, col] < 1.0

    def _at_target(self):
        """Return True when the rat stands on the bottom-right cell."""
        row, col, _ = self.state
        nrows, ncols = self.maze.shape
        return row == nrows - 1 and col == ncols - 1

    def _move_offsets(self):
        """Return the (row, col) displacement of every action code."""
        # Built on demand because the action codes are module-level constants.
        return {LEFT: (0, -1), UP: (-1, 0), RIGHT: (0, 1), DOWN: (1, 0)}

    def update_state(self, action):
        """Apply ``action`` to the rat and store the resulting state.

        The stored mode is 'valid' when the rat moved, 'invalid' when the
        action was not allowed (position unchanged) and 'blocked' when no
        action at all is possible from the current cell.
        """
        rat_row, rat_col, _ = self.state
        nrow, ncol = rat_row, rat_col

        if self._is_free(rat_row, rat_col):
            self.visited.add((rat_row, rat_col))  # mark visited cell

        valid_actions = self.valid_actions()

        if not valid_actions:
            nmode = 'blocked'
        elif action in valid_actions:
            nmode = 'valid'
            d_row, d_col = self._move_offsets()[action]
            nrow += d_row
            ncol += d_col
        else:                  # invalid action, no change in rat position
            nmode = 'invalid'

        # new state
        self.state = (nrow, ncol, nmode)

    def get_reward(self):
        """Return the reward earned by the move that led to the current state.

        Falls through and returns None when none of the cases below applies
        (e.g. right after ``reset``, before any move).
        """
        rat_row, rat_col, mode = self.state
        if self._at_target():
            return 1.0
        if mode == 'blocked':
            return self.min_reward - 1
        # Checked before the visited test: an invalid move leaves the rat on
        # a cell that was just recorded as visited, which would otherwise
        # always shadow this penalty.
        if mode == 'invalid':
            return -0.75
        if (rat_row, rat_col) in self.visited:
            return -0.25
        if mode == 'valid':
            return -0.04

    def act(self, action):
        """Perform ``action`` and return (envstate, reward, status)."""
        self.update_state(action)
        reward = self.get_reward()
        self.total_reward += reward
        status = self.game_status()
        envstate = self.observe()
        return envstate, reward, status

    def observe(self):
        """Return the drawn environment flattened to shape (1, rows * cols)."""
        canvas = self.draw_env()
        envstate = canvas.reshape((1, -1))
        return envstate

    def draw_env(self):
        """Return a copy of the maze with paint removed and the rat drawn.

        Obstacles are 1.0, walkable cells 0.0 and the rat cell ``rat_mark``.
        """
        # clear all visual marks: only real obstacles (>= 1) stay non-zero
        canvas = np.where(self.maze >= 1.0, 1.0, 0.0)
        # draw the rat
        row, col, _ = self.state
        canvas[row, col] = rat_mark
        return canvas

    def game_status(self):
        """Return 'lose', 'win' or 'not_over' for the current state."""
        if self.total_reward < self.min_reward:
            return 'lose'
        if self._at_target():
            return 'win'

        return 'not_over'

    def valid_actions(self, cell=None):
        """Return the action codes allowed from ``cell``.

        Args:
            cell: (row, col) to inspect; defaults to the rat's current cell.

        Returns:
            List of action codes, in LEFT, UP, RIGHT, DOWN order, whose
            destination lies inside the maze and is not an obstacle.
        """
        if cell is None:
            row, col, _ = self.state
        else:
            row, col = cell
        nrows, ncols = self.maze.shape

        actions = []
        for action, (d_row, d_col) in self._move_offsets().items():
            r, c = row + d_row, col + d_col
            if 0 <= r < nrows and 0 <= c < ncols and self._is_free(r, c):
                actions.append(action)
        return actions