In [1]:
import numpy as np
import random
import time

In [2]:
from IPython.display import clear_output

### CREATE A SNAKE GAME

In [3]:
class create_game:
    """Grid-based snake game exposing a compact 8-bit state for a learning agent.

    The board is a ``height x width`` numpy array in which ``0`` is an empty
    cell, ``1`` is a snake body part and ``2`` is the food. The snake is a list
    of ``(row, col)`` tuples with the head at index 0.

    ``self.state`` is an integer vector of length 8:

    * indices 0-3: 1 when the cell next to the head in direction
      UP / RIGHT / DOWN / LEFT is free (in bounds and not part of the snake),
      0 when it is blocked;
    * indices 4-7: 1 when the food lies in direction UP / RIGHT / DOWN / LEFT
      relative to the head.

    The snake always starts at ``(1, 3), (1, 2), (1, 1)``, so the board needs
    at least 2 rows and 4 columns; smaller boards fail while the snake is
    being drawn.

    Args:
        dimensions: indexable pair ``(height, width)``.
    """

    # GLYPH USED TO DRAW EACH MAZE CELL CODE (0 = EMPTY, 1 = SNAKE, 2 = FOOD)
    _SYMBOLS = {0: '◻', 1: '◼', 2: '⛝'}

    # ORDER IN WHICH DIRECTIONS ARE ENCODED IN BOTH HALVES OF THE STATE VECTOR
    _DIRECTIONS = ('UP', 'RIGHT', 'DOWN', 'LEFT')

    # (ROW, COL) OFFSET APPLIED TO THE HEAD FOR EACH DIRECTION
    _OFFSETS = {'UP': (-1, 0), 'RIGHT': (0, 1), 'DOWN': (1, 0), 'LEFT': (0, -1)}

    def __init__(self, dimensions):

        # CREATE A MAZE, SNAKE & FOOD
        self.create_maze(dimensions)
        self.create_snake()
        self.create_food()

        # START AT SCORE ZERO
        self.score = 0

        # DESTROYED STATUS
        self.destroyed = False

        # SHOW THE CURRENT MAZE
        self.show()

        # CALCULATE STATE
        self.calculate_state()

    # SHOW THE CURRENT MAZE
    def show(self):
        """Clear the cell output and print the maze, one text line per row."""
        clear_output(wait=True)

        # MAZE VALUES ARE FLOATS, SO CAST BEFORE LOOKING UP THE GLYPH
        for row in self.maze:
            print(''.join(self._SYMBOLS[int(value)] for value in row))

    # END THE GAME
    def destroy(self):
        """Mark the game as finished."""
        self.destroyed = True

    # CREATE A MAZE
    def create_maze(self, dimensions):
        """Store the board size and allocate an all-empty maze."""
        self.height = dimensions[0]
        self.width = dimensions[1]
        self.maze = np.zeros((self.height, self.width))

    # CREATE FOOD AT A RANDOM POSITION
    def create_food(self):
        """Place the food on a random empty cell.

        When the board has no empty cell left, ``self.food`` is set to ``None``
        and the maze is left untouched instead of raising from the random draw.
        """

        # FIND ALL EMPTY POSITIONS IN THE MATRIX
        positions = np.argwhere(self.maze == 0)

        # NO ROOM LEFT: THERE IS NOTHING TO PLACE
        if len(positions) == 0:
            self.food = None
            return

        # PICK A RANDOM OPEN INDEX
        index = random.randint(0, len(positions) - 1)

        # SET THE FOODS COORDINATES
        self.food = (positions[index][0], positions[index][1])

        # ADD THE FOOD TO THE MAZE
        self.maze[self.food[0]][self.food[1]] = 2

    # CREATE A SNAKE
    def create_snake(self):
        """Create the three-part starting snake and draw it into the maze."""
        self.snake = [(1, 3), (1, 2), (1, 1)]

        # LOOP THROUGH BODYPARTS & ADD THEM
        for part in self.snake:
            self.maze[part[0]][part[1]] = 1

    # EVALUATE CURRENT STATE
    def calculate_state(self):
        """Rebuild ``self.state`` from the current snake and food positions."""

        # CREATE TEMP STATE
        self.state = np.zeros(8, dtype=int)

        # LOOP THROUGH DIRECTIONS AND SET BOUNDARY VALUES
        for index, direction in enumerate(self._DIRECTIONS):
            self.state[index] = self.check_block(direction)

        # SET FOOD DIRECTIONS VALUES
        self.food_direction()

    # FETCH NEW POSITIONAL COORDINATE
    def get_position(self, direction):
        """Return the ``(row, col)`` the head would occupy after one step.

        An unrecognised ``direction`` yields the current head position.
        """
        step_y, step_x = self._OFFSETS.get(direction, (0, 0))
        return (self.snake[0][0] + step_y, self.snake[0][1] + step_x)

    # CHECK BLOCKED PATHS
    def check_block(self, direction):
        """Return ``True`` when the cell in ``direction`` is free to enter.

        A cell is blocked (``False``) when it is part of the snake or lies
        outside the board.
        """

        # GET NEXT COORD POSITION
        next_y, next_x = self.get_position(direction)

        # BLOCKED BY THE SNAKE BODY
        if (next_y, next_x) in self.snake:
            return False

        # FREE ONLY WHEN BOTH COORDINATES ARE INSIDE THE BOARD
        return 0 <= next_y <= self.height - 1 and 0 <= next_x <= self.width - 1

    # EVALUATE FOOD DIRECTION - UP RIGHT DOWN LEFT
    def food_direction(self):
        """Set the food-direction flags (``self.state[4:8]``).

        Does nothing when there is no food on the board.
        """
        if self.food is None:
            return

        distance = np.array(self.food) - np.array(self.snake[0])

        # CHECK & SET EACH DIRECTION
        if distance[0] < 0:
            self.state[4] = 1
        elif distance[0] > 0:
            self.state[6] = 1
        if distance[1] > 0:
            self.state[5] = 1
        elif distance[1] < 0:
            self.state[7] = 1

    # CALCULATE STATE VALUE
    def calculate_state_value(self):
        """Encode ``self.state`` as an integer, with ``state[i]`` as bit ``i``."""
        return sum(2 ** i * bit for i, bit in enumerate(self.state))

    # MOVE THE SNAKE
    def move_snake(self, direction):
        """Advance the snake one step and return ``(state_value, reward)``.

        Rewards:
            -2  the move hits the snake or leaves the board (game is destroyed)
             2  the move eats the food (snake grows, score increases)
             1  a plain move in a direction whose food flag is set
             0  any other plain move

        When eating fills the last empty cell, no new food can be placed and
        the game is destroyed as well; that move still earns the reward of 2.
        """

        # DEFAULT TO ZERO REWARD
        reward = 0

        # GET THE NEW POSITION
        next_y, next_x = self.get_position(direction)

        # THE SNAKE HITS ITSELF OR A WALL
        if not self.check_block(direction):
            self.destroy()
            reward = -2

        # EAT FOOD & GROW
        elif self.maze[next_y][next_x] == 2:

            # GROW THE SNAKE BY UPDATING THE HEADS POSITION
            self.snake.insert(0, (next_y, next_x))
            self.maze[next_y][next_x] = 1

            # INCREASE THE SCORE & CREATE NEW FOOD
            self.score += 1
            self.create_food()

            # BOARD IS COMPLETELY FILLED: NOTHING LEFT TO PLAY FOR
            if self.food is None:
                self.destroy()

            # SET REWARD
            reward = 2

            # SHOW UPDATED MAZE
            self.show()

        # OTHERWISE, MOVE
        else:

            # SET REWARD IF SNAKE MOVES CLOSER TO THE FOOD
            # (FOOD FLAGS SIT AT STATE[4:8] IN THE SAME ORDER AS _DIRECTIONS)
            if self.state[4 + self._DIRECTIONS.index(direction)] == 1:
                reward = 1

            # MOVE SNAKE HEAD
            self.snake.insert(0, (next_y, next_x))

            # REMOVE SNAKE TAIL
            tail = self.snake.pop()

            # RENDER NEW HEAD & REMOVE OLD TAIL
            self.maze[next_y][next_x] = 1
            self.maze[tail[0]][tail[1]] = 0

            # SHOW UPDATED MAZE
            self.show()

        # CALCULATE NEW STATE
        self.calculate_state()

        return self.calculate_state_value(), reward

### REINFORCEMENT LEARNING: TRAINING LOOP

In [8]:
def train(dimensions, epochs, epsilon):
    """Play ``epochs`` snake games using an epsilon-greedy action loop.

    Each game runs until it is destroyed. On every step a uniform random
    number is drawn; values up to ``epsilon`` select the exploration branch,
    larger values the exploitation branch. No policy is learned yet: the
    exploitation branch is a placeholder that also picks a random action, and
    the state value and reward returned by the game are not used.

    Args:
        dimensions: ``(height, width)`` passed to ``create_game``.
        epochs: number of games to play.
        epsilon: exploration probability, compared against a draw in [0, 1).
    """

    # AVAILABLE ACTIONS
    actions = ['UP', 'DOWN', 'LEFT', 'RIGHT']

    # PLAY X AMOUNT OF GAMES
    for _ in range(epochs):
        game = create_game(dimensions)

        # WHILE THE GAME IS NOT DESTROYED
        while not game.destroyed:

            # GENERATE A RANDOM VALUE
            # (NAMED `roll` SO THE IMPORTED `random` MODULE IS NOT SHADOWED)
            roll = np.random.random()

            # IF IT IS WITHIN EPSILONS RANGE, PICK THE ACTION RANDOMLY
            if roll <= epsilon:
                action = np.random.choice(actions)

            # OTHERWISE, PICK THE ACTION STRATEGICALLY
            # TODO: NO LEARNED POLICY YET, SO THIS STILL PICKS RANDOMLY
            else:
                action = np.random.choice(actions)

            # PERFORM THE ACTION
            state_value, reward = game.move_snake(action)

            # SHORT PAUSE BETWEEN MOVES
            time.sleep(0.01)

In [9]:
# RUN 100 GAMES ON A 10 x 10 BOARD WITH EPSILON 0.8
train(dimensions=[10, 10], epochs=100, epsilon=0.8)

◻◻◻◻◻◻◻◻◻◻
◻◼◼◼◻◻◻◻◻◻
◻◻◻◻◻◻◻◻◻◻
◻◻◻◻◻◻◻◻◻◻
◻◻◻◻◻◻◻◻◻◻
◻◻◻◻◻◻◻◻◻◻
◻◻◻◻◻◻◻◻◻◻
◻◻◻◻◻◻◻◻◻◻
◻◻◻◻◻◻◻⛝◻◻
◻◻◻◻◻◻◻◻◻◻
-2
