# Snake deep q-learning


In [3]:
import torch
import random
import torch.nn as nn
import numpy as np
import os
from collections import deque, namedtuple
import pygame
from enum import Enum
import torch.optim as optim
import torch.nn.functional as F
import os

# Initialize pygame
pygame.init()
# Define font style
font_style = pygame.font.SysFont("bahnschrift", 25)

class MoveDirection(Enum):
    EAST = 1
    WEST = 2
    NORTH = 3
    SOUTH = 4

Coordinate = namedtuple('Coordinate', 'x, y')

# RGB color definitions
COL_WHITE = (255, 255, 255)
COL_RED = (200, 0, 0)
COL_BLUE_PRIMARY = (0, 0, 255)
COL_BLUE_SECONDARY = (0, 100, 255)
COL_BLACK = (0, 0, 0)

TILE_SIZE = 20
GAME_SPEED = 40

class SnakeAI:

    def __init__(self, width=640, height=480):
        self.width = width
        self.height = height
        self.screen = pygame.display.set_mode((self.width, self.height))
        pygame.display.set_caption('Snake AI')
        self.ticker = pygame.time.Clock()
        self.reset_game()

    def reset_game(self):
        self.heading = MoveDirection.EAST
        self.head = Coordinate(self.width / 2, self.height / 2)
        self.snake_body = [self.head,
                           Coordinate(self.head.x - TILE_SIZE, self.head.y),
                           Coordinate(self.head.x - 2 * TILE_SIZE, self.head.y)]
        self.score = 0
        self.food = None
        self.place_food()
        self.iterations = 0

    def place_food(self):
        x = random.randint(0, (self.width - TILE_SIZE) // TILE_SIZE) * TILE_SIZE
        y = random.randint(0, (self.height - TILE_SIZE) // TILE_SIZE) * TILE_SIZE
        self.food = Coordinate(x, y)
        if self.food in self.snake_body:
            self.place_food()

    def play_turn(self, choice):
        self.iterations += 1

        # Check for game quit event
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()

        # Move snake
        self.perform_move(choice)
        self.snake_body.insert(0, self.head)

        # Check for collision or timeout
        reward = 0
        game_end = False
        if self.check_collision() or self.iterations > 100 * len(self.snake_body):
            game_end = True
            reward = -10
            return reward, game_end, self.score

        # Check for food consumption
        if self.head == self.food:
            self.score += 1
            reward = 10
            self.place_food()
        else:
            self.snake_body.pop()

        # Refresh display
        self.refresh_display()
        self.ticker.tick(GAME_SPEED)

        return reward, game_end, self.score

    def check_collision(self, point=None):
        if point is None:
            point = self.head

        # Boundary collision
        if point.x > self.width - TILE_SIZE or point.x < 0 or point.y > self.height - TILE_SIZE or point.y < 0:
            return True
        # Self-collision
        if point in self.snake_body[1:]:
            return True
        return False

    def refresh_display(self):
        self.screen.fill(COL_BLACK)

        for part in self.snake_body:
            pygame.draw.rect(self.screen, COL_BLUE_PRIMARY, pygame.Rect(part.x, part.y, TILE_SIZE, TILE_SIZE))
            pygame.draw.rect(self.screen, COL_BLUE_SECONDARY, pygame.Rect(part.x + 4, part.y + 4, 12, 12))

        pygame.draw.rect(self.screen, COL_RED, pygame.Rect(self.food.x, self.food.y, TILE_SIZE, TILE_SIZE))

        score_text = font_style.render("Score: " + str(self.score), True, COL_WHITE)
        self.screen.blit(score_text, [0, 0])
        pygame.display.flip()

    def perform_move(self, choice):
        directions = [MoveDirection.EAST, MoveDirection.SOUTH, MoveDirection.WEST, MoveDirection.NORTH]
        idx = directions.index(self.heading)

        if np.array_equal(choice, [1, 0, 0]):
            new_heading = directions[idx]  # Keep same direction
        elif np.array_equal(choice, [0, 1, 0]):
            new_heading = directions[(idx + 1) % 4]  # Right turn
        else:  # [0, 0, 1]
            new_heading = directions[(idx - 1) % 4]  # Left turn

        self.heading = new_heading
        x, y = self.head.x, self.head.y
        if self.heading == MoveDirection.EAST:
            x += TILE_SIZE
        elif self.heading == MoveDirection.WEST:
            x -= TILE_SIZE
        elif self.heading == MoveDirection.SOUTH:
            y += TILE_SIZE
        elif self.heading == MoveDirection.NORTH:
            y -= TILE_SIZE

        self.head = Coordinate(x, y)

# Deep Q-Learning Model
class DeepQNet(nn.Module):
    def __init__(self, input_dims, hidden_dims, output_dims):
        super(DeepQNet, self).__init__()
        self.layer1 = nn.Linear(input_dims, hidden_dims)
        self.layer2 = nn.Linear(hidden_dims, output_dims)

    def forward(self, x):
        x = F.relu(self.layer1(x))
        x = self.layer2(x)
        return x

    def save(self, filename='model.pth'):
        model_dir = './model'
        if not os.path.exists(model_dir):
            os.makedirs(model_dir)
        torch.save(self.state_dict(), os.path.join(model_dir, filename))

class QLearningTrainer:
    def __init__(self, net_model, learning_rate, discount):
        self.lr = learning_rate
        self.discount = discount
        self.model = net_model
        self.optimizer = optim.Adam(net_model.parameters(), lr=self.lr)
        self.loss_fn = nn.MSELoss()

    def train_iteration(self, current_state, action_taken, reward_received, next_state, game_done):
        current_state = torch.tensor(current_state, dtype=torch.float)
        next_state = torch.tensor(next_state, dtype=torch.float)
        action_taken = torch.tensor(action_taken, dtype=torch.long)
        reward_received = torch.tensor(reward_received, dtype=torch.float)

        if len(current_state.shape) == 1:
            current_state = torch.unsqueeze(current_state, 0)
            next_state = torch.unsqueeze(next_state, 0)
            action_taken = torch.unsqueeze(action_taken, 0)
            reward_received = torch.unsqueeze(reward_received, 0)
            game_done = (game_done,)

        predictions = self.model(current_state)
        target_values = predictions.clone()

        for idx in range(len(game_done)):
            Q_value = reward_received[idx]
            if not game_done[idx]:
                Q_value += self.discount * torch.max(self.model(next_state[idx]))
            target_values[idx][torch.argmax(action_taken[idx]).item()] = Q_value

        self.optimizer.zero_grad()
        loss = self.loss_fn(target_values, predictions)
        loss.backward()
        self.optimizer.step()

MAX_MEM = 100_000
BATCH = 1000
LEARN_RATE = 0.001

class DQNAgent:
    def __init__(self):
        self.games_played = 0
        self.epsilon = 0  # randomness factor
        self.gamma = 0.9  # discount factor
        self.memory = deque(maxlen=MAX_MEM)
        self.model = DeepQNet(11, 256, 3)
        self.trainer = QLearningTrainer(self.model, learning_rate=LEARN_RATE, discount=self.gamma)

    def extract_state(self, game):
        head = game.snake_body[0]
        left = Coordinate(head.x - TILE_SIZE, head.y)
        right = Coordinate(head.x + TILE_SIZE, head.y)
        up = Coordinate(head.x, head.y - TILE_SIZE)
        down = Coordinate(head.x, head.y + TILE_SIZE)

        dir_left = game.heading == MoveDirection.WEST
        dir_right = game.heading == MoveDirection.EAST
        dir_up = game.heading == MoveDirection.NORTH
        dir_down = game.heading == MoveDirection.SOUTH

        state = [
            (dir_right and game.check_collision(right)) or
            (dir_left and game.check_collision(left)) or
            (dir_up and game.check_collision(up)) or
            (dir_down and game.check_collision(down)),

            (dir_up and game.check_collision(right)) or
            (dir_down and game.check_collision(left)) or
            (dir_left and game.check_collision(up)) or
            (dir_right and game.check_collision(down)),

            (dir_down and game.check_collision(right)) or
            (dir_up and game.check_collision(left)) or
            (dir_right and game.check_collision(up)) or
            (dir_left and game.check_collision(down)),

            dir_left, dir_right, dir_up, dir_down,

            game.food.x < game.head.x,
            game.food.x > game.head.x,
            game.food.y < game.head.y,
            game.food.y > game.head.y
        ]

        return np.array(state, dtype=int)

    def store_experience(self, state, action, reward, next_state, game_done):
        self.memory.append((state, action, reward, next_state, game_done))

    def train_long_term(self):
        if len(self.memory) > BATCH:
            sample = random.sample(self.memory, BATCH)
        else:
            sample = self.memory

        states, actions, rewards, next_states, dones = zip(*sample)
        self.trainer.train_iteration(states, actions, rewards, next_states, dones)

    def train_short_term(self, state, action, reward, next_state, done):
        self.trainer.train_iteration(state, action, reward, next_state, done)

    def decide_action(self, state):
        self.epsilon = 80 - self.games_played
        final_move = [0, 0, 0]
        if random.randint(0, 200) < self.epsilon:
            move = random.randint(0, 2)
            final_move[move] = 1
        else:
            state_tensor = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state_tensor)
            move = torch.argmax(prediction).item()
            final_move[move] = 1

        return final_move

def play_snake_ai():
    record = 0
    agent = DQNAgent()
    game = SnakeAI()
    while True:
        state = agent.extract_state(game)
        move = agent.decide_action(state)
        reward, done, score = game.play_turn(move)
        new_state = agent.extract_state(game)
        agent.train_short_term(state, move, reward, new_state, done)
        agent.store_experience(state, move, reward, new_state, done)

        if done:
            game.reset_game()
            agent.games_played += 1
            agent.train_long_term()

            if score > record:
                record = score
                agent.model.save()

            print('Game', agent.games_played, 'Score', score, 'Record:', record, 'Epsilon:', agent.epsilon)

if __name__ == '__main__':
    play_snake_ai()


  current_state = torch.tensor(current_state, dtype=torch.float)


Game 1 Score 0 Record: 0 Epsilon: 80
Game 2 Score 0 Record: 0 Epsilon: 79
Game 3 Score 0 Record: 0 Epsilon: 78
Game 4 Score 0 Record: 0 Epsilon: 77
Game 5 Score 0 Record: 0 Epsilon: 76
Game 6 Score 0 Record: 0 Epsilon: 75
Game 7 Score 0 Record: 0 Epsilon: 74
Game 8 Score 0 Record: 0 Epsilon: 73
Game 9 Score 0 Record: 0 Epsilon: 72
Game 10 Score 0 Record: 0 Epsilon: 71
Game 11 Score 0 Record: 0 Epsilon: 70
Game 12 Score 0 Record: 0 Epsilon: 69
Game 13 Score 0 Record: 0 Epsilon: 68
Game 14 Score 0 Record: 0 Epsilon: 67
Game 15 Score 0 Record: 0 Epsilon: 66
Game 16 Score 0 Record: 0 Epsilon: 65
Game 17 Score 0 Record: 0 Epsilon: 64
Game 18 Score 0 Record: 0 Epsilon: 63
Game 19 Score 1 Record: 1 Epsilon: 62
Game 20 Score 1 Record: 1 Epsilon: 61
Game 21 Score 0 Record: 1 Epsilon: 60
Game 22 Score 0 Record: 1 Epsilon: 59
Game 23 Score 0 Record: 1 Epsilon: 58
Game 24 Score 0 Record: 1 Epsilon: 57
Game 25 Score 0 Record: 1 Epsilon: 56
Game 26 Score 0 Record: 1 Epsilon: 55
Game 27 Score 0 Recor

error: display Surface quit

: 

In [1]:
#### final one!!!! some adjustment but works quite well than before!

import pygame
import time
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from collections import deque
import os
import matplotlib.pyplot as plt
import heapq
# Pygame setup
pygame.init()
# Colors and window dimensions
blanc, jaune, noir, rouge, vert, bleu = (255, 255, 255), (255, 255, 102), (0, 0, 0), (213, 50, 80), (0, 255, 0), (50, 153, 213)
largeur_ecran, hauteur_ecran, taille_bloc, vitesse_serpent = 600, 400, 20, 40
fenetre = pygame.display.set_mode((largeur_ecran, hauteur_ecran))
pygame.display.set_caption("Snake Game - DQN")
horloge = pygame.time.Clock()
police = pygame.font.SysFont("bahnschrift", 25)

# Display score
def afficher_score(score):
    valeur = police.render("Score: " + str(score), True, jaune)
    fenetre.blit(valeur, [0, 0])

# Neural Network for DQN with two selectable architectures

class Linear_QNet(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, complex_model=False):
        super().__init__()
        
        # Define network architecture based on model complexity
        if complex_model:
            # Complex model with two hidden layers
            self.fc1 = nn.Linear(input_size, hidden_size)
            self.fc2 = nn.Linear(hidden_size, hidden_size * 2)
            self.fc3 = nn.Linear(hidden_size * 2, output_size)
            self.complex = True
        else:
            # Simple model with one hidden layer
            self.fc1 = nn.Linear(input_size, hidden_size)
            self.fc2 = nn.Linear(hidden_size, output_size)
            self.complex = False

    def forward(self, x):
        # First layer with ReLU activation
        x = F.relu(self.fc1(x))
        
        # Pass through additional layer if complex model
        if self.complex:
            x = F.relu(self.fc2(x))
            x = self.fc3(x)  # Output layer
        else:
            x = self.fc2(x)  # Output layer for simple model
        return x

    def save(self, file_name='model.pth'):
        # Save model weights to specified directory
        model_folder_path = './models'
        if not os.path.exists(model_folder_path):
            os.makedirs(model_folder_path)
        
        file_path = os.path.join(model_folder_path, file_name)
        torch.save(self.state_dict(), file_path)

# Q-learning Trainer Class
class QTrainer:
    def __init__(self, model, lr, gamma):
        self.model = model
        self.gamma = gamma
        self.optimizer = optim.Adam(model.parameters(), lr=lr)
        self.criterion = nn.MSELoss()

    def train_step(self, state, action, reward, next_state, done):
        # Convert inputs to tensors, adding batch dimension if needed
        state = torch.FloatTensor(state).unsqueeze(0) if len(state.shape) == 1 else torch.FloatTensor(state)
        next_state = torch.FloatTensor(next_state).unsqueeze(0) if len(next_state.shape) == 1 else torch.FloatTensor(next_state)
        action = torch.LongTensor([action]) if isinstance(action, int) else torch.LongTensor(action)
        reward = torch.FloatTensor([reward]) if isinstance(reward, (int, float)) else torch.FloatTensor(reward)
        done = torch.FloatTensor([done]) if isinstance(done, bool) else torch.FloatTensor(done)

        # Predicted Q values for current state-action pairs
        pred = self.model(state).gather(1, action.view(-1, 1))

        # Calculate target Q values
        with torch.no_grad():
            next_q_values = self.model(next_state).max(1)[0]
            target_q_values = reward + (1 - done) * self.gamma * next_q_values
        
        # Compute loss
        loss = self.criterion(pred, target_q_values.unsqueeze(1))

        # Backpropagation
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()


# Paramètres pour le DQN (modifiés)
HIDDEN_SIZE = 128  # Increased hidden size
OUTPUT_SIZE = 4
BATCH_SIZE = 64  # Increased batch size
MEMORY_SIZE = 200000  # Increased memory size
UPDATE_TARGET_EVERY = 4  # Update target network less frequently
START_TRAINING_THRESHOLD = BATCH_SIZE * 10
MAX_EPISODES = 1000
# Agent with model, training, and actions
class AIAgent:
    def __init__(self, input_size, hidden_size, output_size, gamma=0.99, lr=0.001, epsilon_start=1.0, epsilon_end=0.01, epsilon_decay=0.9, complex_model=False, max_episodes=1000, batch_size=128):
        self.n_games = 0
        self.epsilon = epsilon_start
        self.epsilon_start = epsilon_start
        self.epsilon_end = epsilon_end
        self.epsilon_decay = epsilon_decay
        self.gamma = gamma
        self.memory = deque(maxlen=200000)
        
        # Model initialization
        self.model = Linear_QNet(input_size, hidden_size, output_size)
        self.target_model = Linear_QNet(input_size, hidden_size, output_size)
        self.target_model.load_state_dict(self.model.state_dict())
        self.trainer = QTrainer(self.model, lr=lr, gamma=gamma)
        
        # Tracking episodes and batch size for training
        self.games_played = 0
        self.episode_memory = []
        self.max_episodes = max_episodes
        self.batch_size = batch_size

    def get_state(self, snake, food):
        head = snake[-1]
        point_l = [head[0] - taille_bloc, head[1]]
        point_r = [head[0] + taille_bloc, head[1]]
        point_u = [head[0], head[1] - taille_bloc]
        point_d = [head[0], head[1] + taille_bloc]
        
        dir_l = head[0] > snake[-2][0] if len(snake) > 1 else False
        dir_r = head[0] < snake[-2][0] if len(snake) > 1 else False
        dir_u = head[1] > snake[-2][1] if len(snake) > 1 else False
        dir_d = head[1] < snake[-2][1] if len(snake) > 1 else False

        state = [
            # Danger straight
            (dir_r and self.is_collision(point_r, snake)) or 
            (dir_l and self.is_collision(point_l, snake)) or 
            (dir_u and self.is_collision(point_u, snake)) or 
            (dir_d and self.is_collision(point_d, snake)),

            # Danger right
            (dir_u and self.is_collision(point_r, snake)) or 
            (dir_d and self.is_collision(point_l, snake)) or 
            (dir_l and self.is_collision(point_u, snake)) or 
            (dir_r and self.is_collision(point_d, snake)),

            # Danger left
            (dir_d and self.is_collision(point_r, snake)) or 
            (dir_u and self.is_collision(point_l, snake)) or 
            (dir_r and self.is_collision(point_u, snake)) or 
            (dir_l and self.is_collision(point_d, snake)),
            
            # Move direction
            dir_l,
            dir_r,
            dir_u,
            dir_d,
            
            # Food location 
            food[0] < head[0],  # food left
            food[0] > head[0],  # food right
            food[1] < head[1],  # food up
            food[1] > head[1]   # food down
        ]

        return np.array(state, dtype=int)

    def is_collision(self, point, snake):
        if point[0] < 0 or point[0] >= largeur_ecran or point[1] < 0 or point[1] >= hauteur_ecran:
            return True
        if point in snake[:-1]:
            return True
        return False
    
    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))
    
    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)

    def train_long_memory(self):
        if len(self.memory) < self.batch_size:
            return
        
        batch = random.sample(self.memory, self.batch_size)
        states, actions, rewards, next_states, dones = zip(*batch)
        
        # Train on batch
        for state, action, reward, next_state, done in zip(states, actions, rewards, next_states, dones):
            self.trainer.train_step(state, action, reward, next_state, done)

    def get_action(self, state):
        self.epsilon = 80 - self.games_played
        if random.randint(0, 200) < self.epsilon:
            return random.randint(0, 2)
        else:
            state_tensor = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state_tensor)
            return torch.argmax(prediction).item()

   
    def store_episode(self, state, action, reward, next_state, done):
        self.episode_memory.append((state, action, reward, next_state, done))

    def train_on_episode(self):
        for experience in self.episode_memory:
            self.remember(*experience)
        
        # Train only if enough memory is stored
        if len(self.memory) >= self.batch_size:
            self.train_long_memory()
        
        # Clear episode memory after training
        self.episode_memory.clear()

# RGB color values
WHITE = (255, 255, 255)
RED = (200,0,0)
BLUE1 = (0, 0, 255)
BLUE2 = (0, 100, 255)
BLACK = (0,0,0)
def jeu():
    agent = AIAgent(11, HIDDEN_SIZE, OUTPUT_SIZE, complex_model=True)  
    nb_episodes, total_score, high_score = 0, 0, 0
    scores, avg_scores = [], []
    
    while nb_episodes < MAX_EPISODES:
        game_over = False
        score = 0
        serpent = [[largeur_ecran / 2, hauteur_ecran / 2]]
        x_nourriture, y_nourriture = round(random.randrange(0, largeur_ecran - taille_bloc) / taille_bloc) * taille_bloc, round(random.randrange(0, hauteur_ecran - taille_bloc) / taille_bloc) * taille_bloc
        steps_without_food = 0

        while not game_over:
            # Get the state and decide on an action
            state = agent.get_state(serpent, (x_nourriture, y_nourriture))
            action = agent.get_action(state)

            # Move snake based on action
            x_change, y_change = [(0, -taille_bloc), (0, taille_bloc), (-taille_bloc, 0), (taille_bloc, 0)][action]
            nouvelle_tete = [serpent[-1][0] + x_change, serpent[-1][1] + y_change]
            serpent.append(nouvelle_tete)

            # Check for game over
            if (nouvelle_tete[0] < 0 or nouvelle_tete[0] >= largeur_ecran or
                nouvelle_tete[1] < 0 or nouvelle_tete[1] >= hauteur_ecran or
                nouvelle_tete in serpent[:-1]):
                game_over = True
                reward = -10
            else:
                reward = -0.01  # Small survival reward

                # Check for food consumption
                if nouvelle_tete[0] == x_nourriture and nouvelle_tete[1] == y_nourriture:
                    reward = 50  # Eating food reward
                    score += 1
                    x_nourriture = round(random.randrange(0, largeur_ecran - taille_bloc) / taille_bloc) * taille_bloc
                    y_nourriture = round(random.randrange(0, hauteur_ecran - taille_bloc) / taille_bloc) * taille_bloc
                    steps_without_food = 0
                else:
                    serpent.pop(0)  # Snake moves without growing

                    # Reward adjustment for distance to food
                    if len(serpent) > 1:
                        distance_before = np.linalg.norm(np.array(serpent[-2]) - np.array([x_nourriture, y_nourriture]))
                    else:
                        distance_before = np.linalg.norm(np.array(serpent[-1]) - np.array([x_nourriture, y_nourriture]))
                    distance_after = np.linalg.norm(np.array(nouvelle_tete) - np.array([x_nourriture, y_nourriture]))
                    reward += 10 if distance_after < distance_before else -0.5
                    # distance_before = np.linalg.norm(np.array(serpent[-2]) - np.array([x_nourriture, y_nourriture]))
                    # distance_after = np.linalg.norm(np.array(nouvelle_tete) - np.array([x_nourriture, y_nourriture]))
                    # reward += 10 if distance_after < distance_before else -0.5
                    reward += 0.1  # Additional survival reward

                    steps_without_food += 1
                    if steps_without_food > 100:  # Penalize long survival without eating
                        reward -= 1
                        steps_without_food = 0

            # Prepare for the next step
            next_state = agent.get_state(serpent, (x_nourriture, y_nourriture))
            agent.store_episode(state, action, reward, next_state, game_over)

            # Update Pygame window
            fenetre.fill(BLACK)
            pygame.draw.rect(fenetre, RED, [x_nourriture, y_nourriture, taille_bloc, taille_bloc])
            for bloc in serpent:
                pygame.draw.rect(fenetre, WHITE, [bloc[0], bloc[1], taille_bloc, taille_bloc])
            afficher_score(score)
            pygame.display.update()
            horloge.tick(vitesse_serpent)

        # Training and score tracking
        agent.train_on_episode()
        nb_episodes += 1
        agent.games_played+=1
        total_score += score
        high_score = max(high_score, score)
        avg_score = total_score / nb_episodes
        scores.append(score)
        avg_scores.append(avg_score)

        # Update epsilon for exploration-exploitation balance
        # agent.update_epsilon(nb_episodes)

        print(f"Episode: {nb_episodes}/{MAX_EPISODES}, Score: {score}, Avg Score: {avg_score:.2f}, High Score: {high_score}, Epsilon: {agent.epsilon:.4f}")

        # Sync target network and save model periodically
        if nb_episodes % UPDATE_TARGET_EVERY == 0:
            agent.target_model.load_state_dict(agent.model.state_dict())
        if nb_episodes % 100 == 0:
            agent.model.save(f'snake_dqn_model_episode_{nb_episodes}.pth')
    
    pygame.quit()
    agent.plot_history(scores, avg_scores)
    print(f"Training completed. Total episodes: {nb_episodes}, Final Avg Score: {avg_score:.2f}, High Score: {high_score}")

# Run the game
jeu()

pygame 2.6.1 (SDL 2.28.4, Python 3.9.15)
Hello from the pygame community. https://www.pygame.org/contribute.html
Episode: 1/1000, Score: 0, Avg Score: 0.00, High Score: 0, Epsilon: 80.0000
Episode: 2/1000, Score: 1, Avg Score: 0.50, High Score: 1, Epsilon: 79.0000
Episode: 3/1000, Score: 0, Avg Score: 0.33, High Score: 1, Epsilon: 78.0000
Episode: 4/1000, Score: 0, Avg Score: 0.25, High Score: 1, Epsilon: 77.0000
Episode: 5/1000, Score: 1, Avg Score: 0.40, High Score: 1, Epsilon: 76.0000
Episode: 6/1000, Score: 0, Avg Score: 0.33, High Score: 1, Epsilon: 75.0000
Episode: 7/1000, Score: 0, Avg Score: 0.29, High Score: 1, Epsilon: 74.0000
Episode: 8/1000, Score: 0, Avg Score: 0.25, High Score: 1, Epsilon: 73.0000
Episode: 9/1000, Score: 0, Avg Score: 0.22, High Score: 1, Epsilon: 72.0000
Episode: 10/1000, Score: 1, Avg Score: 0.30, High Score: 1, Epsilon: 71.0000
Episode: 11/1000, Score: 1, Avg Score: 0.36, High Score: 1, Epsilon: 70.0000
Episode: 12/1000, Score: 0, Avg Score: 0.33, High

: 

In [1]:
import torch
import random
import numpy as np
from collections import deque

In [2]:
import pygame
import random
from enum import Enum
from collections import namedtuple
import numpy as np

pygame.init()
# font = pygame.font.Font('arial.ttf', 25)
#font = pygame.font.SysFont('arial', 25)
police = pygame.font.SysFont("bahnschrift", 25)

class Direction(Enum):
    RIGHT = 1
    LEFT = 2
    UP = 3
    DOWN = 4

Point = namedtuple('Point', 'x, y')

# rgb colors
WHITE = (255, 255, 255)
RED = (200,0,0)
BLUE1 = (0, 0, 255)
BLUE2 = (0, 100, 255)
BLACK = (0,0,0)

BLOCK_SIZE = 20
SPEED = 40

class SnakeGameAI:

    def __init__(self, w=640, h=480):
        self.w = w
        self.h = h
        # init display
        self.display = pygame.display.set_mode((self.w, self.h))
        pygame.display.set_caption('Snake')
        self.clock = pygame.time.Clock()
        self.reset()


    def reset(self):
        # init game state
        self.direction = Direction.RIGHT

        self.head = Point(self.w/2, self.h/2)
        self.snake = [self.head,
                      Point(self.head.x-BLOCK_SIZE, self.head.y),
                      Point(self.head.x-(2*BLOCK_SIZE), self.head.y)]

        self.score = 0
        self.food = None
        self._place_food()
        self.frame_iteration = 0


    def _place_food(self):
        x = random.randint(0, (self.w-BLOCK_SIZE )//BLOCK_SIZE )*BLOCK_SIZE
        y = random.randint(0, (self.h-BLOCK_SIZE )//BLOCK_SIZE )*BLOCK_SIZE
        self.food = Point(x, y)
        if self.food in self.snake:
            self._place_food()


    def play_step(self, action):
        self.frame_iteration += 1
        # 1. collect user input
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()
        
        # 2. move
        self._move(action) # update the head
        self.snake.insert(0, self.head)
        
        # 3. check if game over
        reward = 0
        game_over = False
        if self.is_collision() or self.frame_iteration > 100*len(self.snake):
            game_over = True
            reward = -10
            return reward, game_over, self.score

        # 4. place new food or just move
        if self.head == self.food:
            self.score += 1
            reward = 10
            self._place_food()
        else:
            self.snake.pop()
        
        # 5. update ui and clock
        self._update_ui()
        self.clock.tick(SPEED)
        # 6. return game over and score
        return reward, game_over, self.score


    def is_collision(self, pt=None):
        if pt is None:
            pt = self.head
        # hits boundary
        if pt.x > self.w - BLOCK_SIZE or pt.x < 0 or pt.y > self.h - BLOCK_SIZE or pt.y < 0:
            return True
        # hits itself
        if pt in self.snake[1:]:
            return True

        return False


    def _update_ui(self):
        self.display.fill(BLACK)

        for pt in self.snake:
            pygame.draw.rect(self.display, BLUE1, pygame.Rect(pt.x, pt.y, BLOCK_SIZE, BLOCK_SIZE))
            pygame.draw.rect(self.display, BLUE2, pygame.Rect(pt.x+4, pt.y+4, 12, 12))

        pygame.draw.rect(self.display, RED, pygame.Rect(self.food.x, self.food.y, BLOCK_SIZE, BLOCK_SIZE))

        text = police.render("Score: " + str(self.score), True, WHITE)
        
        self.display.blit(text, [0, 0])
        pygame.display.flip()


    def _move(self, action):
        # [straight, right, left]

        clock_wise = [Direction.RIGHT, Direction.DOWN, Direction.LEFT, Direction.UP]
        idx = clock_wise.index(self.direction)

        if np.array_equal(action, [1, 0, 0]):
            new_dir = clock_wise[idx] # no change
        elif np.array_equal(action, [0, 1, 0]):
            next_idx = (idx + 1) % 4
            new_dir = clock_wise[next_idx] # right turn r -> d -> l -> u
        else: # [0, 0, 1]
            next_idx = (idx - 1) % 4
            new_dir = clock_wise[next_idx] # left turn r -> u -> l -> d

        self.direction = new_dir

        x = self.head.x
        y = self.head.y
        if self.direction == Direction.RIGHT:
            x += BLOCK_SIZE
        elif self.direction == Direction.LEFT:
            x -= BLOCK_SIZE
        elif self.direction == Direction.DOWN:
            y += BLOCK_SIZE
        elif self.direction == Direction.UP:
            y -= BLOCK_SIZE

        self.head = Point(x, y)

pygame 2.6.1 (SDL 2.28.4, Python 3.9.15)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import os


class Linear_QNet(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = F.relu(self.linear1(x))
        x = self.linear2(x)
        return x

    def save(self, file_name='model.pth'):
        model_folder_path = './model'
        if not os.path.exists(model_folder_path):
            os.makedirs(model_folder_path)

        file_name = os.path.join(model_folder_path, file_name)
        torch.save(self.state_dict(), file_name)


class QTrainer:
    def __init__(self, model, lr, gamma):
        self.lr = lr
        self.gamma = gamma
        self.model = model
        self.optimizer = optim.Adam(model.parameters(), lr=self.lr)
        self.criterion = nn.MSELoss()

    def train_step(self, state, action, reward, next_state, done):
        state = torch.tensor(state, dtype=torch.float)
        next_state = torch.tensor(next_state, dtype=torch.float)
        action = torch.tensor(action, dtype=torch.long)
        reward = torch.tensor(reward, dtype=torch.float)
        # (n, x)

        if len(state.shape) == 1:
            # (1, x)
            state = torch.unsqueeze(state, 0)
            next_state = torch.unsqueeze(next_state, 0)
            action = torch.unsqueeze(action, 0)
            reward = torch.unsqueeze(reward, 0)
            done = (done, )

        # 1: predicted Q values with current state
        pred = self.model(state)

        target = pred.clone()
        for idx in range(len(done)):
            Q_new = reward[idx]
            if not done[idx]:
                Q_new = reward[idx] + self.gamma * torch.max(self.model(next_state[idx]))

            target[idx][torch.argmax(action[idx]).item()] = Q_new
    
        # 2: Q_new = r + y * max(next_predicted Q value) -> only do this if not done
        # pred.clone()
        # preds[argmax(action)] = Q_new
        self.optimizer.zero_grad()
        loss = self.criterion(target, pred)
        loss.backward()

        self.optimizer.step()

In [4]:

from collections import deque
MAX_MEMORY = 100_000
BATCH_SIZE = 1000
LR = 0.001

class Agent:

    def __init__(self):
        self.n_games = 0
        self.epsilon = 0 # randomness
        self.gamma = 0.9 # discount rate
        self.memory = deque(maxlen=MAX_MEMORY) # popleft()
        self.model = Linear_QNet(11, 256, 3)
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)


    def get_state(self, game):
        head = game.snake[0]
        point_l = Point(head.x - 20, head.y)
        point_r = Point(head.x + 20, head.y)
        point_u = Point(head.x, head.y - 20)
        point_d = Point(head.x, head.y + 20)
        
        dir_l = game.direction == Direction.LEFT
        dir_r = game.direction == Direction.RIGHT
        dir_u = game.direction == Direction.UP
        dir_d = game.direction == Direction.DOWN

        state = [
            # Danger straight
            (dir_r and game.is_collision(point_r)) or 
            (dir_l and game.is_collision(point_l)) or 
            (dir_u and game.is_collision(point_u)) or 
            (dir_d and game.is_collision(point_d)),

            # Danger right
            (dir_u and game.is_collision(point_r)) or 
            (dir_d and game.is_collision(point_l)) or 
            (dir_l and game.is_collision(point_u)) or 
            (dir_r and game.is_collision(point_d)),

            # Danger left
            (dir_d and game.is_collision(point_r)) or 
            (dir_u and game.is_collision(point_l)) or 
            (dir_r and game.is_collision(point_u)) or 
            (dir_l and game.is_collision(point_d)),
            
            # Move direction
            dir_l,
            dir_r,
            dir_u,
            dir_d,
            
            # Food location 
            game.food.x < game.head.x,  # food left
            game.food.x > game.head.x,  # food right
            game.food.y < game.head.y,  # food up
            game.food.y > game.head.y  # food down
            ]

        return np.array(state, dtype=int)

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done)) # popleft if MAX_MEMORY is reached

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory, BATCH_SIZE) # list of tuples
        else:
            mini_sample = self.memory

        states, actions, rewards, next_states, dones = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)

    def get_action(self, state):
        # random moves: tradeoff exploration / exploitation
        self.epsilon = 80 - self.n_games
        final_move = [0,0,0]
        if random.randint(0, 200) < self.epsilon:
            move = random.randint(0, 2)
            final_move[move] = 1
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move = torch.argmax(prediction).item()
            final_move[move] = 1

        return final_move


def train():
    record = 0
    agent = Agent()
    game = SnakeGameAI()
    while True:
        # get old state
        state_old = agent.get_state(game)

        # get move
        final_move = agent.get_action(state_old)

        # perform move and get new state
        reward, done, score = game.play_step(final_move)
        state_new = agent.get_state(game)

        # train short memory
        agent.train_short_memory(state_old, final_move, reward, state_new, done)

        # remember
        agent.remember(state_old, final_move, reward, state_new, done)

        if done:
            # train long memory, plot result
            game.reset()
            agent.n_games += 1
            agent.train_long_memory()

            if score > record:
                record = score
                agent.model.save()

            print('Game', agent.n_games, 'Score', score, 'Record:', record,'Epsilon:' ,agent.epsilon)



if __name__ == '__main__':
    train()

  state = torch.tensor(state, dtype=torch.float)


Game 1 Score 0 Record: 0 Epsilon: 80
Game 2 Score 0 Record: 0 Epsilon: 79
Game 3 Score 0 Record: 0 Epsilon: 78
Game 4 Score 0 Record: 0 Epsilon: 77
Game 5 Score 0 Record: 0 Epsilon: 76
Game 6 Score 0 Record: 0 Epsilon: 75
Game 7 Score 1 Record: 1 Epsilon: 74
Game 8 Score 0 Record: 1 Epsilon: 73
Game 9 Score 0 Record: 1 Epsilon: 72
Game 10 Score 0 Record: 1 Epsilon: 71
Game 11 Score 0 Record: 1 Epsilon: 70
Game 12 Score 1 Record: 1 Epsilon: 69
Game 13 Score 1 Record: 1 Epsilon: 68
Game 14 Score 0 Record: 1 Epsilon: 67
Game 15 Score 1 Record: 1 Epsilon: 66
Game 16 Score 0 Record: 1 Epsilon: 65
Game 17 Score 0 Record: 1 Epsilon: 64
Game 18 Score 1 Record: 1 Epsilon: 63
Game 19 Score 1 Record: 1 Epsilon: 62
Game 20 Score 1 Record: 1 Epsilon: 61
Game 21 Score 1 Record: 1 Epsilon: 60
Game 22 Score 1 Record: 1 Epsilon: 59
Game 23 Score 0 Record: 1 Epsilon: 58
Game 24 Score 1 Record: 1 Epsilon: 57
Game 25 Score 3 Record: 3 Epsilon: 56
Game 26 Score 0 Record: 3 Epsilon: 55
Game 27 Score 3 Recor

error: display Surface quit

: 

In [None]:
import pygame
import random
import numpy as np

class SnakeEnv:
    def __init__(self):
        pygame.init()
        self.largeur_ecran = 600
        self.hauteur_ecran = 400
        self.taille_bloc = 10
        self.vitesse_serpent = 15
        self.fenetre = pygame.display.set_mode((self.largeur_ecran, self.hauteur_ecran))
        pygame.display.set_caption("Snake AI")
        self.horloge = pygame.time.Clock()

        # Définir les actions possibles : [0: Gauche, 1: Droite, 2: Haut, 3: Bas]
        self.actions = [0, 1, 2, 3]

        self.reset()

    def reset(self):
        """ Réinitialiser l'état du jeu """
        self.x = self.largeur_ecran / 2
        self.y = self.hauteur_ecran / 2
        self.x_changement = 0
        self.y_changement = 0
        self.serpent = [[self.x, self.y]]
        self.longueur_serpent = 1
        self.nourriture = self.generer_nourriture()
        self.score = 0
        return self.get_state()

    def generer_nourriture(self):
        """ Générer une nouvelle position de nourriture """
        return [round(random.randrange(0, self.largeur_ecran - self.taille_bloc) / 10.0) * 10.0,
                round(random.randrange(0, self.hauteur_ecran - self.taille_bloc) / 10.0) * 10.0]

    def get_state(self):
        """ Retourner un vecteur d'état représentant la situation actuelle """
        return np.array([self.x, self.y, self.nourriture[0], self.nourriture[1]])

    def step(self, action):
        """ Faire avancer le jeu d'un pas en fonction de l'action choisie """
        # Changer la direction en fonction de l'action
        if action == 0:  # Gauche
            self.x_changement = -self.taille_bloc
            self.y_changement = 0
        elif action == 1:  # Droite
            self.x_changement = self.taille_bloc
            self.y_changement = 0
        elif action == 2:  # Haut
            self.y_changement = -self.taille_bloc
            self.x_changement = 0
        elif action == 3:  # Bas
            self.y_changement = self.taille_bloc
            self.x_changement = 0

        # Mettre à jour la position du serpent
        self.x += self.x_changement
        self.y += self.y_changement
        self.serpent.append([self.x, self.y])

        if len(self.serpent) > self.longueur_serpent:
            del self.serpent[0]

        # Vérifier la collision avec les bords ou avec soi-même
        done = False
        if self.x >= self.largeur_ecran or self.x < 0 or self.y >= self.hauteur_ecran or self.y < 0:
            done = True
        for bloc in self.serpent[:-1]:
            if bloc == [self.x, self.y]:
                done = True

        # Gestion de la nourriture
        reward = 0
        if self.x == self.nourriture[0] and self.y == self.nourriture[1]:
            self.nourriture = self.generer_nourriture()
            self.longueur_serpent += 1
            reward = 10  # Récompense pour avoir mangé la nourriture
            self.score += 1

        # Faible pénalité pour chaque mouvement 
        reward -= 0.1

        return self.get_state(), reward, done

    def render(self):
        """ Dessiner l'état actuel du jeu """
        self.fenetre.fill((0, 0, 0))  # Fond noir
        # Dessiner la nourriture
        pygame.draw.rect(self.fenetre, (0, 255, 0), [self.nourriture[0], self.nourriture[1], self.taille_bloc, self.taille_bloc])
        # Dessiner le serpent
        for bloc in self.serpent:
            pygame.draw.rect(self.fenetre, (255, 255, 255), [bloc[0], bloc[1], self.taille_bloc, self.taille_bloc])

        pygame.display.update()
        self.horloge.tick(self.vitesse_serpent)




In [3]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from collections import deque
import random

class DQN:
    def __init__(self, env):
        self.env = env
        self.memory = deque(maxlen=2000)
        
        # Hyperparamètres de DQN
        self.gamma = 0.95  # Facteur de discount
        self.epsilon = 1.0  # Facteur d'exploration
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.batch_size = 64

        # Construction du modèle après initialisation des hyperparamètres
        self.model = self.build_model()

    def build_model(self):
        """Construit le modèle de réseau neuronal pour l'agent DQN"""
        model = Sequential()
        model.add(Dense(24, input_shape=(4,), activation='relu'))  # Correction de input_shape
        model.add(Dense(24, activation='relu'))
        model.add(Dense(4, activation='linear'))  # 4 sorties pour chaque action possible
        
        # Assure que la variable learning_rate est bien utilisée ici
        model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate))  # Correction
        return model

    def remember(self, state, action, reward, next_state, done):
        """Enregistre une expérience dans la mémoire"""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choisit une action selon epsilon-greedy"""
        if np.random.rand() <= self.epsilon:
            return random.choice(self.env.actions)  # Exploration
        q_values = self.model.predict(state)
        return np.argmax(q_values[0])  # Exploitation

    def replay(self):
        """Entraîne le modèle sur un échantillon de la mémoire"""
        if len(self.memory) < self.batch_size:
            return
        
        # Sélectionne un minibatch aléatoire
       


In [4]:
env = SnakeEnv()  # Remplace par ton environnement Snake
agent = DQN(env)

episodes = 1000 # Nombre d'épisodes d'entraînement
batch_size = 32  # Taille des lots pour l'entraînement

for e in range(episodes):
    state = env.reset()
    state = np.reshape(state, [1, 4])

    for time in range(500):  # Limite de temps pour chaque épisode
        action = agent.act(state)  # Choisir une action
        next_state, reward, done = env.step(action)  # Exécuter l'action
        reward = reward if not done else -10  # Pénalité en cas de défaite
        next_state = np.reshape(next_state, [1, 4])
        
        agent.remember(state, action, reward, next_state, done)  # Mémoriser la transition
        state = next_state
        
        if done:
            print(f"Episode {e}/{episodes} - Score : {time}")
            break
        
        if len(agent.memory) > batch_size:
            agent.replay()  # Entraîner le modèle avec un batch


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Episode 1/1000 - Score : 85
Episode 4/1000 - Score : 216
Episode 5/1000 - Score : 355
Episode 6/1000 - Score : 234
Episode 10/1000 - Score : 200
Episode 11/1000 - Score : 469
Episode 16/1000 - Score : 447
Episode 20/1000 - Score : 332
Episode 22/1000 - Score : 128
Episode 24/1000 - Score : 474
Episode 26/1000 - Score : 496
Episode 28/1000 - Score : 304
Episode 30/1000 - Score : 138
Episode 31/1000 - Score : 421
Episode 32/1000 - Score : 484
Episode 34/1000 - Score : 133
Episode 38/1000 - Score : 368
Episode 40/1000 - Score : 227
Episode 41/1000 - Score : 329
Episode 42/1000 - Score : 350
Episode 44/1000 - Score : 394
Episode 47/1000 - Score : 177
Episode 58/1000 - Score : 142
Episode 60/1000 - Score : 368
Episode 61/1000 - Score : 332
Episode 67/1000 - Score : 479
Episode 68/1000 - Score : 339
Episode 70/1000 - Score : 413
Episode 71/1000 - Score : 396
Episode 74/1000 - Score : 402
Episode 77/1000 - Score : 316
Episode 79/1000 - Score : 446
Episode 80/1000 - Score : 473
Episode 85/1000

In [5]:
import pygame  # Assurez-vous d'importer pygame

def play_snake(env, agent, episodes=1, render=True):
    """Fait jouer l'agent au jeu Snake."""
    for episode in range(episodes):
        state = env.reset()  # Réinitialiser l'environnement
        state = np.reshape(state, [1, 4])  # Ajuster la forme de l'état
        
        done = False
        total_reward = 0
        
        while not done:
            if render:
                env.render()  # Affiche le jeu à chaque étape
            
            # L'agent choisit une action
            action = agent.act(state)
            
            # Exécute cette action dans l'environnement
            next_state, reward, done = env.step(action)
            next_state = np.reshape(next_state, [1, 4])
            
            # Passe à l'état suivant
            state = next_state
            total_reward += reward
            
            # Si le jeu est terminé (le serpent est mort)
            if done:
                print(f"Episode: {episode+1}, Score: {total_reward}")
                break

    # Fermer Pygame correctement
    pygame.quit()  # Ajoutez cette ligne pour fermer Pygame


play_snake(env, agent, episodes=5, render=True)


KeyboardInterrupt: 

: 