In [5]:
import numpy as np
import gymnasium as gym
from collections import defaultdict
import ale_py
import random
from tqdm import tqdm

class QLearningAgent:
    """Tabular Q-learning agent with an epsilon-greedy policy.

    Q-values live in a ``defaultdict`` keyed by (hashable) state; each value
    is a NumPy array holding one Q-value per action, created as all zeros
    the first time a state is written to.
    """

    def __init__(self, state_space, action_space, learning_rate=0.1, discount=0.99, epsilon=1.0, epsilon_decay=0.995, epsilon_min=0.1):
        """Store the hyper-parameters and create an empty Q-table.

        Args:
            state_space: Accepted for interface compatibility; it is not
                used because table rows are created lazily per state.
            action_space: Number of discrete actions.
            learning_rate: Step size applied to each temporal-difference
                update.
            discount: Discount factor applied to the best next-state value.
            epsilon: Initial probability of picking a random action.
            epsilon_decay: Multiplicative factor applied by
                ``decay_epsilon``.
            epsilon_min: Floor that decay never pushes ``epsilon`` below.
        """
        self.action_space = action_space
        self.learning_rate = learning_rate
        self.discount = discount
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        self.epsilon_min = epsilon_min

        # Initialize Q-table as a defaultdict
        self.q_table = defaultdict(lambda: np.zeros(action_space))

    def get_action(self, state):
        """Return an action index (a plain ``int``) chosen epsilon-greedily.

        With probability ``epsilon`` a uniformly random action is returned;
        otherwise the action with the highest stored Q-value for ``state``.
        """
        if np.random.rand() < self.epsilon:
            return random.randrange(self.action_space)  # Exploration

        # Use .get() so that merely looking at an unseen state does not
        # insert a new all-zero row into the table.
        q_values = self.q_table.get(state)
        if q_values is None:
            return 0  # argmax of an all-zero row is index 0
        return int(np.argmax(q_values))  # Exploitation

    def update_q_table(self, state, action, reward, next_state, done):
        """Apply one Q-learning update for the given transition.

        Args:
            state: Hashable key of the state the action was taken in.
            action: Index of the action taken.
            reward: Reward received for the transition.
            next_state: Hashable key of the resulting state.
            done: When true, the future value is taken as zero instead of
                bootstrapping from ``next_state``.
        """
        if done:
            max_future_q = 0.0
        else:
            # Read-only lookup: an unseen next state is worth 0 and does
            # not need a row of its own yet.
            next_q_values = self.q_table.get(next_state)
            max_future_q = 0.0 if next_q_values is None else np.max(next_q_values)

        target = reward + self.discount * max_future_q
        q_values = self.q_table[state]
        q_values[action] += self.learning_rate * (target - q_values[action])

    def decay_epsilon(self):
        """Multiply ``epsilon`` by ``epsilon_decay``, clamped at ``epsilon_min``."""
        if self.epsilon > self.epsilon_min:
            # Clamp so a decay step can never undershoot the floor.
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

class PacmanQLearning:
    """Train and run a tabular Q-learning agent on an Atari Pac-Man environment."""

    def __init__(self, env_name='ALE/Pacman-v5', view='human'):
        """Create the preprocessed environment and the agent.

        Args:
            env_name: Gymnasium environment id passed to ``gym.make``.
            view: Render mode passed to ``gym.make`` as ``render_mode``.
        """
        # The base env is created with frameskip=1; frame skipping is then
        # done by the AtariPreprocessing wrapper (frame_skip=4), which also
        # converts observations to 84x84 grayscale.
        self.env = gym.make(env_name, frameskip=1, render_mode=view)
        self.env = gym.wrappers.AtariPreprocessing(self.env, frame_skip=4, grayscale_obs=True, screen_size=84)
        self.env.reset()

        self.state_space = (84, 84)  # Simplified state representation
        self.action_space = self.env.action_space.n

        self.agent = QLearningAgent(self.state_space, self.action_space)

    def preprocess_state(self, state):
        """Flatten an observation array into a tuple usable as a Q-table key."""
        return tuple(state.flatten())

    def train(self, episodes=1000, statistics=True, save_path='./'):
        """Run Q-learning for ``episodes`` episodes and save the Q-table.

        Args:
            episodes: Number of episodes to play.
            statistics: When true, print score and epsilon after each episode.
            save_path: Directory in which ``q_table.npy`` is written.

        Returns:
            List with the total reward of each episode, in order.
        """
        # Local import keeps this method self-contained; os is only needed
        # to build the output path.
        import os

        scores = []

        for e in tqdm(range(episodes)):
            state, _ = self.env.reset()
            state = self.preprocess_state(state)

            done = False
            score = 0

            while not done:
                action = self.agent.get_action(state)
                next_state, reward, terminated, truncated, _ = self.env.step(action)
                next_state = self.preprocess_state(next_state)

                # Only a real terminal state zeroes the future value; a
                # truncated episode still bootstraps from next_state.
                self.agent.update_q_table(state, action, reward, next_state, terminated)

                state = next_state
                score += reward
                # Either flag ends the episode; ignoring `truncated` would
                # keep stepping an environment that has already finished.
                done = terminated or truncated

            # Decay epsilon after each episode
            self.agent.decay_epsilon()

            # Logging
            scores.append(score)

            if statistics:
                print(f"Episode: {e+1}/{episodes}, Score: {score}, Epsilon: {self.agent.epsilon:.3f}")

        # Save Q-table for future use. The dict is stored as a pickled object
        # array, so loading it requires allow_pickle=True. os.path.join
        # handles a save_path given with or without a trailing separator.
        np.save(os.path.join(save_path, 'q_table.npy'), dict(self.agent.q_table))
        print("Training complete. Q-table saved.")
        return scores

    def play(self, episodes=1, epsilon=None):
        """Let the agent play ``episodes`` episodes without learning.

        Args:
            episodes: Number of episodes to play.
            epsilon: Optional exploration rate to use while playing (for
                example ``0.0`` for a purely greedy policy). When ``None``
                the agent's current epsilon is used. The agent's epsilon is
                restored afterwards.

        The environment is closed when this method returns or is
        interrupted.
        """
        previous_epsilon = self.agent.epsilon
        if epsilon is not None:
            self.agent.epsilon = epsilon

        try:
            for episode in range(episodes):
                state, _ = self.env.reset()
                state = self.preprocess_state(state)

                done = False
                score = 0

                print(f"Starting Episode {episode + 1}")

                while not done:
                    self.env.render()  # Render the environment
                    action = self.agent.get_action(state)  # Use the agent's policy
                    next_state, reward, terminated, truncated, _ = self.env.step(action)

                    state = self.preprocess_state(next_state)
                    score += reward
                    done = terminated or truncated

                print(f"Episode {episode + 1} ended with score: {score}")
        finally:
            # Runs on KeyboardInterrupt too, so the window is always
            # released and the agent's exploration rate is left unchanged.
            self.agent.epsilon = previous_epsilon
            self.env.close()

# Build the environment together with its Q-learning agent.
pacman = PacmanQLearning()

# Short training run with per-episode statistics printed.
pacman.train(episodes=5, statistics=True, save_path='./')

# Merge a stored Q-table into the agent when the file exists.
# NOTE: allow_pickle=True unpickles the file contents, so only load
# Q-table files that come from a trusted source.
try:
    q_table_data = np.load('./q_table.npy', allow_pickle=True).item()
except FileNotFoundError:
    print("No Q-table found. Starting fresh.")
else:
    pacman.agent.q_table.update(q_table_data)
    print("Q-table loaded successfully.")

# Let the agent play a few episodes.
pacman.play(episodes=5)


 20%|██        | 1/5 [00:21<01:27, 21.86s/it]

Episode: 1/5, Score: 20.0, Epsilon: 0.995


 40%|████      | 2/5 [00:47<01:11, 23.82s/it]

Episode: 2/5, Score: 21.0, Epsilon: 0.990


 60%|██████    | 3/5 [01:12<00:49, 24.58s/it]

Episode: 3/5, Score: 19.0, Epsilon: 0.985


 80%|████████  | 4/5 [01:31<00:22, 22.55s/it]

Episode: 4/5, Score: 7.0, Epsilon: 0.980


100%|██████████| 5/5 [01:49<00:00, 21.87s/it]

Episode: 5/5, Score: 10.0, Epsilon: 0.975





Training complete. Q-table saved.
Q-table loaded successfully.
Starting Episode 1


KeyboardInterrupt: 

In [None]:
# Replay the game