In [1]:
import gym
import numpy as np
import pygame  # Ensure Pygame is initialized
import os

# Initialize pygame up front to avoid rendering issues when the window opens.
pygame.init()

# Optional: Use this if running on headless systems or remote setups
# os.environ["SDL_VIDEODRIVER"] = "dummy"


def _reset_env(environment):
    """Reset ``environment`` exactly once and return the initial state as an int.

    Supports both reset conventions: a ``(observation, info)`` tuple or a bare
    observation. The previous inline expression called ``reset()`` several
    times just to probe the return type.
    """
    result = environment.reset()
    observation = result[0] if isinstance(result, tuple) else result
    return int(observation)


def _step_env(environment, action):
    """Take one step in ``environment`` and normalise the result.

    Returns ``(next_state, reward, done, info)`` regardless of whether
    ``step`` yields the 5-tuple ``(obs, reward, terminated, truncated, info)``
    or the legacy 4-tuple ``(obs, reward, done, info)``. With the 5-tuple form
    an episode counts as done when it is terminated *or* truncated.
    """
    result = environment.step(int(action))
    if len(result) == 5:
        observation, reward, terminated, truncated, info = result
        finished = bool(terminated) or bool(truncated)
    else:
        observation, reward, finished, info = result
        finished = bool(finished)
    return int(observation), reward, finished, info


# Taxi environment with a graphical window; used only for the final demo run.
env = gym.make("Taxi-v3", render_mode="human")

# Separate non-rendering instance for training: drawing (and frame-rate
# limiting) every one of the training steps would make training needlessly slow.
train_env = gym.make("Taxi-v3")

# Initialize the Q-table with zeros: one row per state, one column per action.
q_table = np.zeros([env.observation_space.n, env.action_space.n])

# Hyperparameters
alpha = 0.1      # Learning rate
gamma = 0.6      # Discount factor
epsilon = 0.1    # Exploration-exploitation trade-off
num_episodes = 1000  # Number of episodes for training

# Training the agent (epsilon-greedy tabular Q-learning)
try:
    for episode in range(num_episodes):
        state = _reset_env(train_env)
        done = False

        while not done:
            # Choose action (explore vs exploit)
            if np.random.uniform(0, 1) < epsilon:
                action = train_env.action_space.sample()  # Explore
            else:
                action = np.argmax(q_table[state])  # Exploit

            # Perform the action and get the next state and reward
            next_state, reward, done, info = _step_env(train_env, action)

            # Update the Q-table towards reward + discounted best next value
            old_value = q_table[state, action]
            next_max = np.max(q_table[next_state])
            new_value = (1 - alpha) * old_value + alpha * (reward + gamma * next_max)
            q_table[state, action] = new_value

            state = next_state  # Move to the next state
finally:
    train_env.close()

print("Training completed.")

# Test the trained agent with the greedy policy on the rendering environment
total_reward = 0
try:
    state = _reset_env(env)
    env.render()  # Initial render for graphical view

    done = False

    while not done:
        action = np.argmax(q_table[state])  # Select best action
        next_state, reward, done, info = _step_env(env, action)
        total_reward += reward

        # render() only returns something for text/array render modes; in
        # "human" mode it draws to the window and returns None, so skip the
        # print in that case instead of emitting "None" on every step.
        output = env.render()
        if output is not None:
            print(output)

        state = next_state  # Update state
finally:
    # Release the pygame window even if the demo run is interrupted.
    env.close()

print(f"Total reward: {total_reward}")


  from pkg_resources import resource_stream, resource_exists
  deprecation(
  deprecation(
  if not isinstance(terminated, (bool, np.bool8)):


: 