In [10]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import pygame
import random
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv

# Cria o jogo:

In [106]:

MAX_VEL_X = 5.0
MAX_VEL_Y = 5.0

class LunarLandingEnv(gym.Env):
    metadata = {'render_modes': ['human'], 'render_fps': 60}

    def __init__(self):
        super(LunarLandingEnv, self).__init__()

        # Define action and observation space
        # Actions: 0 - Do nothing, 1 - Thrust Up, 2 - Thrust Left, 3 - Thrust Right
        self.action_space = spaces.Discrete(4)

        # Observations: x, y, velocity_x, velocity_y, fuel, landing_center_norm
        low = np.array([-1, -1, -1, -1, 0, -1], dtype=np.float32)
        high = np.array([1, 1, 1, 1, 1, 1], dtype=np.float32)
        self.observation_space = spaces.Box(low, high, dtype=np.float32)

        # Game parameters
        self.gravity = 0.05
        self.thrust_vertical = 0.1
        self.thrust_horizontal = 0.05
        self.max_fuel = 1000  # Increased fuel

        # Pygame initialization
        self.screen_width = 800
        self.screen_height = 600
        self.screen = None
        self.clock = None
        self.is_open = True

        # Game outcome
        self.last_outcome = None  # Will be 'victory' or 'defeat' when the game ends

        self.reset()

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.x = self.screen_width / 2  # Start at the center
        self.y = 50
        self.vel_x = 0.0
        self.vel_y = 0.0
        self.fuel = self.max_fuel
        self.landing_zone = self._choose_landing_zone()
        self.current_step = 0  # Step counter
        self.max_steps = 1000  # Max steps per episode
        self.last_outcome = None  # Reset outcome
        return self._get_obs(), {}

    def step(self, action):
        # Apply action
        if action == 1 and self.fuel > 0:  # Thrust Up
            self.vel_y -= self.thrust_vertical
            self.fuel -= 1
        elif action == 2 and self.fuel > 0:  # Thrust Left
            self.vel_x -= self.thrust_horizontal
            self.fuel -= 1
        elif action == 3 and self.fuel > 0:  # Thrust Right
            self.vel_x += self.thrust_horizontal
            self.fuel -= 1

        # Update physics
        self.vel_y += self.gravity
        self.y += self.vel_y
        self.x += self.vel_x

        # Impose horizontal limits
        self.x = max(20, min(self.x, 780))

        # Increment step counter
        self.current_step += 1

        # Initialize reward and constants
        reward = 0.0
        k_y = 5  # Weight for vertical velocity adjustment
        k_x = 4   # Weight for horizontal velocity adjustment
        k_dir = 3  # Weight to penalize wrong direction
        k_dist = 1  # Weight for distance from center

        # Calculate landing zone center
        landing_center = (self.landing_zone[0] + self.landing_zone[1]) / 2
        distance_to_center_x = self.x - landing_center

        # Reward for low and positive Y velocity
        if 0.5 <= self.vel_y <= 1:
            reward += k_y  # Max reward for ideal velocity
        else:
            reward -= k_y * abs(self.vel_y - 0.75)  # Penalty proportional to deviation from 0.75

        if self.x < landing_center:  # Lander is to the left
            if self.vel_x < 0.4:
                if action == 3:  # Pushing to the right
                    reward += k_x
                elif action == 2:  # Pushing to the left
                    reward -= k_dir

        elif self.x > landing_center:  # Lander is to the right
            if self.vel_x > -0.4:
                if action == 2:  # Pushing to the left
                    reward += k_x
                elif action == 3:  # Pushing to the right
                    reward -= k_dir

        # Normalize reward between -10 and 10
        reward = max(-10, min(10, reward))

        # Check for termination
        terminated = False
        if self.y >= 530:  # Lander has reached the platform height
            self.y = 530  # Adjust position to exactly on platform
            # Determine if landing is successful
            landing_velocity_threshold = 0.8  # units
            if self.x > self.landing_zone[0] and self.x < self.landing_zone[1] and abs(self.vel_y) <= landing_velocity_threshold:
                # Successful landing
                self.last_outcome = 'victory'
            else:
                # Unsuccessful landing
                self.last_outcome = 'defeat'
            self.vel_y = 0  # Stop vertical movement
            self.vel_x = 0  # Stop horizontal movement

            terminated = True

        elif self.current_step >= self.max_steps:
            terminated = True
            self.last_outcome = 'defeat'
        elif self.y > self.screen_height or self.x <= 20 or self.x >= 780:
            terminated = True
            reward = -10  # Max penalty for going out of bounds
            self.last_outcome = 'defeat'

        # Return observations, reward, and episode info
        return self._get_obs(), reward, terminated, False, {}

    def render(self):
        if self.screen is None:
            pygame.init()
            pygame.display.init()
            self.screen = pygame.display.set_mode(
                (self.screen_width, self.screen_height)
            )
        if self.clock is None:
            self.clock = pygame.time.Clock()

        if self.last_outcome is None:
            # Normal game rendering
            self.screen.fill((0, 0, 0))  # Clear screen

            # Draw game elements
            self._draw_stars()
            self._draw_terrain()
            self._draw_lander()

            # Show game state information
            landing_center = (self.landing_zone[0] + self.landing_zone[1]) / 2
            distance_to_center = self.x - landing_center

            # Display rocket coordinates and state
            font = pygame.font.Font(None, 36)
            info_text = [
                f"Rocket X: {self.x:.2f}",
                f"Rocket Y: {self.y:.2f}",
                f"Distance to Center: {distance_to_center:.2f}",
                f"Velocity X: {self.vel_x:.2f}",
                f"Velocity Y: {self.vel_y:.2f}",
                f"Fuel: {self.fuel}",
                f"Landing Center X: {landing_center:.2f}",
            ]
            for i, text in enumerate(info_text):
                surface = font.render(text, True, (255, 255, 255))
                self.screen.blit(surface, (10, 10 + i * 30))

            # Visual marker for landing platform center
            pygame.draw.circle(
                self.screen, (0, 255, 0), (int(landing_center), 550), 5
            )

            # Display rocket's current coordinates next to it
            coord_text = f"({int(self.x)}, {int(self.y)})"
            coord_surface = font.render(coord_text, True, (255, 255, 255))
            self.screen.blit(coord_surface, (int(self.x) + 20, int(self.y) - 40))

            # Update display
            pygame.display.flip()
            self.clock.tick(self.metadata["render_fps"])
        else:
            # Draw victory or defeat screen
            self._draw_end_screen()

    def _draw_end_screen(self):
        # Clear screen
        self.screen.fill((0, 0, 0))

        # Victory or defeat text
        font = pygame.font.Font(None, 74)
        if self.last_outcome == 'victory':
            text = font.render("YOU WIN!", True, (0, 255, 0))
        else:
            text = font.render("GAME OVER", True, (255, 0, 0))
        text_rect = text.get_rect(center=(self.screen_width // 2, self.screen_height // 2))
        self.screen.blit(text, text_rect)

        # Update display
        pygame.display.flip()

    def close(self):
        if self.screen is not None:
            pygame.display.quit()
            pygame.quit()
            self.is_open = False

    # Helper methods
    def _choose_landing_zone(self):
        start = random.randint(100, 600)
        width = random.randint(50, 150)
        end = start + width
        return (start, end)

    def _get_obs(self):
        x_norm = (self.x - self.screen_width / 2) / (self.screen_width / 2)
        y_norm = (self.y - self.screen_height / 2) / (self.screen_height / 2)
        vel_x_norm = self.vel_x / MAX_VEL_X
        vel_y_norm = self.vel_y / MAX_VEL_Y
        fuel_norm = self.fuel / self.max_fuel
        landing_center_norm = (self.landing_zone[0] + self.landing_zone[1]) / 2 / self.screen_width
        return np.array(
            [x_norm, y_norm, vel_x_norm, vel_y_norm, fuel_norm, landing_center_norm],
            dtype=np.float32,
        )

    def _draw_stars(self):
        # Draw static stars
        if not hasattr(self, "stars"):
            self.stars = [
                (random.randint(0, 800), random.randint(0, 600)) for _ in range(50)
            ]
        for star in self.stars:
            pygame.draw.circle(self.screen, (255, 255, 255), star, 2)

    def _draw_terrain(self):
        # Draw terrain and landing zone
        pygame.draw.polygon(
            self.screen,
            (100, 100, 100),
            [
                (0, 550),
                (200, 450),
                (400, 500),
                (600, 450),
                (800, 550),
                (800, 600),
                (0, 600),
            ],
        )
        # Draw landing zone
        start, end = self.landing_zone
        pygame.draw.rect(
            self.screen, (150, 150, 150), (start, 550, end - start, 50)
        )
        # Flags
        pygame.draw.line(self.screen, (255, 255, 255), (start, 550), (start, 520), 2)
        pygame.draw.polygon(
            self.screen,
            (255, 255, 0),
            [(start, 520), (start + 15, 527), (start, 534)],
        )
        pygame.draw.line(self.screen, (255, 255, 255), (end, 550), (end, 520), 2)
        pygame.draw.polygon(
            self.screen,
            (255, 255, 0),
            [(end, 520), (end - 15, 527), (end, 534)],
        )

    def _draw_lander(self):
        x = int(self.x)
        y = int(self.y)

        # Draw the lander
        pygame.draw.rect(
            self.screen, (200, 200, 200), (x - 10, y - 30, 20, 60)
        )  # Main body
        pygame.draw.polygon(
            self.screen,
            (150, 150, 150),
            [(x - 10, y + 20), (x - 20, y + 30), (x - 10, y + 30)],
        )  # Left wing
        pygame.draw.polygon(
            self.screen,
            (150, 150, 150),
            [(x + 10, y + 20), (x + 20, y + 30), (x + 10, y + 30)],
        )  # Right wing
        pygame.draw.polygon(
            self.screen,
            (255, 0, 0),
            [(x, y - 40), (x - 10, y - 30), (x + 10, y - 30)],
        )  # Top cone

        # Draw fuel level indicator
        fuel_percentage = self.fuel / self.max_fuel
        fuel_bar_height = 50
        fuel_bar_width = 10
        pygame.draw.rect(
            self.screen,
            (255, 255, 255),
            (x - 25, y - 30, fuel_bar_width, fuel_bar_height),
            1,
        )  # Fuel bar border
        pygame.draw.rect(
            self.screen,
            (0, 255, 0),
            (
                x - 25,
                y - 30 + fuel_bar_height * (1 - fuel_percentage),
                fuel_bar_width,
                fuel_bar_height * fuel_percentage,
            ),
        )  # Fuel bar

# Para treinar o bot (pode demorar dependendo do número de iterações definido em total_timesteps):

In [64]:
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv

# Callback para registrar recompensas
from stable_baselines3.common.callbacks import BaseCallback
import numpy as np

class RewardLoggerCallback(BaseCallback):
    def __init__(self, verbose=0):
        super(RewardLoggerCallback, self).__init__(verbose)

    def _on_step(self) -> bool:
        if len(self.locals["infos"]) > 0 and "episode" in self.locals["infos"][0]:
            episode_rewards = [info["episode"]["r"] for info in self.locals["infos"] if "episode" in info]
            if episode_rewards:
                mean_reward = np.mean(episode_rewards)
                self.logger.record("reward/mean", mean_reward)
        return True

from stable_baselines3.common.vec_env import SubprocVecEnv

env = SubprocVecEnv([lambda: LunarLandingEnv() for _ in range(4)])
# Criar o modelo
model = PPO(
    "MlpPolicy",
    env,
    verbose=1,
    tensorboard_log="./ppo_lunar_landing_tensorboard/",
    learning_rate=3e-4,
    n_steps=2048,
    batch_size=64,
    gae_lambda=0.95,
    gamma=0.99,
    ent_coef=0.0,
    clip_range=0.2,
)
# Treinar o agente
model.learn(total_timesteps=500_000, callback=RewardLoggerCallback())
# Salvar o modelo
model.save("ppo_lunar_landing")


Using cpu device
Logging to ./ppo_lunar_landing_tensorboard/PPO_58
-----------------------------
| time/              |      |
|    fps             | 9771 |
|    iterations      | 1    |
|    time_elapsed    | 0    |
|    total_timesteps | 8192 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 4075        |
|    iterations           | 2           |
|    time_elapsed         | 4           |
|    total_timesteps      | 16384       |
| train/                  |             |
|    approx_kl            | 0.008953445 |
|    clip_fraction        | 0.0852      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | 0.00252     |
|    learning_rate        | 0.0003      |
|    loss                 | 4.83e+03    |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.00239    |
|    value_loss           | 1.05e+04    |
---------

# Para ver o bot jogando:

In [107]:
# Load the model
model = PPO.load("ppo_lunar_landing")

# Create a new environment for testing
env = LunarLandingEnv()

# Run the agent
obs, _ = env.reset()
done = False
while not done:
    action, _ = model.predict(obs)
    obs, reward, done, _, _ = env.step(action)
    env.render()
env.close()

# Para jogar como humano (rode as próximas duas células):

In [99]:
def human_play():
    import pygame
    from pygame.locals import K_UP, K_LEFT, K_RIGHT, K_ESCAPE, QUIT

    env = LunarLandingEnv()
    env.reset()

    # Inicializar Pygame
    pygame.init()
    screen = pygame.display.set_mode((env.screen_width, env.screen_height))
    pygame.display.set_caption("Lunar Landing - Human Play")
    clock = pygame.time.Clock()

    done = False
    action = 0  # Ação inicial: Fazer nada

    while not done:
        for event in pygame.event.get():
            if event.type == QUIT:
                done = True
            elif event.type == pygame.KEYDOWN:
                if event.key == K_ESCAPE:  # Sair do jogo
                    done = True
                elif event.key == K_UP:  # Empuxo para cima
                    action = 1
                elif event.key == K_LEFT:  # Empuxo para esquerda
                    action = 2
                elif event.key == K_RIGHT:  # Empuxo para direita
                    action = 3
            elif event.type == pygame.KEYUP:
                if event.key in [K_UP, K_LEFT, K_RIGHT]:  # Parar de aplicar força
                    action = 0

        # Atualizar o ambiente com a ação selecionada
        obs, reward, terminated, truncated, info = env.step(action)

        # Renderizar o ambiente
        env.render()

        # Verificar se o episódio terminou
        if terminated or truncated:
            print("Episódio terminou!")
            print(f"Recompensa final: {reward}")
            break

        # Limitar a taxa de quadros
        clock.tick(env.metadata["render_fps"])

    # Fechar o ambiente
    env.close()
    pygame.quit()



In [105]:
human_play()

Episódio terminou!
Recompensa final: -1.2500000000000366


# Para ver o bot jogando em 4 telas ao mesmo tempo (rode as próximas duas células):

In [None]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import pygame
import random

MAX_VEL_X = 5.0
MAX_VEL_Y = 5.0

class LunarLandingEnv(gym.Env):
    metadata = {'render_modes': ['human'], 'render_fps': 60}

    def __init__(self):
        super(LunarLandingEnv, self).__init__()

        # Define action and observation space
        self.action_space = spaces.Discrete(4)

        # Observations: x, y, velocity_x, velocity_y, fuel, landing_center_norm
        low = np.array([-1, -1, -1, -1, 0, -1], dtype=np.float32)
        high = np.array([1, 1, 1, 1, 1, 1], dtype=np.float32)
        self.observation_space = spaces.Box(low, high, dtype=np.float32)

        # Game parameters
        self.gravity = 0.05
        self.thrust_vertical = 0.1
        self.thrust_horizontal = 0.05
        self.max_fuel = 1000  # Increased fuel

        # Screen dimensions
        self.screen_width = 800
        self.screen_height = 600
        self.clock = None
        self.is_open = True

        # Game outcome
        self.last_outcome = None  # Will be 'victory' or 'defeat' when the game ends

        self.reset()

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.x = self.screen_width / 2  # Start at the center
        self.y = 50
        self.vel_x = 0.0
        self.vel_y = 0.0
        self.fuel = self.max_fuel
        self.landing_zone = self._choose_landing_zone()
        self.current_step = 0  # Step counter
        self.max_steps = 1000  # Max steps per episode
        self.last_outcome = None  # Reset outcome
        return self._get_obs(), {}

    def step(self, action):
        # Apply action
        if action == 1 and self.fuel > 0:  # Thrust Up
            self.vel_y -= self.thrust_vertical
            self.fuel -= 1
        elif action == 2 and self.fuel > 0:  # Thrust Left
            self.vel_x -= self.thrust_horizontal
            self.fuel -= 1
        elif action == 3 and self.fuel > 0:  # Thrust Right
            self.vel_x += self.thrust_horizontal
            self.fuel -= 1

        # Update physics
        self.vel_y += self.gravity
        self.y += self.vel_y
        self.x += self.vel_x

        # Impose horizontal limits
        self.x = max(20, min(self.x, 780))

        # Increment step counter
        self.current_step += 1

        # Initialize reward and constants
        reward = 0.0
        k_y = 5  # Weight for vertical velocity adjustment
        k_x = 4   # Weight for horizontal velocity adjustment
        k_dir = 3  # Weight to penalize wrong direction
        k_dist = 1  # Weight for distance from center

        # Calculate landing zone center
        landing_center = (self.landing_zone[0] + self.landing_zone[1]) / 2
        distance_to_center_x = self.x - landing_center

        # Reward for low and positive Y velocity
        if 0.5 <= self.vel_y <= 1:
            reward += k_y  # Max reward for ideal velocity
        else:
            reward -= k_y * abs(self.vel_y - 0.75)  # Penalty proportional to deviation from 0.75

        if self.x < landing_center:  # Lander is to the left
            if self.vel_x < 0.4:
                if action == 3:  # Pushing to the right
                    reward += k_x
                elif action == 2:  # Pushing to the left
                    reward -= k_dir

        elif self.x > landing_center:  # Lander is to the right
            if self.vel_x > -0.4:
                if action == 2:  # Pushing to the left
                    reward += k_x
                elif action == 3:  # Pushing to the right
                    reward -= k_dir

        # Normalize reward between -10 and 10
        reward = max(-10, min(10, reward))

        # Check for termination
        terminated = False
        if self.y >= 530:  # Lander has reached the platform height
            self.y = 530  # Adjust position to exactly on platform
            # Determine if landing is successful
            landing_velocity_threshold = 0.8  # units
            if self.x > self.landing_zone[0] and self.x < self.landing_zone[1] and abs(self.vel_y) <= landing_velocity_threshold:
                # Successful landing
                self.last_outcome = 'victory'
            else:
                # Unsuccessful landing
                self.last_outcome = 'defeat'
            self.vel_y = 0  # Stop vertical movement
            self.vel_x = 0  # Stop horizontal movement

            terminated = True

        elif self.current_step >= self.max_steps:
            terminated = True
            self.last_outcome = 'defeat'
        elif self.y > self.screen_height or self.x <= 20 or self.x >= 780:
            terminated = True
            reward = -10  # Max penalty for going out of bounds
            self.last_outcome = 'defeat'

        # Return observations, reward, and episode info
        return self._get_obs(), reward, terminated, False, {}

    def render(self, surface):
        if self.clock is None:
            self.clock = pygame.time.Clock()

        if self.last_outcome is None:
            # Normal game rendering
            surface.fill((0, 0, 0))  # Clear surface

            # Draw game elements onto the surface
            self._draw_stars(surface)
            self._draw_terrain(surface)
            self._draw_lander(surface)

            # Show game state information
            landing_center = (self.landing_zone[0] + self.landing_zone[1]) / 2
            distance_to_center = self.x - landing_center

            # Display rocket coordinates and state
            font = pygame.font.Font(None, 36)
            info_text = [
                f"Rocket X: {self.x:.2f}",
                f"Rocket Y: {self.y:.2f}",
                f"Distance to Center: {distance_to_center:.2f}",
                f"Velocity X: {self.vel_x:.2f}",
                f"Velocity Y: {self.vel_y:.2f}",
                f"Fuel: {self.fuel}",
                f"Landing Center X: {landing_center:.2f}",
            ]
            for i, text in enumerate(info_text):
                surface_text = font.render(text, True, (255, 255, 255))
                surface.blit(surface_text, (10, 10 + i * 30))

            # Visual marker for landing platform center
            pygame.draw.circle(
                surface, (0, 255, 0), (int(landing_center), 550), 5
            )

            # Display rocket's current coordinates next to it
            coord_text = f"({int(self.x)}, {int(self.y)})"
            coord_surface = font.render(coord_text, True, (255, 255, 255))
            surface.blit(coord_surface, (int(self.x) + 20, int(self.y) - 40))

        else:
            # Draw victory or defeat screen
            self._draw_end_screen(surface)

    def _draw_end_screen(self, surface):
        # Clear surface
        surface.fill((0, 0, 0))

        # Victory or defeat text
        font = pygame.font.Font(None, 74)
        if self.last_outcome == 'victory':
            text = font.render("YOU WIN!", True, (0, 255, 0))
        else:
            text = font.render("GAME OVER", True, (255, 0, 0))
        text_rect = text.get_rect(center=(self.screen_width // 2, self.screen_height // 2))
        surface.blit(text, text_rect)

    def close(self):
        # No need to close any Pygame windows here
        self.is_open = False

    # Helper methods
    def _choose_landing_zone(self):
        start = random.randint(100, 600)
        width = random.randint(50, 150)
        end = start + width
        return (start, end)

    def _get_obs(self):
        x_norm = (self.x - self.screen_width / 2) / (self.screen_width / 2)
        y_norm = (self.y - self.screen_height / 2) / (self.screen_height / 2)
        vel_x_norm = self.vel_x / MAX_VEL_X
        vel_y_norm = self.vel_y / MAX_VEL_Y
        fuel_norm = self.fuel / self.max_fuel
        landing_center_norm = (self.landing_zone[0] + self.landing_zone[1]) / 2 / self.screen_width
        return np.array(
            [x_norm, y_norm, vel_x_norm, vel_y_norm, fuel_norm, landing_center_norm],
            dtype=np.float32,
        )

    def _draw_stars(self, surface):
        # Draw static stars onto the surface
        if not hasattr(self, "stars"):
            self.stars = [
                (random.randint(0, self.screen_width), random.randint(0, self.screen_height)) for _ in range(50)
            ]
        for star in self.stars:
            pygame.draw.circle(surface, (255, 255, 255), star, 2)

    def _draw_terrain(self, surface):
        # Draw terrain and landing zone onto the surface
        pygame.draw.polygon(
            surface,
            (100, 100, 100),
            [
                (0, 550),
                (200, 450),
                (400, 500),
                (600, 450),
                (800, 550),
                (800, 600),
                (0, 600),
            ],
        )
        # Draw landing zone
        start, end = self.landing_zone
        pygame.draw.rect(
            surface, (150, 150, 150), (start, 550, end - start, 50)
        )
        # Flags
        pygame.draw.line(surface, (255, 255, 255), (start, 550), (start, 520), 2)
        pygame.draw.polygon(
            surface,
            (255, 255, 0),
            [(start, 520), (start + 15, 527), (start, 534)],
        )
        pygame.draw.line(surface, (255, 255, 255), (end, 550), (end, 520), 2)
        pygame.draw.polygon(
            surface,
            (255, 255, 0),
            [(end, 520), (end - 15, 527), (end, 534)],
        )

    def _draw_lander(self, surface):
        x = int(self.x)
        y = int(self.y)

        # Draw the lander onto the surface
        pygame.draw.rect(
            surface, (200, 200, 200), (x - 10, y - 30, 20, 60)
        )  # Main body
        pygame.draw.polygon(
            surface,
            (150, 150, 150),
            [(x - 10, y + 20), (x - 20, y + 30), (x - 10, y + 30)],
        )  # Left wing
        pygame.draw.polygon(
            surface,
            (150, 150, 150),
            [(x + 10, y + 20), (x + 20, y + 30), (x + 10, y + 30)],
        )  # Right wing
        pygame.draw.polygon(
            surface,
            (255, 0, 0),
            [(x, y - 40), (x - 10, y - 30), (x + 10, y - 30)],
        )  # Top cone

        # Draw fuel level indicator
        fuel_percentage = self.fuel / self.max_fuel
        fuel_bar_height = 50
        fuel_bar_width = 10
        pygame.draw.rect(
            surface,
            (255, 255, 255),
            (x - 25, y - 30, fuel_bar_width, fuel_bar_height),
            1,
        )  # Fuel bar border
        pygame.draw.rect(
            surface,
            (0, 255, 0),
            (
                x - 25,
                y - 30 + fuel_bar_height * (1 - fuel_percentage),
                fuel_bar_width,
                fuel_bar_height * fuel_percentage,
            ),
        )  # Fuel bar

In [70]:
import pygame
import numpy as np

# Certifique-se de que o modelo PPO esteja carregado
model = PPO.load("ppo_lunar_landing")

# Inicializar Pygame
pygame.init()

# Dimensões da tela
SCREEN_WIDTH = 800
SCREEN_HEIGHT = 600

# Criar janela principal
main_screen = pygame.display.set_mode((SCREEN_WIDTH, SCREEN_HEIGHT))
pygame.display.set_caption("Lunar Lander - 4 Instâncias")

# Criar superfícies para cada ambiente
surface_width = SCREEN_WIDTH
surface_height = SCREEN_HEIGHT

surfaces = [
    pygame.Surface((surface_width, surface_height)),
    pygame.Surface((surface_width, surface_height)),
    pygame.Surface((surface_width, surface_height)),
    pygame.Surface((surface_width, surface_height))
]

# Criar 4 ambientes
envs = [LunarLandingEnv() for _ in range(4)]
obs = [env.reset()[0] for env in envs]
dones = [False for _ in range(4)]

clock = pygame.time.Clock()
running = True

while running and not all(dones):
    # Lidar com eventos
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            running = False

    # Atualizar e renderizar cada ambiente
    for i in range(4):
        if not dones[i]:
            action, _ = model.predict(obs[i])
            obs[i], reward, done, _, _ = envs[i].step(action)
            dones[i] = done
            # Renderizar o ambiente em sua superfície
            envs[i].render(surfaces[i])

    # Desenhar cada superfície na tela principal
    # Superior esquerda
    scaled_surface = pygame.transform.scale(surfaces[0], (SCREEN_WIDTH // 2, SCREEN_HEIGHT // 2))
    main_screen.blit(scaled_surface, (0, 0))
    # Superior direita
    scaled_surface = pygame.transform.scale(surfaces[1], (SCREEN_WIDTH // 2, SCREEN_HEIGHT // 2))
    main_screen.blit(scaled_surface, (SCREEN_WIDTH // 2, 0))
    # Inferior esquerda
    scaled_surface = pygame.transform.scale(surfaces[2], (SCREEN_WIDTH // 2, SCREEN_HEIGHT // 2))
    main_screen.blit(scaled_surface, (0, SCREEN_HEIGHT // 2))
    # Inferior direita
    scaled_surface = pygame.transform.scale(surfaces[3], (SCREEN_WIDTH // 2, SCREEN_HEIGHT // 2))
    main_screen.blit(scaled_surface, (SCREEN_WIDTH // 2, SCREEN_HEIGHT // 2))

    # Atualizar a tela
    pygame.display.flip()
    clock.tick(60)  # Limitar a 60 FPS

# Fechar ambientes
for env in envs:
    env.close()
pygame.quit()