## **DDQN Lunar Lander Inference with GIF saving**

### Imports and Setup

In [1]:
import gymnasium as gym
from PIL import Image
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from itertools import count

### Environment Setup

In [2]:
env = gym.make("LunarLander-v3", render_mode='rgb_array')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cpu


### Define Model

In [3]:
class DDQN(nn.Module):
    def __init__(self, n_observations, n_actions):
        super().__init__()
        self.layer1 = nn.Linear(n_observations, 128)
        self.layer2 = nn.Linear(128, 128)
        self.layer3 = nn.Linear(128, n_actions)

    def forward(self, x):
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        return self.layer3(x)

### Load Trained Model

In [4]:
state_size = env.observation_space.shape[0]
action_size = env.action_space.n

policy_net = DDQN(state_size, action_size).to(device)
model_path = os.path.join("..", "models", "ddqn_lunarlander_v2.pth")
policy_net.load_state_dict(torch.load(model_path, map_location=device))
policy_net.eval()
print("Model loaded successfully!")

Model loaded successfully!


### GIF saving function

In [6]:
def save_frames_as_gif(frames, path=os.path.join("..", "gif_ddqn_v2"), filename='LUNARLANDER_DQN_SUCCESS.gif'):
    images = [Image.fromarray(frame) for frame in frames]
    gif_path = os.path.join(path, filename)
    images[0].save(gif_path, save_all=True, append_images=images[1:], duration=50, loop=0)
    print(f"Saved GIF to: {gif_path}")

### Run N episodes and save GIF only if success

In [7]:
num_episodes_to_try = 20

for e_test in range(num_episodes_to_try):
    state, _ = env.reset()
    state = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
    total_reward = 0
    frames = []

    for t in count():
        with torch.no_grad():
            action = policy_net(state).max(1).indices.view(1, 1)

        next_state, reward, terminated, truncated, _ = env.step(action.item())
        frame = env.render()
        frames.append(frame)

        next_state = torch.tensor(next_state, dtype=torch.float32, device=device).unsqueeze(0)
        state = next_state
        total_reward += reward

        if terminated or truncated:
            print(f"Test Episode: {e_test+1}/{num_episodes_to_try}, Reward: {total_reward:.2f}, Steps: {t}")

            # --- Save GIF if successful landing ---
            if total_reward >= 200:
                print("✅ Successful landing detected! Saving GIF...")
                save_frames_as_gif(frames, filename=f'LUNARLANDER_DQN_SUCCESS_EP{e_test+1}.gif')
                env.close()
                break  # Stop after first success, or remove this to keep trying more

            break

Test Episode: 1/20, Reward: 220.21, Steps: 333
✅ Successful landing detected! Saving GIF...
Saved GIF to: ../gif_ddqn_v2/LUNARLANDER_DQN_SUCCESS_EP1.gif
Test Episode: 2/20, Reward: 253.59, Steps: 341
✅ Successful landing detected! Saving GIF...
Saved GIF to: ../gif_ddqn_v2/LUNARLANDER_DQN_SUCCESS_EP2.gif
Test Episode: 3/20, Reward: 135.77, Steps: 999
Test Episode: 4/20, Reward: 238.25, Steps: 250
✅ Successful landing detected! Saving GIF...
Saved GIF to: ../gif_ddqn_v2/LUNARLANDER_DQN_SUCCESS_EP4.gif
Test Episode: 5/20, Reward: 230.49, Steps: 271
✅ Successful landing detected! Saving GIF...
Saved GIF to: ../gif_ddqn_v2/LUNARLANDER_DQN_SUCCESS_EP5.gif
Test Episode: 6/20, Reward: 282.21, Steps: 231
✅ Successful landing detected! Saving GIF...
Saved GIF to: ../gif_ddqn_v2/LUNARLANDER_DQN_SUCCESS_EP6.gif
Test Episode: 7/20, Reward: 249.79, Steps: 309
✅ Successful landing detected! Saving GIF...
Saved GIF to: ../gif_ddqn_v2/LUNARLANDER_DQN_SUCCESS_EP7.gif
Test Episode: 8/20, Reward: 240.28,