## **Lunar Lander Inference - DQN**

### Imports and Setup

In [1]:
import gymnasium as gym
import torch
import numpy as np
from itertools import count
import matplotlib.pyplot as plt
import time

### GPU Check

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cpu


### Environment Setup

In [3]:
env = gym.make("LunarLander-v3", render_mode="human")
state_size = env.observation_space.shape[0]
action_size = env.action_space.n

### Define Model

In [5]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F

class DQN(nn.Module):
    def __init__(self, n_observations, n_actions):
        super().__init__()
        self.layer1 = nn.Linear(n_observations, 128)
        self.layer2 = nn.Linear(128, 128)
        self.layer3 = nn.Linear(128, n_actions)

    def forward(self, x):
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        return self.layer3(x)

# Instantiate model
policy_net = DQN(state_size, action_size).to(device)

# Load model
model_path = os.path.join("..", "models", "ddqn_lunarlander.pth")
policy_net.load_state_dict(torch.load(model_path, map_location=device))
policy_net.eval()

print("Model loaded successfully!")

Model loaded successfully!


### Test Agent

In [None]:
# Testing the trained agent for 10 episodes
for e_test in range(10):
    state, _ = env.reset()
    state = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
    total_reward = 0

    for t in count():
        with torch.no_grad():
            action = policy_net(state).max(1).indices.view(1, 1)

        next_state, reward, terminated, truncated, _ = env.step(action.item())
        next_state = torch.tensor(next_state, dtype=torch.float32, device=device).unsqueeze(0)
        state = next_state
        total_reward += reward

        if terminated or truncated:
            print(f"Test Episode: {e_test+1}/10, Reward: {total_reward:.2f}, Steps: {t}")
            break

env.close()

Test Episode: 1/10, Reward: 113.91, Steps: 999
Test Episode: 2/10, Reward: 179.83, Steps: 548
Test Episode: 3/10, Reward: 17.89, Steps: 999
Test Episode: 4/10, Reward: -13.40, Steps: 999
Test Episode: 5/10, Reward: 19.41, Steps: 999
Test Episode: 6/10, Reward: -10.01, Steps: 999
Test Episode: 7/10, Reward: 224.26, Steps: 254
Test Episode: 8/10, Reward: 273.23, Steps: 209
Test Episode: 9/10, Reward: 13.64, Steps: 999
Test Episode: 10/10, Reward: 240.94, Steps: 375


: 