<a href="https://colab.research.google.com/github/victorkobani/example/blob/master/Lunar_Lander_Environment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Installing required dependencies

In [None]:
# Refresh the apt package index so the install below sees current package lists.
!apt-get update
# Install system packages non-interactively (-y).
# NOTE(review): presumably swig/cmake are for building the Box2D bindings and
# ffmpeg/freeglut3-dev/xvfb for headless rendering and video encoding; confirm.
!apt-get install -y swig cmake ffmpeg freeglut3-dev xvfb

Installing the Python packages (Gymnasium with Box2D, Stable-Baselines3, and video helpers)

In [None]:
!pip install "gymnasium[box2d]"
!pip install "stable-baselines3[extra]==2.7.0"
!pip install "huggingface_sb3>=3.0"
!pip install "moviepy==2.2.1"

Import Gymnasium and the DQN agent

In [None]:
import gymnasium as gym

from stable_baselines3 import DQN

Instantiate the agent (Stable-Baselines3 creates the Gym env from the environment id)

In [None]:
# Fixed seed so that a Restart & Run All reproduces the same training run
# (network initialisation, exploration and environment sampling).
SEED = 0

# Build the DQN agent. Passing the environment id as a string lets
# Stable-Baselines3 create the training environment itself.
model = DQN(
    "MlpPolicy",                   # fully-connected policy network
    "LunarLander-v3",              # environment id for the training env
    verbose=1,                     # print training info
    exploration_final_eps=0.1,     # final value of the epsilon-greedy exploration rate
    target_update_interval=250,    # how often the target network is refreshed
    seed=SEED,
)

We load a helper function to evaluate the agent:

In [None]:
from stable_baselines3.common.evaluation import evaluate_policy

Let's evaluate the untrained agent; at this point it should behave like a random agent.

In [None]:
from stable_baselines3.common.monitor import Monitor

# Separate env for evaluation. It is wrapped in Monitor so that evaluate_policy
# reads the episode returns/lengths recorded by the wrapper; without it,
# evaluate_policy emits a warning about possibly modified episode statistics.
eval_env = Monitor(gym.make("LunarLander-v3"))

# Random Agent, before training: baseline score of the un-trained policy
# over 20 episodes, using greedy (deterministic) action selection.
mean_reward, std_reward = evaluate_policy(
    model,
    eval_env,
    n_eval_episodes=20,
    deterministic=True,
)

print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")

Train the agent and save it

In [None]:
# Run DQN training for one million environment steps, showing a progress bar
# and logging every 400 episodes.
model.learn(
    total_timesteps=1_000_000,
    log_interval=400,
    progress_bar=True,
)

# Persist the trained agent to disk.
model.save("dqn_lunar_v3")

# Drop the in-memory agent so that the next cell demonstrates loading it back.
del model

Load the trained agent

In [None]:
# Restore the agent from the checkpoint written by the training cell.
model = DQN.load(path="dqn_lunar_v3")

In [None]:
# Score the reloaded agent on the evaluation env with the same settings used
# for the un-trained baseline (20 episodes, deterministic actions).
mean_reward, std_reward = evaluate_policy(
    model,
    eval_env,
    n_eval_episodes=20,
    deterministic=True,
)

print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")

Record a video of the trained agent

In [None]:
import os
from stable_baselines3.common.vec_env import VecVideoRecorder, DummyVecEnv
from IPython.display import HTML
from base64 import b64encode

env_id = "LunarLander-v3"
video_folder = "logs/videos/"
video_length = 6000
# NOTE(review): the prefix says "random-agent" although this cell records the
# trained agent; it is kept unchanged so the output file path stays the same.
name_prefix = f"random-agent-{env_id}"
os.makedirs(video_folder, exist_ok=True)

# Create the base environment (rgb_array rendering is required to capture frames)
vec_env = DummyVecEnv([lambda: gym.make(env_id, render_mode="rgb_array")])

# Record the video starting at the first step
vec_env = VecVideoRecorder(
    vec_env,
    video_folder,
    record_video_trigger=lambda step: step == 0,
    video_length=video_length,
    name_prefix=name_prefix,
)

try:
    # Reset once, through the recorder, and keep the returned observation so the
    # first action is computed from the state the environment is actually in.
    obs = vec_env.reset()
    for _ in range(video_length + 1):
        # Greedy actions, matching the deterministic=True used for evaluation.
        action, _state = model.predict(obs, deterministic=True)
        obs, _, _, _ = vec_env.step(action)
finally:
    # Save the video and release the environment even if a step fails
    vec_env.close()

# Same naming pattern as the recorder output for a recording that starts at
# step 0, derived from the settings above so it follows video_length/name_prefix.
video_path = os.path.join(
    video_folder, f"{name_prefix}-step-0-to-step-{video_length}.mp4"
)
with open(video_path, "rb") as video_file:
    mp4 = video_file.read()

# Embed the clip inline as a base64 data URL so it plays inside the notebook.
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
HTML("""
<video width=400 controls>
      <source src="%s" type="video/mp4">
</video>
""" % data_url)