<a href="https://colab.research.google.com/github/victorkobani/Deep-Reinforcement-Learning/blob/main/Lunar_Lander_Standalone_A2C_Script.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

INSTALL DEPENDENCIES

In [None]:
# Installing required dependencies
!apt-get update
!apt-get install -y swig cmake ffmpeg freeglut3-dev xvfb

# Installing more dependencies
!pip install "gymnasium[box2d]"
!pip install "stable-baselines3[extra]>=2.7.0"
!pip install "huggingface_sb3>=3.0"
!pip install "moviepy>=2.2.1"

IMPORTS

In [None]:
import gymnasium as gym
import os
import numpy as np
import matplotlib.pyplot as plt
from stable_baselines3 import A2C
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import VecVideoRecorder, DummyVecEnv
from IPython.display import HTML, display
from base64 import b64encode

CREATE GYM ENVIRONMENT AND INSTANTIATE AGENT

In [None]:
# Seed for the agent (and the environment it creates) so that a
# Restart & Run All produces comparable training curves.
# NOTE(review): exact reproducibility can still depend on hardware and
# library versions.
SEED = 0

# Build the A2C agent with an MLP policy. Passing the environment id as a
# string lets stable-baselines3 construct the LunarLander-v3 environment itself.
model = A2C(
    "MlpPolicy",
    "LunarLander-v3",
    seed=SEED,
    verbose=1,
)

EVALUATE UNTRAINED AGENT

In [None]:
# Baseline: score the freshly initialised (untrained) agent before any learning.
# Note that with deterministic=True the agent does not sample its actions, so
# this measures the untrained policy itself rather than uniformly random play.

# Wrap the environment in a Monitor so evaluate_policy reads episode returns
# and lengths from the Monitor's records; without it stable-baselines3 emits a
# warning that the reported values may be altered by other wrappers.
eval_env = Monitor(gym.make("LunarLander-v3"))
mean_reward, std_reward = evaluate_policy(
    model,
    eval_env,
    n_eval_episodes=20,
    deterministic=True,
)

print(f"Untrained A2C mean_reward={mean_reward:.2f} +/- {std_reward}")

SETUP CALLBACK AND TRAIN THE AGENT

In [None]:
# --- Evaluation callback -----------------------------------------------------

# Root directory for everything the callback writes during training.
log_dir = "/tmp/a2c_gym_logs/"
os.makedirs(log_dir, exist_ok=True)

# Dedicated, Monitor-wrapped environment used only for periodic evaluation.
eval_env_monitored = Monitor(gym.make("LunarLander-v3"))

# Periodically evaluate the agent while it trains: the best-scoring model is
# stored under <log_dir>/best_model and the evaluation log under
# <log_dir>/results.
eval_callback = EvalCallback(
    eval_env_monitored,
    eval_freq=5000,  # run an evaluation every 5000 steps
    deterministic=True,
    render=False,
    log_path=os.path.join(log_dir, "results"),
    best_model_save_path=os.path.join(log_dir, "best_model"),
)

# --- Training ----------------------------------------------------------------
print("\n--- Starting A2C Training ---")
model.learn(
    total_timesteps=1_000_000,
    callback=eval_callback,  # hook the periodic evaluation into training
    log_interval=400,
    progress_bar=True,
)

# Persist the final agent, then drop the in-memory copy so that the next cell
# demonstrates restoring it from disk.
model.save("a2c_lunar_v3")
del model

LOAD AND EVALUATE TRAINED AGENT

In [None]:
print("\n--- Loading and Evaluating Final A2C Model ---")

# Restore the agent from the file written by model.save() in the training cell.
model = A2C.load("a2c_lunar_v3")

# Score the reloaded agent over 20 deterministic episodes, reusing the
# evaluation environment created for the untrained baseline.
mean_reward, std_reward = evaluate_policy(
    model,
    eval_env,
    n_eval_episodes=20,
    deterministic=True,
)
print(f"Trained A2C mean_reward={mean_reward:.2f} +/- {std_reward}")

PLOT THE RESULTS

In [None]:
# Plot the evaluation history that the EvalCallback logged during training.

print("\n--- Plotting A2C Training Progress ---")

# The callback was given log_path=<log_dir>/results; the evaluation log is
# read from evaluations.npz inside that directory.
results_path = os.path.join(log_dir, "results")
log_file = os.path.join(results_path, "evaluations.npz")

if os.path.exists(log_file):
    print(f"Loading log file from: {log_file}")
    # The context manager closes the .npz archive once the arrays are read.
    with np.load(log_file) as data:
        timesteps = data['timesteps']
        # 'results' has one row per evaluation round and one column per
        # evaluation episode. Average over the episodes (axis 1); indexing
        # column 0 would plot only the first episode of each round.
        mean_rewards = data['results'].mean(axis=1)

    # Explicit figure/axes interface so the plot does not rely on pyplot state.
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.set_title("Standalone A2C Training Performance on LunarLander-v3")
    ax.set_xlabel("Training Timesteps")
    ax.set_ylabel("Average Reward")
    ax.plot(timesteps, mean_rewards)
    # Horizontal reference line at a reward of 200.
    ax.axhline(y=200, color='r', linestyle='--', label='Success Threshold (200)')
    ax.legend()
    ax.grid(True)
    plt.show()
else:
    print(f"Log file not found at {log_file}. Cannot plot results.")

RECORD VIDEO OF TRAINED AGENT

In [None]:
print("\n--- Recording Video of Trained A2C Agent ---")

env_id = "LunarLander-v3"
video_folder = "logs/videos/"
video_length = 6000  # number of steps to record
os.makedirs(video_folder, exist_ok=True)

# Single-environment vectorised wrapper; render_mode="rgb_array" makes the
# environment return frames as arrays so they can be captured for the video.
vec_env = DummyVecEnv([lambda: gym.make(env_id, render_mode="rgb_array")])
name_prefix = f"a2c-agent-{env_id}"

# Start recording at step 0 only, and capture `video_length` steps.
vec_env = VecVideoRecorder(
    vec_env,
    video_folder,
    record_video_trigger=lambda step: step == 0,
    video_length=video_length,
    name_prefix=name_prefix,
)

try:
    obs = vec_env.reset()
    # One step more than video_length so the recorder passes its frame budget.
    for _ in range(video_length + 1):
        action, _state = model.predict(obs, deterministic=True)
        obs, _, _, _ = vec_env.step(action)
finally:
    # Always close the recorder/environment, even if the rollout raises, so
    # the video is saved and rendering resources are released.
    vec_env.close()

# File name the recorder is expected to have written for a video starting at step 0.
video_filename = os.path.join(
    video_folder, f"{name_prefix}-step-0-to-step-{video_length}.mp4"
)

if os.path.exists(video_filename):
    # Read the file through a context manager so the handle is closed promptly.
    with open(video_filename, 'rb') as video_file:
        mp4 = video_file.read()
    # Embed the video inline as a base64 data URL so it plays in the notebook.
    data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
    display(HTML(f"""
    <video width=400 controls>
          <source src="{data_url}" type="video/mp4">
    </video>
    """))
else:
    print(f"Video file not found at {video_filename}")