In [None]:
! apt install swig cmake -q
! pip install stable-baselines3==2.0.0a5 swig gymnasium[box2d] huggingface_sb3 -q
! sudo apt-get update -q
! apt install python3-opengl ffmpeg xvfb -q
! pip3 install pyvirtualdisplay -q

In [None]:
from pyvirtualdisplay import Display

# Start an off-screen (visible=0) 1400x900 display and keep a handle to it.
virtual_display = Display(size=(1400, 900), visible=0)
virtual_display.start()

<pyvirtualdisplay.display.Display at 0x79ef5949db10>

In [None]:
from pathlib import Path

import gymnasium as gym

from huggingface_hub import notebook_login
from huggingface_sb3 import load_from_hub, package_to_hub
from stable_baselines3 import PPO, DQN, A2C
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv

In [None]:
# Log in to the Hugging Face Hub (renders an interactive widget); the
# package_to_hub cells further down push to repositories on the Hub.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# Policy identifier shared by every algorithm constructed below (PPO, DQN, A2C).
POLICY = "MlpPolicy"

## PPO (Proximal Policy Optimization)

In [None]:
# PPO training budget and a single LunarLander environment.
total_timesteps = 1_000_000
env = gym.make("LunarLander-v2")

# MlpPolicy (multi-layer perceptron) is used because the observation is a
# vector; frame (image) observations would call for CnnPolicy instead.
model = PPO(
    policy=POLICY,
    env=env,
    gamma=0.999,
    gae_lambda=0.98,
    ent_coef=0.01,
    n_steps=1024,
    n_epochs=5,
    batch_size=32,
    verbose=0,
)

In [None]:
# Destination of the saved PPO model.
model_name = "/content/ppo-LunarLander-v2"

# Train for the configured number of timesteps, then persist the result.
model.learn(progress_bar=True, total_timesteps=total_timesteps)
model.save(model_name)

Output()

In [None]:
# Evaluate the trained PPO agent on a fresh, Monitor-wrapped environment
# over 10 deterministic episodes and report the mean/std episode reward.
eval_env = Monitor(gym.make("LunarLander-v2"))
mean_reward, std_reward = evaluate_policy(
    model,
    eval_env,
    deterministic=True,
    n_eval_episodes=10,
)
print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")

mean_reward=267.12 +/- 20.190092729862148


In [None]:
# Publish the trained PPO agent to the Hugging Face Hub.
env_id = "LunarLander-v2"
model_architecture = "PPO"
repo_id = "zypchn/ppo-Lunar-Lander"
commit_message = f"Upload {env_id} with {model_architecture} trained agent"

# The evaluation env is created with render_mode="rgb_array" and wrapped in a
# single-env DummyVecEnv.
eval_env = DummyVecEnv([lambda: Monitor(gym.make(env_id, render_mode="rgb_array"))])

package_to_hub(
    model=model,
    # `model_name` holds an absolute "/content/..." path, while package_to_hub
    # expects the *name* of the model file it writes into its own upload folder.
    # Joining an absolute path onto that folder would discard the folder, leaving
    # the model file outside of what gets pushed, so pass only the last component.
    # NOTE(review): based on huggingface_sb3's documented `model_name` parameter.
    model_name=Path(model_name).name,
    model_architecture=model_architecture,
    env_id=env_id,
    eval_env=eval_env,
    repo_id=repo_id,
    commit_message=commit_message,
)

## DQN (Deep Q Network)

In [None]:
# DQN training budget and a single LunarLander environment.
total_timesteps = 1_000_000
env = gym.make("LunarLander-v2")

In [None]:
# Build the DQN agent with the shared policy type and a custom net_arch.
model = DQN(
    policy=POLICY,
    env=env,
    learning_rate=3e-4,
    gamma=0.999,
    batch_size=128,
    buffer_size=200_000,
    learning_starts=1_000,
    exploration_fraction=0.3,
    exploration_final_eps=0.1,
    policy_kwargs={"net_arch": [256, 256]},
    verbose=0,
)

In [None]:
# Destination of the saved DQN model.
model_name = "/content/dqn-LunarLander-v2"

# Train for the configured number of timesteps, then persist the result.
model.learn(progress_bar=True, total_timesteps=total_timesteps)
model.save(model_name)

Output()

In [None]:
# Evaluate the trained DQN agent on a fresh, Monitor-wrapped environment
# over 10 deterministic episodes and report the mean/std episode reward.
eval_env = Monitor(gym.make("LunarLander-v2"))
mean_reward, std_reward = evaluate_policy(
    model,
    eval_env,
    deterministic=True,
    n_eval_episodes=10,
)
print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")

mean_reward=276.12 +/- 12.400879722973757


In [None]:
# Publish the trained DQN agent to the Hugging Face Hub.
env_id = "LunarLander-v2"
model_architecture = "DQN"
repo_id = "zypchn/dqn-Lunar-Lander"
commit_message = f"Upload {env_id} with {model_architecture} trained agent"

# The evaluation env is created with render_mode="rgb_array" and wrapped in a
# single-env DummyVecEnv.
eval_env = DummyVecEnv([lambda: Monitor(gym.make(env_id, render_mode="rgb_array"))])

package_to_hub(
    model=model,
    # `model_name` holds an absolute "/content/..." path, while package_to_hub
    # expects the *name* of the model file it writes into its own upload folder.
    # Joining an absolute path onto that folder would discard the folder, leaving
    # the model file outside of what gets pushed, so pass only the last component.
    # NOTE(review): based on huggingface_sb3's documented `model_name` parameter.
    model_name=Path(model_name).name,
    model_architecture=model_architecture,
    env_id=env_id,
    eval_env=eval_env,
    repo_id=repo_id,
    commit_message=commit_message,
)

[38;5;4mℹ This function will save, evaluate, generate a video of your agent,
create a model card and push everything to the hub. It might take up to 1min.
This is a work in progress: if you encounter a bug, please open an issue.[0m
Saving video to /tmp/tmp9bff2wt0/-step-0-to-step-1000.mp4
Moviepy - Building video /tmp/tmp9bff2wt0/-step-0-to-step-1000.mp4.
Moviepy - Writing video /tmp/tmp9bff2wt0/-step-0-to-step-1000.mp4





Moviepy - Done !
Moviepy - video ready /tmp/tmp9bff2wt0/-step-0-to-step-1000.mp4
[38;5;4mℹ Pushing repo zypchn/dqn-Lunar-Lander to the Hugging Face Hub[0m


replay.mp4:   0%|          | 0.00/144k [00:00<?, ?B/s]

[38;5;4mℹ Your model is pushed to the Hub. You can view your model here:
https://huggingface.co/zypchn/dqn-Lunar-Lander/tree/main/[0m


CommitInfo(commit_url='https://huggingface.co/zypchn/dqn-Lunar-Lander/commit/7165f087c4d64f11284e296a360f4a89d9726c10', commit_message='Upload LunarLander-v2 with DQN trained agent', commit_description='', oid='7165f087c4d64f11284e296a360f4a89d9726c10', pr_url=None, repo_url=RepoUrl('https://huggingface.co/zypchn/dqn-Lunar-Lander', endpoint='https://huggingface.co', repo_type='model', repo_id='zypchn/dqn-Lunar-Lander'), pr_revision=None, pr_num=None)

## A2C (Advantage Actor-Critic)

In [None]:
# A2C training budget and a vectorized LunarLander environment (8 copies).
total_timesteps = 1_000_000
env = make_vec_env("LunarLander-v2", n_envs=8)

In [None]:
# Build the A2C agent with the shared policy type and a custom net_arch.
model = A2C(
    policy=POLICY,
    env=env,
    n_steps=16,
    gamma=0.999,
    gae_lambda=0.95,
    ent_coef=0.05,
    vf_coef=0.25,
    max_grad_norm=0.5,
    policy_kwargs={"net_arch": [256, 256]},
    verbose=0,
)

In [None]:
# Destination of the saved A2C model.
model_name = "/content/a2c-LunarLander-v2"

# Train for the configured number of timesteps, then persist the result.
model.learn(progress_bar=True, total_timesteps=total_timesteps)
model.save(model_name)

Output()

In [None]:
# Evaluate the trained A2C agent on a fresh, Monitor-wrapped environment
# over 10 deterministic episodes and report the mean/std episode reward.
eval_env = Monitor(gym.make("LunarLander-v2"))
mean_reward, std_reward = evaluate_policy(
    model,
    eval_env,
    deterministic=True,
    n_eval_episodes=10,
)
print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")

mean_reward=59.95 +/- 123.98633475169582


In [None]:
# Publish the trained A2C agent to the Hugging Face Hub.
env_id = "LunarLander-v2"
model_architecture = "A2C"
repo_id = "zypchn/a2c-Lunar-Lander"
commit_message = f"Upload {env_id} with {model_architecture} trained agent"

# The evaluation env is created with render_mode="rgb_array" and wrapped in a
# single-env DummyVecEnv.
eval_env = DummyVecEnv([lambda: Monitor(gym.make(env_id, render_mode="rgb_array"))])

package_to_hub(
    model=model,
    # `model_name` holds an absolute "/content/..." path, while package_to_hub
    # expects the *name* of the model file it writes into its own upload folder.
    # Joining an absolute path onto that folder would discard the folder, leaving
    # the model file outside of what gets pushed, so pass only the last component.
    # NOTE(review): based on huggingface_sb3's documented `model_name` parameter.
    model_name=Path(model_name).name,
    model_architecture=model_architecture,
    env_id=env_id,
    eval_env=eval_env,
    repo_id=repo_id,
    commit_message=commit_message,
)

[38;5;4mℹ This function will save, evaluate, generate a video of your agent,
create a model card and push everything to the hub. It might take up to 1min.
This is a work in progress: if you encounter a bug, please open an issue.[0m


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Saving video to /tmp/tmpun_k5o2_/-step-0-to-step-1000.mp4


  """


Moviepy - Building video /tmp/tmpun_k5o2_/-step-0-to-step-1000.mp4.
Moviepy - Writing video /tmp/tmpun_k5o2_/-step-0-to-step-1000.mp4





Moviepy - Done !
Moviepy - video ready /tmp/tmpun_k5o2_/-step-0-to-step-1000.mp4
[38;5;4mℹ Pushing repo zypchn/a2c-Lunar-Lander to the Hugging Face Hub[0m


replay.mp4:   0%|          | 0.00/165k [00:00<?, ?B/s]

[38;5;4mℹ Your model is pushed to the Hub. You can view your model here:
https://huggingface.co/zypchn/a2c-Lunar-Lander/tree/main/[0m


CommitInfo(commit_url='https://huggingface.co/zypchn/a2c-Lunar-Lander/commit/8d3d34102a7b1a8b718f6f5fa4c0028891fffc6b', commit_message='Upload LunarLander-v2 with A2C trained agent', commit_description='', oid='8d3d34102a7b1a8b718f6f5fa4c0028891fffc6b', pr_url=None, repo_url=RepoUrl('https://huggingface.co/zypchn/a2c-Lunar-Lander', endpoint='https://huggingface.co', repo_type='model', repo_id='zypchn/a2c-Lunar-Lander'), pr_revision=None, pr_num=None)