In [3]:
import gym

from huggingface_sb3 import load_from_hub, package_to_hub, push_to_hub
from huggingface_hub import notebook_login # To log to our Hugging Face account to be able to upload models to the Hub.

from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_vec_env

from itertools import product
from tqdm import tqdm
import pandas as pd

In [4]:
def train_model(n_steps=2048, n_epochs=10, batch_size=256, discount_factor_gamma=0.99, total_timesteps=1e6,
                env_id='LunarLander-v2', n_envs=16, n_eval_episodes=10, seed=None):
    """Train a PPO agent with an MLP policy and evaluate it on a fresh environment.

    Args:
        n_steps: Forwarded to ``PPO(n_steps=...)``.
        n_epochs: Forwarded to ``PPO(n_epochs=...)``.
        batch_size: Forwarded to ``PPO(batch_size=...)``.
        discount_factor_gamma: Forwarded to ``PPO(gamma=...)``.
        total_timesteps: Training budget handed to ``model.learn``. Floats such
            as ``1e6`` are accepted and truncated to an integer.
        env_id: Gym environment id used for both training and evaluation.
        n_envs: Number of environments created by ``make_vec_env`` for training.
        n_eval_episodes: Number of episodes passed to ``evaluate_policy``.
        seed: Optional seed forwarded to ``make_vec_env`` and ``PPO``;
            ``None`` keeps the libraries' default (unseeded) behaviour.

    Returns:
        A ``(model, mean_reward, std_reward)`` tuple: the trained PPO model and
        the two values returned by ``evaluate_policy``.
    """
    training_env = make_vec_env(env_id, n_envs=n_envs, seed=seed)
    model = PPO(
        policy='MlpPolicy',
        env=training_env,
        n_steps=n_steps,
        batch_size=batch_size,
        n_epochs=n_epochs,
        gamma=discount_factor_gamma,
        gae_lambda=0.98,
        ent_coef=0.01,
        seed=seed,
        verbose=0)

    # The default budget is spelled as the float 1e6; hand learn() an integer step count.
    model.learn(total_timesteps=int(total_timesteps))

    # The training env is deliberately left open: the returned model still
    # references it, so a caller may keep training. The evaluation env is local
    # to this function and is released even if evaluation raises.
    eval_env = gym.make(env_id)
    try:
        mean_reward, std_reward = evaluate_policy(
            model, eval_env, n_eval_episodes=n_eval_episodes, deterministic=True)
    finally:
        eval_env.close()

    return model, mean_reward, std_reward

In [8]:
# Wider search space (currently disabled):
#   n_steps               -> [512, 1024, 2048]
#   n_epochs              -> [5, 10, 20]
#   discount_factor_gamma -> [0.95, 0.99, 0.999]

# Grid in use: a single candidate value per hyperparameter.
n_steps, n_epochs, discount_factor_gamma = [1024], [20], [0.999]

# Every (n_steps, n_epochs, gamma) tuple of the grid; n_steps varies slowest
# and gamma fastest, i.e. the same order a Cartesian product would yield.
combinations = [
    (steps_value, epochs_value, gamma_value)
    for steps_value in n_steps
    for epochs_value in n_epochs
    for gamma_value in discount_factor_gamma
]

In [9]:
# Grid search: train one model per hyperparameter combination, log its score,
# and remember the model with the highest mean evaluation reward.
results = []
# Start from -inf (not 0) so that a model is always selected, even when every
# configuration ends up with a zero or negative mean reward.
best_mean_reward = float('-inf')
best_std_reward = float('inf')
best_model = None
best_params = None

for steps, epochs, discount in combinations:
    params = {
        'n_steps': steps,
        'n_epochs': epochs,
        'discount_factor_gamma': discount,
    }
    model, mean_reward, std_reward = train_model(**params)
    print('params:')
    print(params)
    print(f'reward: {mean_reward} +/- {std_reward}')
    results.append({**params, 'mean_reward': mean_reward, 'std_reward': std_reward})

    # Rewrite the CSV after every run so partial results survive an interrupted search.
    df = pd.DataFrame(results)
    df.to_csv('lunar-tuning-results.csv', index=False)

    if mean_reward > best_mean_reward:
        best_model = model
        best_mean_reward = mean_reward
        best_std_reward = std_reward
        best_params = params



params:
{'n_steps': 1024, 'n_epochs': 20, 'discount_factor_gamma': 0.999}
reward: 264.06695057134584 +/- 14.82053438797129


In [10]:
import gym
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.env_util import make_vec_env

from huggingface_sb3 import package_to_hub

# Id of the environment; used to build the evaluation env and passed to package_to_hub.
env_id = "LunarLander-v2"
# Evaluation env: a single gym env wrapped in a DummyVecEnv.
eval_env = DummyVecEnv([lambda: gym.make(env_id)])
model_name="lunar v2"

# Name of the algorithm used to train the model.
model_architecture = "PPO"

# repo_id is the id of the model repository on the Hugging Face Hub
# (repo_id = {organization}/{repo_name}, for instance ThomasSimonini/ppo-LunarLander-v2).
repo_id = "wwymak/ppo-LunarLander-v2"

# Commit message recorded with the push; embeds the winning hyperparameters.
commit_message = f"lunar lander tuned, 1e6 timesteps, params: {best_params}"

# best_model is initialised to None by the search cell and only replaced when a
# run is selected as best; fail early with a clear message instead of handing
# None to package_to_hub.
if best_model is None:
    raise RuntimeError(
        "No trained model to upload: run the hyperparameter search cell and "
        "make sure at least one configuration was selected as best."
    )

# package_to_hub saves and evaluates the model, generates a model card, records
# a replay video of the agent, and pushes everything to the Hub repository.
# It stays the last expression of the cell so the returned URL is displayed.
package_to_hub(model=best_model, # Our trained model
               model_name=model_name, # The name of our trained model
               model_architecture=model_architecture, # The model architecture we used: in our case PPO
               env_id=env_id, # Name of the environment
               eval_env=eval_env, # Evaluation environment
               repo_id=repo_id, # Id of the model repository on the Hugging Face Hub ({organization}/{repo_name})
               commit_message=commit_message)


[38;5;4mℹ This function will save, evaluate, generate a video of your agent,
create a model card and push everything to the hub. It might take up to 1min.
This is a work in progress: If you encounter a bug, please open an issue and use
push_to_hub instead.[0m


/home/wwymak/code_experiments/deep-rl-class/unit1/hub/ppo-LunarLander-v2 is already a clone of https://huggingface.co/wwymak/ppo-LunarLander-v2. Make sure you pull the latest changes with `repo.git_pull()`.


Saving video to /home/wwymak/code_experiments/deep-rl-class/unit1/-step-0-to-step-1000.mp4


ffmpeg version 5.0.1 Copyright (c) 2000-2022 the FFmpeg developers
  built with gcc 10.3.0 (conda-forge gcc 10.3.0-16)
  configuration: --prefix=/home/conda/feedstock_root/build_artifacts/ffmpeg_1650807798678/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_plac --cc=/home/conda/feedstock_root/build_artifacts/ffmpeg_1650807798678/_build_env/bin/x86_64-conda-linux-gnu-cc --disable-doc --disable-openssl --enable-demuxer=dash --enable-gnutls --enable-gpl --enable-hardcoded-tables --enable-libfreetype --enable-libopenh264 --enable-vaapi --enable-libx264 --enable-libx265 --enable-libaom --enable-libsvtav1 --enable-libxml2 --enable-libvpx --enable-pic --enable-pthreads --enable-shared --disable-static --enable-version3 --enable-zlib --enable-libmp3lame --pkg-config=/home/conda/feedstock_root/build_artifacts/ffmpeg_1650807798678/_build_env/bin/pkg-config
  

[38;5;4mℹ Pushing repo ppo-LunarLander-v2 to the Hugging Face Hub[0m


Upload file replay.mp4:  16%|#6        | 32.0k/197k [00:00<?, ?B/s]

Exception in thread Thread-9:
Traceback (most recent call last):
  File "/home/wwymak/anaconda3/envs/deeprl/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/home/wwymak/anaconda3/envs/deeprl/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/home/wwymak/anaconda3/envs/deeprl/lib/python3.8/site-packages/huggingface_hub/repository.py", line 379, in output_progress
    state, file_progress, byte_progress, filename = line.split()
ValueError: too many values to unpack (expected 4)
remote: Enforcing permissions...        
remote: Allowed refs: all        
To https://huggingface.co/wwymak/ppo-LunarLander-v2
   8e6ae38..d5487bb  main -> main



[38;5;4mℹ Your model is pushed to the hub. You can view your model here:
https://huggingface.co/wwymak/ppo-LunarLander-v2[0m


'https://huggingface.co/wwymak/ppo-LunarLander-v2'