# Installation

In [None]:
# Colab magic: select the TensorFlow 1.x runtime; run this before any cell imports TensorFlow.
# NOTE(review): presumably required by the stable-baselines 2.x release installed below — confirm.
%tensorflow_version 1.x

In [None]:
# Pinned RL library; the requirement is quoted so the shell never treats "[mpi]" as a glob pattern.
!pip install "stable-baselines[mpi]==2.10.1"
# Gym plus the Box2D bindings needed by the environment ids used in this notebook.
!pip install gym box2d box2d-kengz
# -y answers apt's confirmation prompt so the cell cannot stall or abort waiting for input.
!apt-get install -y xvfb ffmpeg

# Imports

In [None]:
import gym
import stable_baselines

from stable_baselines import SAC # PPO2 SAC TD3 ACKTR
from stable_baselines.sac import MlpPolicy
from stable_baselines.common.evaluation import evaluate_policy
from stable_baselines.common.vec_env import VecVideoRecorder, DummyVecEnv
from stable_baselines.bench import Monitor

# Hyperparameters

In [None]:
# --- Experiment configuration ---
# Gym environment to train on; the commented id selects the hardcore variant instead.
env_id = 'BipedalWalker-v3'
# env_id = 'BipedalWalkerHardcore-v3'
model_id = 'SAC'  # prefix used for the saved model file and the recorded videos
log_path = 'drive/MyDrive/result/tensorboard/'
video_folder = 'drive/MyDrive/result/videos'
video_length = 5000  # steps rolled out (and recorded) per video


def _make_monitored_env():
  """Build the `env_id` gym environment wrapped in `Monitor(filename=None)`."""
  return Monitor(gym.make(env_id), filename=None)


# One-environment vectorised wrapper; this is the `env` handed to SAC and to evaluation.
env = DummyVecEnv([_make_monitored_env])
obs = env.reset()

In [None]:
# Keyword arguments for the SAC learner, gathered in one place so variants are easy to derive.
sac_kwargs = dict(
    verbose=1,
    ent_coef=0.005,
    learning_rate=3e-4,
    learning_starts=1000,
    buffer_size=1000000,
    tensorboard_log=log_path,
)
# For BipedalWalkerHardcore-v3, use buffer_size=2000000 instead:
# sac_kwargs['buffer_size'] = 2000000

model = SAC(MlpPolicy, env, **sac_kwargs)

# Visualization

In [None]:
import os
import base64
import IPython
from pathlib import Path
from IPython import display as ipythondisplay

# Launch an Xvfb server on display :1 (screen 0 at 1024x768x24) in the background via the shell,
# then point this process's DISPLAY variable at that display.
# NOTE(review): presumably needed so environment rendering for the video recorder works
# on a headless machine — confirm.
os.system("Xvfb :1 -screen 0 1024x768x24 &")
os.environ['DISPLAY'] = ':1'

def record_video(env_id, model, video_length, prefix, video_folder):
  """Roll out `model` in a fresh `env_id` environment while recording a video.

  Args:
    env_id: id passed to `gym.make` to build the evaluation environment.
    model: object whose `predict(obs)` returns an `(action, state)` pair.
    video_length: number of steps to run; also passed to the recorder as `video_length`.
    prefix: passed to the recorder as `name_prefix`.
    video_folder: passed to the recorder as `video_folder`.

  Any exception raised while building the recorder or stepping the environment
  propagates to the caller, but the environment is always closed first.
  """
  eval_env = DummyVecEnv([lambda: gym.make(env_id)])
  try:
    # Recording is triggered at step 0 only, i.e. a single clip from the start of the rollout.
    eval_env = VecVideoRecorder(eval_env, video_folder=video_folder,
                                record_video_trigger=lambda step: step == 0, video_length=video_length,
                                name_prefix=prefix)

    obs = eval_env.reset()
    for _ in range(video_length):
      action, _ = model.predict(obs)
      obs, _, _, _ = eval_env.step(action)
  finally:
    # Close whichever wrapper was built last so nothing is left open on failure.
    eval_env.close()


def show_videos(video_path, prefix):
  """Embed every `<prefix>*.mp4` file found in `video_path` in the cell output.

  Args:
    video_path: directory searched (non-recursively) for video files.
    prefix: file-name prefix; files matching `"{prefix}*.mp4"` are shown.

  Each file is inlined as a base64 `data:` URI inside a `<video>` tag. The tags
  are joined with `<br>` and displayed as one HTML object. If nothing matches,
  an empty HTML object is displayed.
  """
  from html import escape  # function-scope import keeps this cell self-contained

  tags = []
  # sorted() gives a stable display order; raw glob order depends on the filesystem.
  for mp4 in sorted(Path(video_path).glob("{}*.mp4".format(prefix))):
      video_b64 = base64.b64encode(mp4.read_bytes()).decode('ascii')
      # The path lands inside an HTML attribute, so escape quotes and markup characters.
      tags.append('''<video alt="{}" autoplay
                    loop controls style="height: 400px;">
                    <source src="data:video/mp4;base64,{}" type="video/mp4" />
                </video>'''.format(escape(str(mp4)), video_b64))
  ipythondisplay.display(ipythondisplay.HTML(data="<br>".join(tags)))

# Train

In [None]:
def run(timesteps):
  """Train the global `model`, save it, evaluate it, then record and show a video.

  Args:
    timesteps: number of timesteps passed to `model.learn`; also appended to
      the saved model's file name.

  Relies on the notebook globals `model`, `model_id`, `env`, `env_id`,
  `video_length` and `video_folder`. Prints the mean and standard deviation of
  the reward over 100 evaluation episodes.
  """
  model.learn(timesteps)
  model.save('drive/MyDrive/result/' + model_id + str(timesteps))

  mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=100)
  print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")

  record_video(env_id, model, video_length=video_length, prefix=model_id, video_folder=video_folder)
  # The video was written with `model_id` as its name prefix, so it must be looked up
  # with that same prefix (looking it up by `timesteps` matches no file).
  show_videos(video_path=video_folder, prefix=model_id)

In [None]:
# Load the TensorBoard notebook extension, then open a dashboard on the training log
# directory; IPython expands {log_path} from the Python variable of that name.
%load_ext tensorboard
%tensorboard --logdir {log_path}

In [None]:
# Run the whole pipeline for two million timesteps
# (the original author noted 1e6 as an alternative value).
run(timesteps=2_000_000)

Streaming output truncated to the last 5000 lines.
| policy_loss             | -15.50317  |
| qf1_loss                | 0.151758   |
| qf2_loss                | 0.08672227 |
| time_elapsed            | 6530       |
| total timesteps         | 1035362    |
| value_loss              | 0.03610701 |
----------------------------------------
----------------------------------------
| current_lr              | 0.0003     |
| entropy                 | 1.053937   |
| ep_rewmean              | 258        |
| episodes                | 1184       |
| eplenmean               | 789        |
| fps                     | 158        |
| mean 100 episode reward | 258        |
| n_updates               | 1037525    |
| policy_loss             | -16.136055 |
| qf1_loss                | 0.14858082 |
| qf2_loss                | 0.14109492 |
| time_elapsed            | 6549       |
| total timesteps         | 1038524    |
| value_loss              | 0.1542914  |
---------------------------------



Saving video to  /content/drive/MyDrive/result/videos/SAC-step-0-to-step-5000.mp4


# References
1. https://github.com/hill-a/stable-baselines
2. https://stable-baselines.readthedocs.io/en/master/index.html
3. https://github.com/araffin/rl-baselines-zoo/tree/master/hyperparams
4. https://colab.research.google.com/drive/1kl7124cI89-yn-Stio8GiLpJV0jec6Kt

