<a href="https://colab.research.google.com/github/whitestones011/deep_learning/blob/master/rl_qym_baselines3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Reinforcement learning with Gym and Baselines3 Zoo

**Cart pole**

A pole is attached by an un-actuated joint to a cart, which moves along a frictionless track. The pole (an inverted pendulum) starts upright on the cart, and the goal is to keep it balanced by applying forces to the cart in the left and right directions.

In [1]:
%%capture
# Prerequisites for recording videos without a physical screen: the xvfb and
# x11-utils system packages, plus the pyvirtualdisplay Python package
# (restricted to the 0.2.* release series).
!apt-get install -y xvfb x11-utils
!pip install pyvirtualdisplay==0.2.*
# Install gym and the Baselines3 RL Zoo package (rl_zoo3).
# NOTE(review): neither package is version-pinned, so a fresh run may pull
# releases whose API differs from what the cells below expect - consider
# pinning once the known-good versions are confirmed.
!pip install gym
!pip install rl_zoo3

In [3]:
import os
import gym
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy

In [4]:
# Virtual display
import glob
import io
from base64 import b64encode
from IPython.display import HTML
from pyvirtualdisplay import Display
from IPython import display as ipythondisplay
from gym.wrappers.monitoring.video_recorder import VideoRecorder

In [7]:
# Launch an invisible 1400x900 virtual display so rendering works without a
# physical screen.
display = Display(size=(1400, 900), visible=0)
_ = display.start()  # bind the result to `_` so the cell does not echo it

In [72]:
def show_video(videopath=None):
  """Embed an MP4 file in the notebook output as an inline HTML5 player.

  The file is read, base64-encoded and placed in a ``<video>`` tag
  (400x200 px, autoplay, playback rate set to 0.25 by a small script).

  Args:
    videopath: Path of the MP4 file to show. When omitted (or falsy), the
      most recently modified file matching ``video/*.mp4`` relative to the
      current working directory is used.

  Returns:
    None. If no path is given and no recording can be found, the message
    "Could not find video" is printed and nothing is displayed.
  """
  if not videopath:
    mp4list = glob.glob('video/*.mp4')
    if not mp4list:
      # Stop here: falling through would attempt to open ``None``.
      print("Could not find video")
      return
    # glob returns matches in arbitrary order, so choose the newest
    # recording explicitly instead of whatever happens to come first.
    videopath = max(mp4list, key=os.path.getmtime)

  # Context manager so the file handle is always closed.
  with open(videopath, 'rb') as video_file:
    video = video_file.read()

  base64_encoded_mp4 = b64encode(video).decode('ascii')
  ipythondisplay.display(
      HTML(
          data='''
          <video alt="test" autoplay controls style="width: 400px; height: 200px;" id="theVideo">
            <source src="data:video/mp4;base64,{0}" type="video/mp4" />
          </video>
          <script>
          video = document.getElementById("theVideo")
          video.playbackRate = 0.25;
          </script>
          '''.format(base64_encoded_mp4)
          )
    )

In [15]:
# !mkdir video

In [8]:
# CartPole environment created with render_mode="rgb_array".
# NOTE(review): presumably chosen so frames come back as arrays the video
# recorder can capture - confirm against the gym rendering docs.
env = gym.make(
    'CartPole-v1',
    render_mode="rgb_array",
)

In [79]:
# Roll out a random policy for a number of episodes, recording every step.
video_path = 'video/carpole.mp4'
# Make sure the output folder exists so a fresh run does not fail when the
# recording is written (the recorder is not relied on to create it).
os.makedirs(os.path.dirname(video_path), exist_ok=True)
video = VideoRecorder(env, video_path)

trials = 10

try:
  for trial in range(trials):
    # Start a fresh episode; reset returns the initial observation and info.
    observation, info = env.reset()
    # capture_frame() obtains the frame from the env itself (per gym's
    # VideoRecorder), so no separate env.render() call is needed.
    video.capture_frame()
    score, terminated, truncated = 0, False, False

    # An episode is over when it is terminated OR truncated; checking only
    # `terminated` would let the score run on into the next episode.
    while not (terminated or truncated):
      action = env.action_space.sample()  # random policy: ignores the observation
      observation, reward, terminated, truncated, info = env.step(action)
      score = score + reward
      video.capture_frame()  # one frame per step, not one per trial

    print('Trial', trial, score)
finally:
  # Always finalize the video file and release the environment, even if a
  # step raised part-way through.
  video.close()
  env.close()

Trial 0 38.0
Trial 1 14.0
Trial 2 34.0
Trial 3 10.0
Trial 4 14.0
Trial 5 21.0
Trial 6 13.0
Trial 7 31.0
Trial 8 13.0
Trial 9 24.0
Moviepy - Building video video/carpole.mp4.
Moviepy - Writing video video/carpole.mp4



                                                   

Moviepy - Done !
Moviepy - video ready video/carpole.mp4




In [73]:
# Pass the recording's path explicitly: with no argument the helper falls back
# to a glob over video/*.mp4 and may pick an unrelated, older file.
show_video(video_path)

video/text.mp4
