In [1]:
!pip install stable-baselines3[extra]



In [2]:
import os
import gym
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy

In [4]:
environ_name = 'CartPole-v0'
env = gym.make(environ_name) #github.com/openai/gym/blob/master/gym/envs/classic_control/cartpole.py

In [11]:
episodes = 5
for episode in range(episodes):
    state = env.reset()
    done = False
    score = 0
    while not done:
        env.render()
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        score += reward
    print(f'Episode: {episode}, Score: {score}')
env.close()

Episode: 0, Score: 17.0
Episode: 1, Score: 35.0
Episode: 2, Score: 19.0
Episode: 3, Score: 15.0
Episode: 4, Score: 19.0


In [12]:
env.action_space

Discrete(2)

In [13]:
env.observation_space

Box(-3.4028234663852886e+38, 3.4028234663852886e+38, (4,), float32)

In [14]:
log_path = os.path.join('Training', 'Logs')

In [17]:
env = gym.make(environ_name)
env = DummyVecEnv([lambda: env])
model = PPO('MlpPolicy', env, verbose=1, tensorboard_log=log_path)

Using cpu device


In [20]:
model.learn(total_timesteps=20000)

Logging to Training/Logs/PPO_2
-----------------------------
| time/              |      |
|    fps             | 3685 |
|    iterations      | 1    |
|    time_elapsed    | 0    |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 2289        |
|    iterations           | 2           |
|    time_elapsed         | 1           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.009074492 |
|    clip_fraction        | 0.0338      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.565      |
|    explained_variance   | 0.58        |
|    learning_rate        | 0.0003      |
|    loss                 | 44.2        |
|    n_updates            | 110         |
|    policy_gradient_loss | -0.00338    |
|    value_loss           | 90.2        |
-----------------------------------------
---

<stable_baselines3.ppo.ppo.PPO at 0x7fb2a6fd0e20>

In [21]:
PPO_PATH = os.path.join('Training', 'Saved Models', 'PPO_Model_Cartpole')

In [22]:
model.save(PPO_PATH)



In [23]:
evaluate_policy(model, env, n_eval_episodes=10, render=True)



(200.0, 0.0)

In [29]:
episodes = 5
for episode in range(episodes):
    obs = env.reset()
    done = False
    score = 0
    while not done:
        env.render()
        action, _ = model.predict(obs)
        obs, reward, done, info = env.step(action)
        score += reward
    print(f'Episode: {episode}, Score: {score}')
env.close()

Episode: 0, Score: [200.]
Episode: 1, Score: [200.]
Episode: 2, Score: [200.]
Episode: 3, Score: [200.]
Episode: 4, Score: [163.]


In [31]:
!tensorboard --logdir={log_path}

TensorFlow installation not found - running with reduced feature set.

NOTE: Using experimental fast data loading logic. To disable, pass
    "--load_fast=false" and report issues on GitHub. More details:
    https://github.com/tensorflow/tensorboard/issues/4784

Serving TensorBoard on localhost; to expose to the network, use a proxy or pass --bind_all
TensorBoard 2.5.0 at http://localhost:6006/ (Press CTRL+C to quit)
^C


In [32]:
from stable_baselines3.common.callbacks import EvalCallback, StopTrainingOnRewardThreshold

In [33]:
save_path =  os.path.join('Training', 'Saved Models')

In [34]:
stop_callback = StopTrainingOnRewardThreshold(reward_threshold=200, verbose=1)
eval_callback = EvalCallback(env, callback_on_new_best=stop_callback, eval_freq=10000, best_model_save_path=save_path, verbose=1)

In [35]:
model = PPO('MlpPolicy', env, verbose=1, tensorboard_log=log_path)

Using cpu device


In [36]:
model.learn(total_timesteps=100000, callback=eval_callback)

Logging to Training/Logs/PPO_3
-----------------------------
| time/              |      |
|    fps             | 3624 |
|    iterations      | 1    |
|    time_elapsed    | 0    |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 2322        |
|    iterations           | 2           |
|    time_elapsed         | 1           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.007756236 |
|    clip_fraction        | 0.101       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.686      |
|    explained_variance   | -0.00883    |
|    learning_rate        | 0.0003      |
|    loss                 | 5.75        |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.0154     |
|    value_loss           | 51.5        |
-----------------------------------------
---

<stable_baselines3.ppo.ppo.PPO at 0x7fb2a5374c10>

In [39]:
net_arch = [dict(pi=[128,128,128,128], vf=[128,128,128,128])]

In [40]:
model = PPO('MlpPolicy', env, verbose=1, tensorboard_log=log_path, policy_kwargs={'net_arch': net_arch})

Using cpu device


In [41]:
model.learn(total_timesteps=100000, callback=eval_callback)

Logging to Training/Logs/PPO_4
-----------------------------
| time/              |      |
|    fps             | 3101 |
|    iterations      | 1    |
|    time_elapsed    | 0    |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 1735        |
|    iterations           | 2           |
|    time_elapsed         | 2           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.013258992 |
|    clip_fraction        | 0.194       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.682      |
|    explained_variance   | -0.00131    |
|    learning_rate        | 0.0003      |
|    loss                 | 3.18        |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.0232     |
|    value_loss           | 20.1        |
-----------------------------------------
---

------------------------------------------
| time/                   |              |
|    fps                  | 1161         |
|    iterations           | 12           |
|    time_elapsed         | 21           |
|    total_timesteps      | 24576        |
| train/                  |              |
|    approx_kl            | 0.0031358013 |
|    clip_fraction        | 0.046        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.532       |
|    explained_variance   | 0.601        |
|    learning_rate        | 0.0003       |
|    loss                 | 17.8         |
|    n_updates            | 110          |
|    policy_gradient_loss | -0.00302     |
|    value_loss           | 91.5         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1160         |
|    iterations           | 13           |
|    time_elapsed         | 22           |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 1137         |
|    iterations           | 23           |
|    time_elapsed         | 41           |
|    total_timesteps      | 47104        |
| train/                  |              |
|    approx_kl            | 0.0021809763 |
|    clip_fraction        | 0.034        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.487       |
|    explained_variance   | 0.66         |
|    learning_rate        | 0.0003       |
|    loss                 | 7.71         |
|    n_updates            | 220          |
|    policy_gradient_loss | -0.00102     |
|    value_loss           | 77.2         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1135         |
|    iterations           | 24           |
|    time_elapsed         | 43           |
|    total_

-----------------------------------------
| time/                   |             |
|    fps                  | 1125        |
|    iterations           | 34          |
|    time_elapsed         | 61          |
|    total_timesteps      | 69632       |
| train/                  |             |
|    approx_kl            | 0.004273788 |
|    clip_fraction        | 0.033       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.393      |
|    explained_variance   | 0.221       |
|    learning_rate        | 0.0003      |
|    loss                 | 10.4        |
|    n_updates            | 330         |
|    policy_gradient_loss | -0.000919   |
|    value_loss           | 142         |
-----------------------------------------
Eval num_timesteps=70000, episode_reward=200.00 +/- 0.00
Episode length: 200.00 +/- 0.00
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 200          |
|    mean_reward          

-----------------------------------------
| time/                   |             |
|    fps                  | 1118        |
|    iterations           | 45          |
|    time_elapsed         | 82          |
|    total_timesteps      | 92160       |
| train/                  |             |
|    approx_kl            | 0.004671069 |
|    clip_fraction        | 0.0392      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.388      |
|    explained_variance   | 0.0772      |
|    learning_rate        | 0.0003      |
|    loss                 | 112         |
|    n_updates            | 440         |
|    policy_gradient_loss | -0.00273    |
|    value_loss           | 174         |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 1119        |
|    iterations           | 46          |
|    time_elapsed         | 84          |
|    total_timesteps      | 94208 

<stable_baselines3.ppo.ppo.PPO at 0x7fb2a5299be0>

In [42]:
from stable_baselines3 import DQN

In [44]:
model = DQN('MlpPolicy', env, verbose=1, tensorboard_log=log_path)

Using cpu device


In [45]:
model.learn(total_timesteps=100000, callback=eval_callback)

Logging to Training/Logs/DQN_1
----------------------------------
| rollout/            |          |
|    exploration rate | 0.994    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 11723    |
|    time_elapsed     | 0        |
|    total timesteps  | 68       |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.984    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 15306    |
|    time_elapsed     | 0        |
|    total timesteps  | 165      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.974    |
| time/               |          |
|    episodes         | 12       |
|    fps              | 16018    |
|    time_elapsed     | 0        |
|    total timesteps  | 273      |
----------------------------------
------------------------

----------------------------------
| rollout/            |          |
|    exploration rate | 0.776    |
| time/               |          |
|    episodes         | 108      |
|    fps              | 17798    |
|    time_elapsed     | 0        |
|    total timesteps  | 2362     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.766    |
| time/               |          |
|    episodes         | 112      |
|    fps              | 17817    |
|    time_elapsed     | 0        |
|    total timesteps  | 2459     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.759    |
| time/               |          |
|    episodes         | 116      |
|    fps              | 17822    |
|    time_elapsed     | 0        |
|    total timesteps  | 2537     |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration rate | 0.544    |
| time/               |          |
|    episodes         | 216      |
|    fps              | 18075    |
|    time_elapsed     | 0        |
|    total timesteps  | 4800     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.533    |
| time/               |          |
|    episodes         | 220      |
|    fps              | 18108    |
|    time_elapsed     | 0        |
|    total timesteps  | 4911     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.524    |
| time/               |          |
|    episodes         | 224      |
|    fps              | 18095    |
|    time_elapsed     | 0        |
|    total timesteps  | 5012     |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration rate | 0.307    |
| time/               |          |
|    episodes         | 324      |
|    fps              | 18184    |
|    time_elapsed     | 0        |
|    total timesteps  | 7290     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.299    |
| time/               |          |
|    episodes         | 328      |
|    fps              | 18138    |
|    time_elapsed     | 0        |
|    total timesteps  | 7384     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.29     |
| time/               |          |
|    episodes         | 332      |
|    fps              | 18154    |
|    time_elapsed     | 0        |
|    total timesteps  | 7471     |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration rate | 0.0914   |
| time/               |          |
|    episodes         | 432      |
|    fps              | 17893    |
|    time_elapsed     | 0        |
|    total timesteps  | 9564     |
----------------------------------
Eval num_timesteps=9648, episode_reward=9.80 +/- 0.40
Episode length: 9.80 +/- 0.40
----------------------------------
| eval/               |          |
|    mean_ep_length   | 9.8      |
|    mean_reward      | 9.8      |
| rollout/            |          |
|    exploration rate | 0.0795   |
| time/               |          |
|    episodes         | 436      |
|    fps              | 17662    |
|    time_elapsed     | 0        |
|    total timesteps  | 9690     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.0721   |
| time/               |          |
|    episodes         | 440      |
|    f

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 536      |
|    fps              | 17679    |
|    time_elapsed     | 0        |
|    total timesteps  | 12041    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 540      |
|    fps              | 17670    |
|    time_elapsed     | 0        |
|    total timesteps  | 12108    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 544      |
|    fps              | 17672    |
|    time_elapsed     | 0        |
|    total timesteps  | 12208    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 644      |
|    fps              | 17780    |
|    time_elapsed     | 0        |
|    total timesteps  | 14518    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 648      |
|    fps              | 17782    |
|    time_elapsed     | 0        |
|    total timesteps  | 14610    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 652      |
|    fps              | 17786    |
|    time_elapsed     | 0        |
|    total timesteps  | 14688    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 752      |
|    fps              | 17863    |
|    time_elapsed     | 0        |
|    total timesteps  | 16829    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 756      |
|    fps              | 17854    |
|    time_elapsed     | 0        |
|    total timesteps  | 16883    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 760      |
|    fps              | 17863    |
|    time_elapsed     | 0        |
|    total timesteps  | 16992    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 860      |
|    fps              | 17867    |
|    time_elapsed     | 1        |
|    total timesteps  | 19210    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 864      |
|    fps              | 17866    |
|    time_elapsed     | 1        |
|    total timesteps  | 19293    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 868      |
|    fps              | 17872    |
|    time_elapsed     | 1        |
|    total timesteps  | 19409    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 964      |
|    fps              | 17712    |
|    time_elapsed     | 1        |
|    total timesteps  | 21474    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 968      |
|    fps              | 17715    |
|    time_elapsed     | 1        |
|    total timesteps  | 21563    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 972      |
|    fps              | 17712    |
|    time_elapsed     | 1        |
|    total timesteps  | 21650    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1072     |
|    fps              | 17679    |
|    time_elapsed     | 1        |
|    total timesteps  | 23753    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1076     |
|    fps              | 17690    |
|    time_elapsed     | 1        |
|    total timesteps  | 23871    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1080     |
|    fps              | 17699    |
|    time_elapsed     | 1        |
|    total timesteps  | 23972    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1180     |
|    fps              | 17665    |
|    time_elapsed     | 1        |
|    total timesteps  | 26307    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1184     |
|    fps              | 17656    |
|    time_elapsed     | 1        |
|    total timesteps  | 26374    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1188     |
|    fps              | 17654    |
|    time_elapsed     | 1        |
|    total timesteps  | 26452    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1288     |
|    fps              | 17675    |
|    time_elapsed     | 1        |
|    total timesteps  | 28654    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1292     |
|    fps              | 17670    |
|    time_elapsed     | 1        |
|    total timesteps  | 28708    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1296     |
|    fps              | 17674    |
|    time_elapsed     | 1        |
|    total timesteps  | 28799    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1392     |
|    fps              | 17590    |
|    time_elapsed     | 1        |
|    total timesteps  | 31082    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1396     |
|    fps              | 17591    |
|    time_elapsed     | 1        |
|    total timesteps  | 31175    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1400     |
|    fps              | 17595    |
|    time_elapsed     | 1        |
|    total timesteps  | 31273    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1500     |
|    fps              | 17636    |
|    time_elapsed     | 1        |
|    total timesteps  | 33535    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1504     |
|    fps              | 17634    |
|    time_elapsed     | 1        |
|    total timesteps  | 33595    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1508     |
|    fps              | 17647    |
|    time_elapsed     | 1        |
|    total timesteps  | 33749    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1608     |
|    fps              | 17642    |
|    time_elapsed     | 2        |
|    total timesteps  | 35815    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1612     |
|    fps              | 17642    |
|    time_elapsed     | 2        |
|    total timesteps  | 35904    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1616     |
|    fps              | 17643    |
|    time_elapsed     | 2        |
|    total timesteps  | 35986    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1716     |
|    fps              | 17653    |
|    time_elapsed     | 2        |
|    total timesteps  | 38055    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1720     |
|    fps              | 17657    |
|    time_elapsed     | 2        |
|    total timesteps  | 38131    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1724     |
|    fps              | 17661    |
|    time_elapsed     | 2        |
|    total timesteps  | 38227    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1820     |
|    fps              | 17654    |
|    time_elapsed     | 2        |
|    total timesteps  | 40445    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1824     |
|    fps              | 17654    |
|    time_elapsed     | 2        |
|    total timesteps  | 40553    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1828     |
|    fps              | 17656    |
|    time_elapsed     | 2        |
|    total timesteps  | 40672    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1928     |
|    fps              | 17677    |
|    time_elapsed     | 2        |
|    total timesteps  | 42860    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1932     |
|    fps              | 17676    |
|    time_elapsed     | 2        |
|    total timesteps  | 42942    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1936     |
|    fps              | 17663    |
|    time_elapsed     | 2        |
|    total timesteps  | 43029    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 2036     |
|    fps              | 17672    |
|    time_elapsed     | 2        |
|    total timesteps  | 45388    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 2040     |
|    fps              | 17673    |
|    time_elapsed     | 2        |
|    total timesteps  | 45454    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 2044     |
|    fps              | 17674    |
|    time_elapsed     | 2        |
|    total timesteps  | 45526    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 2144     |
|    fps              | 17704    |
|    time_elapsed     | 2        |
|    total timesteps  | 47790    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 2148     |
|    fps              | 17706    |
|    time_elapsed     | 2        |
|    total timesteps  | 47903    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 2152     |
|    fps              | 17706    |
|    time_elapsed     | 2        |
|    total timesteps  | 48001    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 2244     |
|    fps              | 17504    |
|    time_elapsed     | 2        |
|    total timesteps  | 50062    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.593    |
|    n_updates        | 15       |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 2248     |
|    fps              | 17405    |
|    time_elapsed     | 2        |
|    total timesteps  | 50098    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.539    |
|    n_updates        | 24       |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 2320     |
|    fps              | 15863    |
|    time_elapsed     | 3        |
|    total timesteps  | 50802    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.199    |
|    n_updates        | 200      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 2324     |
|    fps              | 15780    |
|    time_elapsed     | 3        |
|    total timesteps  | 50843    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.21     |
|    n_updates        | 210      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 2396     |
|    fps              | 14495    |
|    time_elapsed     | 3        |
|    total timesteps  | 51526    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.022    |
|    n_updates        | 381      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 2400     |
|    fps              | 14434    |
|    time_elapsed     | 3        |
|    total timesteps  | 51561    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0185   |
|    n_updates        | 390      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 2472     |
|    fps              | 12432    |
|    time_elapsed     | 4        |
|    total timesteps  | 53097    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000413 |
|    n_updates        | 774      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 2476     |
|    fps              | 12338    |
|    time_elapsed     | 4        |
|    total timesteps  | 53186    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00487  |
|    n_updates        | 796      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 2548     |
|    fps              | 11256    |
|    time_elapsed     | 4        |
|    total timesteps  | 54344    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00193  |
|    n_updates        | 1085     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 2552     |
|    fps              | 11195    |
|    time_elapsed     | 4        |
|    total timesteps  | 54402    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00376  |
|    n_updates        | 1100     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 2624     |
|    fps              | 10516    |
|    time_elapsed     | 5        |
|    total timesteps  | 55240    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000183 |
|    n_updates        | 1309     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 2628     |
|    fps              | 10490    |
|    time_elapsed     | 5        |
|    total timesteps  | 55278    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000775 |
|    n_updates        | 1319     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 2700     |
|    fps              | 9977     |
|    time_elapsed     | 5        |
|    total timesteps  | 55980    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000462 |
|    n_updates        | 1494     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 2704     |
|    fps              | 9952     |
|    time_elapsed     | 5        |
|    total timesteps  | 56017    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000285 |
|    n_updates        | 1504     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 2776     |
|    fps              | 9509     |
|    time_elapsed     | 5        |
|    total timesteps  | 56723    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000295 |
|    n_updates        | 1680     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 2780     |
|    fps              | 9487     |
|    time_elapsed     | 5        |
|    total timesteps  | 56764    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00306  |
|    n_updates        | 1690     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 2852     |
|    fps              | 9100     |
|    time_elapsed     | 6        |
|    total timesteps  | 57452    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000375 |
|    n_updates        | 1862     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 2856     |
|    fps              | 9080     |
|    time_elapsed     | 6        |
|    total timesteps  | 57492    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000638 |
|    n_updates        | 1872     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 2928     |
|    fps              | 8739     |
|    time_elapsed     | 6        |
|    total timesteps  | 58181    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00218  |
|    n_updates        | 2045     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 2932     |
|    fps              | 8719     |
|    time_elapsed     | 6        |
|    total timesteps  | 58221    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00174  |
|    n_updates        | 2055     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 3004     |
|    fps              | 8393     |
|    time_elapsed     | 7        |
|    total timesteps  | 58924    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000153 |
|    n_updates        | 2230     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 3008     |
|    fps              | 8379     |
|    time_elapsed     | 7        |
|    total timesteps  | 58965    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00202  |
|    n_updates        | 2241     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 3080     |
|    fps              | 8115     |
|    time_elapsed     | 7        |
|    total timesteps  | 59646    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000281 |
|    n_updates        | 2411     |
----------------------------------
Eval num_timesteps=59648, episode_reward=9.20 +/- 1.17
Episode length: 9.20 +/- 1.17
----------------------------------
| eval/               |          |
|    mean_ep_length   | 9.2      |
|    mean_reward      | 9.2      |
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 3084     |
|    fps              | 8088     |
|    time_elapsed     | 7        |
|    total timesteps  | 59689    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 3152     |
|    fps              | 7824     |
|    time_elapsed     | 7        |
|    total timesteps  | 60400    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0204   |
|    n_updates        | 2599     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 3156     |
|    fps              | 7809     |
|    time_elapsed     | 7        |
|    total timesteps  | 60436    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0737   |
|    n_updates        | 2608     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 3228     |
|    fps              | 7561     |
|    time_elapsed     | 8        |
|    total timesteps  | 61156    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.017    |
|    n_updates        | 2788     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 3232     |
|    fps              | 7549     |
|    time_elapsed     | 8        |
|    total timesteps  | 61196    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00297  |
|    n_updates        | 2798     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 3304     |
|    fps              | 7337     |
|    time_elapsed     | 8        |
|    total timesteps  | 61892    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0209   |
|    n_updates        | 2972     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 3308     |
|    fps              | 7327     |
|    time_elapsed     | 8        |
|    total timesteps  | 61932    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0209   |
|    n_updates        | 2982     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 3380     |
|    fps              | 7138     |
|    time_elapsed     | 8        |
|    total timesteps  | 62615    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0339   |
|    n_updates        | 3153     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 3384     |
|    fps              | 7131     |
|    time_elapsed     | 8        |
|    total timesteps  | 62652    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0223   |
|    n_updates        | 3162     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 3456     |
|    fps              | 6936     |
|    time_elapsed     | 9        |
|    total timesteps  | 63354    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0583   |
|    n_updates        | 3338     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 3460     |
|    fps              | 6924     |
|    time_elapsed     | 9        |
|    total timesteps  | 63396    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0145   |
|    n_updates        | 3348     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 3532     |
|    fps              | 6754     |
|    time_elapsed     | 9        |
|    total timesteps  | 64088    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0309   |
|    n_updates        | 3521     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 3536     |
|    fps              | 6744     |
|    time_elapsed     | 9        |
|    total timesteps  | 64126    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0237   |
|    n_updates        | 3531     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 3608     |
|    fps              | 6589     |
|    time_elapsed     | 9        |
|    total timesteps  | 64812    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0287   |
|    n_updates        | 3702     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 3612     |
|    fps              | 6581     |
|    time_elapsed     | 9        |
|    total timesteps  | 64853    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0119   |
|    n_updates        | 3713     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 3684     |
|    fps              | 6442     |
|    time_elapsed     | 10       |
|    total timesteps  | 65538    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00181  |
|    n_updates        | 3884     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 3688     |
|    fps              | 6434     |
|    time_elapsed     | 10       |
|    total timesteps  | 65576    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0472   |
|    n_updates        | 3893     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 3760     |
|    fps              | 6297     |
|    time_elapsed     | 10       |
|    total timesteps  | 66271    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0345   |
|    n_updates        | 4067     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 3764     |
|    fps              | 6290     |
|    time_elapsed     | 10       |
|    total timesteps  | 66313    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0288   |
|    n_updates        | 4078     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 3836     |
|    fps              | 6158     |
|    time_elapsed     | 10       |
|    total timesteps  | 67000    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0177   |
|    n_updates        | 4249     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 3840     |
|    fps              | 6150     |
|    time_elapsed     | 10       |
|    total timesteps  | 67040    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00466  |
|    n_updates        | 4259     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 3912     |
|    fps              | 6036     |
|    time_elapsed     | 11       |
|    total timesteps  | 67738    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.041    |
|    n_updates        | 4434     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 3916     |
|    fps              | 6031     |
|    time_elapsed     | 11       |
|    total timesteps  | 67773    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0171   |
|    n_updates        | 4443     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 3988     |
|    fps              | 5918     |
|    time_elapsed     | 11       |
|    total timesteps  | 68454    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0468   |
|    n_updates        | 4613     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 3992     |
|    fps              | 5911     |
|    time_elapsed     | 11       |
|    total timesteps  | 68495    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0406   |
|    n_updates        | 4623     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 4064     |
|    fps              | 5806     |
|    time_elapsed     | 11       |
|    total timesteps  | 69184    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0129   |
|    n_updates        | 4795     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 4068     |
|    fps              | 5799     |
|    time_elapsed     | 11       |
|    total timesteps  | 69221    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0534   |
|    n_updates        | 4805     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 4136     |
|    fps              | 5705     |
|    time_elapsed     | 12       |
|    total timesteps  | 69882    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0162   |
|    n_updates        | 4970     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 4140     |
|    fps              | 5698     |
|    time_elapsed     | 12       |
|    total timesteps  | 69922    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.034    |
|    n_updates        | 4980     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 4212     |
|    fps              | 5598     |
|    time_elapsed     | 12       |
|    total timesteps  | 70652    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0775   |
|    n_updates        | 5162     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 4216     |
|    fps              | 5592     |
|    time_elapsed     | 12       |
|    total timesteps  | 70697    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.108    |
|    n_updates        | 5174     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 4288     |
|    fps              | 5495     |
|    time_elapsed     | 13       |
|    total timesteps  | 71453    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0464   |
|    n_updates        | 5363     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 4292     |
|    fps              | 5489     |
|    time_elapsed     | 13       |
|    total timesteps  | 71493    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0502   |
|    n_updates        | 5373     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 4364     |
|    fps              | 5395     |
|    time_elapsed     | 13       |
|    total timesteps  | 72224    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0302   |
|    n_updates        | 5555     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 4368     |
|    fps              | 5391     |
|    time_elapsed     | 13       |
|    total timesteps  | 72266    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.008    |
|    n_updates        | 5566     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 4440     |
|    fps              | 5306     |
|    time_elapsed     | 13       |
|    total timesteps  | 72984    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0196   |
|    n_updates        | 5745     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 4444     |
|    fps              | 5300     |
|    time_elapsed     | 13       |
|    total timesteps  | 73023    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0959   |
|    n_updates        | 5755     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 4516     |
|    fps              | 5228     |
|    time_elapsed     | 14       |
|    total timesteps  | 73725    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0861   |
|    n_updates        | 5931     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 4520     |
|    fps              | 5225     |
|    time_elapsed     | 14       |
|    total timesteps  | 73766    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.121    |
|    n_updates        | 5941     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 4592     |
|    fps              | 5152     |
|    time_elapsed     | 14       |
|    total timesteps  | 74496    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0625   |
|    n_updates        | 6123     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 4596     |
|    fps              | 5147     |
|    time_elapsed     | 14       |
|    total timesteps  | 74534    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0697   |
|    n_updates        | 6133     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 4668     |
|    fps              | 5082     |
|    time_elapsed     | 14       |
|    total timesteps  | 75240    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0198   |
|    n_updates        | 6309     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 4672     |
|    fps              | 5079     |
|    time_elapsed     | 14       |
|    total timesteps  | 75279    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0465   |
|    n_updates        | 6319     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 4744     |
|    fps              | 5019     |
|    time_elapsed     | 15       |
|    total timesteps  | 75961    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0795   |
|    n_updates        | 6490     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 4748     |
|    fps              | 5016     |
|    time_elapsed     | 15       |
|    total timesteps  | 76000    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0245   |
|    n_updates        | 6499     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 4820     |
|    fps              | 4946     |
|    time_elapsed     | 15       |
|    total timesteps  | 76705    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0634   |
|    n_updates        | 6676     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 4824     |
|    fps              | 4942     |
|    time_elapsed     | 15       |
|    total timesteps  | 76743    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.103    |
|    n_updates        | 6685     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 4896     |
|    fps              | 4880     |
|    time_elapsed     | 15       |
|    total timesteps  | 77463    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0108   |
|    n_updates        | 6865     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 4900     |
|    fps              | 4877     |
|    time_elapsed     | 15       |
|    total timesteps  | 77501    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0324   |
|    n_updates        | 6875     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 4972     |
|    fps              | 4822     |
|    time_elapsed     | 16       |
|    total timesteps  | 78191    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.019    |
|    n_updates        | 7047     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 4976     |
|    fps              | 4819     |
|    time_elapsed     | 16       |
|    total timesteps  | 78232    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0429   |
|    n_updates        | 7057     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 5048     |
|    fps              | 4751     |
|    time_elapsed     | 16       |
|    total timesteps  | 78926    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0946   |
|    n_updates        | 7231     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 5052     |
|    fps              | 4748     |
|    time_elapsed     | 16       |
|    total timesteps  | 78965    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0201   |
|    n_updates        | 7241     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

Eval num_timesteps=79648, episode_reward=9.40 +/- 0.49
Episode length: 9.40 +/- 0.49
----------------------------------
| eval/               |          |
|    mean_ep_length   | 9.4      |
|    mean_reward      | 9.4      |
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 5124     |
|    fps              | 4691     |
|    time_elapsed     | 16       |
|    total timesteps  | 79666    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0914   |
|    n_updates        | 7416     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 5128     |
|    fps              | 4687     |
|    time_elapsed     | 17       |
|    total timesteps  | 79702    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 5196     |
|    fps              | 4633     |
|    time_elapsed     | 17       |
|    total timesteps  | 80350    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.116    |
|    n_updates        | 7587     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 5200     |
|    fps              | 4630     |
|    time_elapsed     | 17       |
|    total timesteps  | 80389    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0474   |
|    n_updates        | 7597     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 5272     |
|    fps              | 4548     |
|    time_elapsed     | 17       |
|    total timesteps  | 81178    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0206   |
|    n_updates        | 7794     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 5276     |
|    fps              | 4540     |
|    time_elapsed     | 17       |
|    total timesteps  | 81223    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.102    |
|    n_updates        | 7805     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 5348     |
|    fps              | 4478     |
|    time_elapsed     | 18       |
|    total timesteps  | 82018    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.146    |
|    n_updates        | 8004     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 5352     |
|    fps              | 4476     |
|    time_elapsed     | 18       |
|    total timesteps  | 82063    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0983   |
|    n_updates        | 8015     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 5424     |
|    fps              | 4299     |
|    time_elapsed     | 19       |
|    total timesteps  | 84871    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0176   |
|    n_updates        | 8717     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 5428     |
|    fps              | 4271     |
|    time_elapsed     | 20       |
|    total timesteps  | 85476    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.105    |
|    n_updates        | 8868     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 5496     |
|    fps              | 3989     |
|    time_elapsed     | 22       |
|    total timesteps  | 91674    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0682   |
|    n_updates        | 10418    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 5500     |
|    fps              | 3977     |
|    time_elapsed     | 23       |
|    total timesteps  | 91992    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0491   |
|    n_updates        | 10497    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

<stable_baselines3.dqn.dqn.DQN at 0x7fb2a52483a0>

In [46]:
import gym
from stable_baselines3 import A2C
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_atari_env
import os

In [51]:
!python -m atari_py.import_roms ./ROMS

copying asterix.bin from ./ROMS/Asterix (AKA Taz) (1984) (Atari, Jerome Domurat, Steve Woita) (CX2696).bin to /home/vinny/anaconda3/lib/python3.8/site-packages/atari_py/atari_roms/asterix.bin
copying space_invaders.bin from ./ROMS/Space Invaders (1980) (Atari, Richard Maurer - Sears) (CX2632 - 49-75153) ~.bin to /home/vinny/anaconda3/lib/python3.8/site-packages/atari_py/atari_roms/space_invaders.bin
copying surround.bin from ./ROMS/Surround - Chase (Blockade) (1977) (Atari, Alan Miller - Sears) (CX2641 - 99807, 49-75105) ~.bin to /home/vinny/anaconda3/lib/python3.8/site-packages/atari_py/atari_roms/surround.bin
copying venture.bin from ./ROMS/Venture (1982) (Coleco, Joseph Biel) (2457) ~.bin to /home/vinny/anaconda3/lib/python3.8/site-packages/atari_py/atari_roms/venture.bin
copying kung_fu_master.bin from ./ROMS/Kung-Fu Master (1987) (Activision - Imagineering, Dan Kitchen, Garry Kitchen) (AG-039-04) ~.bin to /home/vinny/anaconda3/lib/python3.8/site-packages/atari_py/atari_roms/ku

copying gopher.bin from ./ROMS/Gopher (Gopher Attack) (1982) (U.S. Games Corporation - JWDA, Sylvia Day, Todd Marshall, Robin McDaniel, Henry Will IV) (VC2001) ~.bin to /home/vinny/anaconda3/lib/python3.8/site-packages/atari_py/atari_roms/gopher.bin
copying jamesbond.bin from ./ROMS/James Bond 007 (James Bond Agent 007) (1984) (Parker Brothers - On-Time Software, Joe Gaucher, Louis Marbel) (PB5110) ~.bin to /home/vinny/anaconda3/lib/python3.8/site-packages/atari_py/atari_roms/jamesbond.bin
copying video_pinball.bin from ./ROMS/Video Pinball - Arcade Pinball (1981) (Atari, Bob Smith - Sears) (CX2648 - 49-75161) ~.bin to /home/vinny/anaconda3/lib/python3.8/site-packages/atari_py/atari_roms/video_pinball.bin
copying gravitar.bin from ./ROMS/Gravitar (1983) (Atari, Dan Hitchens, Mimi Nyden) (CX2685) ~.bin to /home/vinny/anaconda3/lib/python3.8/site-packages/atari_py/atari_roms/gravitar.bin
copying berzerk.bin from ./ROMS/Berzerk (1982) (Atari, Dan Hitchens - Sears) (CX2650 - 49-75168) 

In [59]:
env = gym.make('Breakout-v0')

In [60]:
env.observation_space

Box(0, 255, (210, 160, 3), uint8)

In [61]:
episodes = 5
for episode in range(episodes):
    obs = env.reset()
    done = False
    score = 0
    while not done:
        env.render()
        action = env.action_space.sample()
        obs, reward, done, info = env.step(action)
        score += reward
    print(f'Episode: {episode}, Score: {score}')
env.close()

Episode: 0, Score: 4.0
Episode: 1, Score: 0.0
Episode: 2, Score: 3.0
Episode: 3, Score: 1.0
Episode: 4, Score: 1.0


In [62]:
env.close()

In [66]:
env = make_atari_env('Breakout-v0', n_envs=8, seed=0)
env = VecFrameStack(env, n_stack=4)

In [67]:
log_path = os.path.join('Training', 'Logs')
model = A2C('CnnPolicy', env, verbose=1, tensorboard_log=log_path)

Using cpu device
Wrapping the env in a VecTransposeImage.


In [68]:
model.learn(total_timesteps=100000)

Logging to Training/Logs/A2C_1
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 270      |
|    ep_rew_mean        | 1.39     |
| time/                 |          |
|    fps                | 300      |
|    iterations         | 100      |
|    time_elapsed       | 13       |
|    total_timesteps    | 4000     |
| train/                |          |
|    entropy_loss       | -1.39    |
|    explained_variance | 0.471    |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | 0.0386   |
|    value_loss         | 0.0678   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 281      |
|    ep_rew_mean        | 1.55     |
| time/                 |          |
|    fps                | 308      |
|    iterations         | 200      |
|    time_elapsed       | 25       |
|    total_timesteps    | 8000     |
| train

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 439      |
|    ep_rew_mean        | 5.09     |
| time/                 |          |
|    fps                | 339      |
|    iterations         | 1400     |
|    time_elapsed       | 165      |
|    total_timesteps    | 56000    |
| train/                |          |
|    entropy_loss       | -0.695   |
|    explained_variance | 0.966    |
|    learning_rate      | 0.0007   |
|    n_updates          | 1399     |
|    policy_loss        | -0.0395  |
|    value_loss         | 0.0224   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 448      |
|    ep_rew_mean        | 5.17     |
| time/                 |          |
|    fps                | 340      |
|    iterations         | 1500     |
|    time_elapsed       | 176      |
|    total_timesteps    | 60000    |
| train/                |          |
|

<stable_baselines3.a2c.a2c.A2C at 0x7fb2943bf700>

In [69]:
a2c_path = os.path.join('Training', 'Saved Models', 'A2C_Breakout_Model')
model.save(a2c_path)

In [70]:
del model
model = A2C.load(a2c_path, env)

Wrapping the env in a VecTransposeImage.


In [71]:
env = make_atari_env('Breakout-v0', n_envs=1, seed=0)
env = VecFrameStack(env, n_stack=4)

In [73]:
evaluate_policy(model, env, n_eval_episodes=10, render=True)
env.close()

In [75]:
!pip install gym[box2d] pyglet

Collecting box2d-py~=2.3.5; extra == "box2d"
  Using cached box2d-py-2.3.8.tar.gz (374 kB)
Building wheels for collected packages: box2d-py
  Building wheel for box2d-py (setup.py) ... [?25ldone
[?25h  Created wheel for box2d-py: filename=box2d_py-2.3.8-cp38-cp38-linux_x86_64.whl size=3085491 sha256=d7b460384aa8716c18ff732cc8f92e29056de77b5f5326155402d5b4ee43c6ef
  Stored in directory: /home/vinny/.cache/pip/wheels/cc/4f/d6/44eb0a9e6fea384e58f19cb0c4125e46a23af2b33fe3a7e81c
Successfully built box2d-py
Installing collected packages: box2d-py
Successfully installed box2d-py-2.3.8


In [76]:
import gym
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
import os

In [77]:
env = gym.make('CarRacing-v0')



In [79]:
env.reset()
env.close()

Track generation: 1167..1463 -> 296-tiles track


In [80]:
env.action_space

Box(-1.0, 1.0, (3,), float32)

In [81]:
env.observation_space

Box(0, 255, (96, 96, 3), uint8)

In [82]:
episodes = 5
for episode in range(episodes):
    obs = env.reset()
    done = False
    score = 0
    
    while not done:
        env.render()
        action = env.action_space.sample()
        obs, reward, done, info = env.step(action)
        score += reward
    print(f'Episode: {episode}, Score: {score}')
env.close()

Track generation: 1145..1435 -> 290-tiles track
Episode: 0, Score: -34.25605536332229
Track generation: 1151..1443 -> 292-tiles track


KeyboardInterrupt: 

In [84]:
env = gym.make('CarRacing-v0')
env = DummyVecEnv([lambda: env])

In [85]:
log_path = os.path.join('Training', 'Logs')
model = PPO('CnnPolicy', env, verbose=1, tensorboard_log=log_path)

Using cpu device
Wrapping the env in a VecTransposeImage.


In [89]:
model.learn(total_timesteps=100000)

Track generation: 1178..1475 -> 297-tiles track
Logging to Training/Logs/PPO_6
Track generation: 1392..1744 -> 352-tiles track
Track generation: 1016..1274 -> 258-tiles track
-----------------------------
| time/              |      |
|    fps             | 66   |
|    iterations      | 1    |
|    time_elapsed    | 30   |
|    total_timesteps | 2048 |
-----------------------------
Track generation: 1310..1641 -> 331-tiles track
Track generation: 1232..1544 -> 312-tiles track
-----------------------------------------
| time/                   |             |
|    fps                  | 59          |
|    iterations           | 2           |
|    time_elapsed         | 68          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.018439773 |
|    clip_fraction        | 0.14        |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.2        |
|    explained_variance   | 0.264       |
|    learning_r

KeyboardInterrupt: 

In [87]:
env.close()

In [1]:
import gym
from gym import Env
from gym.spaces import Discrete, Box, Dict, Tuple, MultiBinary, MultiDiscrete

import numpy as np
import random
import os

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy

In [15]:
Discrete(3).sample()

2

In [16]:
Box(0,1,shape=(3,3)).sample()

array([[0.52564174, 0.47778586, 0.9179523 ],
       [0.39811847, 0.282433  , 0.1081292 ],
       [0.18681692, 0.96648556, 0.21933803]], dtype=float32)

In [17]:
Tuple((Discrete(3), Box(0,1,shape=(3,)))).sample()

(0, array([0.24881668, 0.6202386 , 0.67852414], dtype=float32))

In [18]:
Dict({'height':Discrete(2), 'speed':Box(0,100,shape=(1,))}).sample()

OrderedDict([('height', 1), ('speed', array([64.11167], dtype=float32))])

In [22]:
MultiBinary(4).sample()

array([1, 0, 1, 0], dtype=int8)

In [23]:
MultiDiscrete([5,2,2]).sample()

array([2, 1, 1])

In [24]:
# Building our own env
# make an agent to give us the best shower possible
# random temp
# between 37 and 39 degrees is perfect

In [26]:
class ShowerEnv(Env):
    def __init__(self):
        self.action_space = Discrete(3)
        self.observation_space = Box(low=0, high=100, shape=(1,))
        self.state = 38 + random.randint(-6,6)
        self.shower_length = 60
        
    def step(self, action):
        self.state += action-1
        self.shower_length -= 1
        
        if self.state >= 37 and self.state <= 39:
            reward = 1
        else:
            reward = -1
            
        if self.shower_length <= 0:
            done = True
        else:
            done = False
            
        info = {}
        
        return self.state, reward, done, info
        
    def render(self):
        pass
    def reset(self):
        self.state = 38 + random.randint(-6,6)
        self.shower_length = 60
        return self.state

In [27]:
env = ShowerEnv()

In [28]:
env.observation_space.sample()

array([75.097824], dtype=float32)

In [29]:
env.action_space.sample()

2

In [30]:
episodes = 5
for episode in range(episodes):
    obs = env.reset()
    done = False
    score = 0
    while not done:
        env.render()
        action = env.action_space.sample()
        obs, reward, done, info = env.step(action)
        score += reward
    print(f'Episode: {episode}, Score: {score}')
env.close()

Episode: 0, Score: -36
Episode: 1, Score: -46
Episode: 2, Score: -42
Episode: 3, Score: -46
Episode: 4, Score: -32


In [31]:
log_path = os.path.join('Training', 'Logs')
model = PPO('MlpPolicy', env, verbose=1, tensorboard_log=log_path)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [35]:
model.learn(total_timesteps=40000)

Logging to Training/Logs/PPO_10
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 60       |
|    ep_rew_mean     | 40.1     |
| time/              |          |
|    fps             | 3878     |
|    iterations      | 1        |
|    time_elapsed    | 0        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60          |
|    ep_rew_mean          | 44.3        |
| time/                   |             |
|    fps                  | 2411        |
|    iterations           | 2           |
|    time_elapsed         | 1           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.010472085 |
|    clip_fraction        | 0.125       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.821      |
|    explained_variance   | 0.000419    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60          |
|    ep_rew_mean          | 46.2        |
| time/                   |             |
|    fps                  | 1857        |
|    iterations           | 11          |
|    time_elapsed         | 12          |
|    total_timesteps      | 22528       |
| train/                  |             |
|    approx_kl            | 0.012441677 |
|    clip_fraction        | 0.158       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.805      |
|    explained_variance   | 0.000145    |
|    learning_rate        | 0.0003      |
|    loss                 | 29.3        |
|    n_updates            | 520         |
|    policy_gradient_loss | 0.00581     |
|    value_loss           | 81          |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60    

<stable_baselines3.ppo.ppo.PPO at 0x7fb28ef55ac0>

In [37]:
evaluate_policy(model, env, n_eval_episodes=10, render=True)

ValueError: Error: Unexpected observation shape () for Box environment, please use (1,) or (n_env, 1) for the observation shape.