In [1]:
from opt.mc_sim import *
from common.variables import *
from sim.sim_functions import *
import torch
import gym
from gym import spaces
from gym import Env
from gym.spaces import Discrete, Box, MultiDiscrete, Tuple, MultiBinary

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
s = SourcingEnv()

In [3]:
class CustomGymEnv(Env):
    #metadata = {'render.modes': ['human']}  
    
    def __init__(self, sourcing_env):
        self.SourcingEnv = sourcing_env
        self.counter = 0
        
        # Actions we can take, down, stay, up
        self.action_space = MultiDiscrete([30,30])

        # Inventory Observation State
        self.observation_space = Box(low=np.array([-30, 0, 0, 0, 0]), high=np.array([30, 30, 30, 1, 1]), shape=(5,), dtype=int)
        #Tuple(Box(-30,30,shape=(1,), dtype=int), Discrete(30), Discrete(30), MultiBinary(2))
                                       
    
    def step(self, action):        
        reward = self.reward_func(self.SourcingEnv.current_state, action)
        next_state, event, i, event_probs, supplier_index = self.SourcingEnv.step(action)
        self.counter += 1
        
        info = {}
        
        if self.counter < PERIODS:
            done = False
        else:
            done = True
        
        next_state_array = np.array(next_state.get_list_repr())
        return next_state_array, reward, done, info
    
    def reset(self):
        self.SourcingEnv = SourcingEnv()
        return np.array(self.SourcingEnv.current_state.get_list_repr())
        
    def reward_func(self, state, action):
        reward_hb = H_COST * state.s if state.s >= 0 else  -B_PENALTY * state.s 
        reward = reward_hb + np.sum(np.multiply(action, PROCUREMENT_COST_VEC))
        reward = float(reward)
        return -reward
    
    #def render(self):
        # Implement viz
       # pass

In [4]:
s = SourcingEnv()

In [5]:
custom_gym_env = CustomGymEnv(s)

In [6]:
m_state = custom_gym_env.SourcingEnv.current_state
m_state.get_list_repr()

[0, 0, 0, 1, 1]

In [7]:
str(m_state)

'Stock: 0, n backorders: [0. 0.], supplier status (on/off): [1. 1.]'

In [8]:
from stable_baselines3.common.env_checker import check_env

In [9]:
check_env(custom_gym_env) #warn=True)

In [10]:
episodes = 15
for episode in range(1, episodes+1):
    state = custom_gym_env.reset()
    done = False
    cost = 0
    
    
    while not done:
        #env.render()
        action = custom_gym_env.action_space.sample()
        n_state, reward, done, info = custom_gym_env.step(action)
        cost+=reward
        observation = custom_gym_env.step(action)
    print('Episode:{} Cost:{} Observation {}' .format(episode, cost, observation))
    

custom_gym_env.close()

Episode:1 Cost:-1055.0 Observation (array([23, 23,  8,  1,  1]), -1055.0, True, {})
Episode:2 Cost:-1275.0 Observation (array([26, 26, 42,  1,  1]), -1275.0, True, {})
Episode:3 Cost:-1110.0 Observation (array([23, 23, 30,  1,  1]), -1110.0, True, {})
Episode:4 Cost:-1195.0 Observation (array([26, 26, 10,  1,  1]), -1195.0, True, {})
Episode:5 Cost:-780.0 Observation (array([17, 17,  6,  1,  1]), -780.0, True, {})
Episode:6 Cost:-105.0 Observation (array([-2,  2, 24,  1,  1]), -115.0, True, {})
Episode:7 Cost:-810.0 Observation (array([27, 30, 27,  1,  1]), -810.0, True, {})
Episode:8 Cost:0.0 Observation (array([-1,  0,  0,  0,  1]), -10.0, True, {})
Episode:9 Cost:-660.0 Observation (array([14, 14, 12,  1,  1]), -660.0, True, {})
Episode:10 Cost:-540.0 Observation (array([12, 12,  0,  1,  1]), -540.0, True, {})
Episode:11 Cost:-115.0 Observation (array([ 1,  2, 10,  1,  1]), -125.0, True, {})
Episode:12 Cost:-515.0 Observation (array([13, 20, 13,  1,  1]), -515.0, True, {})
Episode:1

In [11]:
custom_gym_env.observation_space

Box([-30   0   0   0   0], [30 30 30  1  1], (5,), int64)

In [12]:
import os
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.evaluation import evaluate_policy

In [13]:
log_path = os.path.join('Training', 'Logs')

In [25]:
model = PPO("MlpPolicy", custom_gym_env, verbose=1, tensorboard_log=log_path)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [26]:
model.learn(total_timesteps=100000)

Logging to Training/Logs/PPO_6
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -750     |
| time/              |          |
|    fps             | 756      |
|    iterations      | 1        |
|    time_elapsed    | 0        |
|    total_timesteps | 100      |
---------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -438      |
| time/                   |           |
|    fps                  | 807       |
|    iterations           | 2         |
|    time_elapsed         | 0         |
|    total_timesteps      | 200       |
| train/                  |           |
|    approx_kl            | 0.5092664 |
|    clip_fraction        | 0.798     |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.61     |
|    explained_variance   | 0         |
|    learning_rate        | 0

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 865       |
|    iterations           | 12        |
|    time_elapsed         | 1         |
|    total_timesteps      | 1200      |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.89e-05 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 4.85      |
|    n_updates            | 110       |
|    policy_gradient_loss | 0.975     |
|    value_loss           | 8.49      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 867       |
|    iterations           | 22        |
|    time_elapsed         | 2         |
|    total_timesteps      | 2200      |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.93e-05 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0439    |
|    n_updates            | 210       |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.121     |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 876       |
|    iterations           | 32        |
|    time_elapsed         | 3         |
|    total_timesteps      | 3200      |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.01e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0435    |
|    n_updates            | 310       |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.121     |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 886       |
|    iterations           | 42        |
|    time_elapsed         | 4         |
|    total_timesteps      | 4200      |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.01e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | -0.702    |
|    n_updates            | 410       |
|    policy_gradient_loss | -0.746    |
|    value_loss           | 0.121     |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 891       |
|    iterations           | 52        |
|    time_elapsed         | 5         |
|    total_timesteps      | 5200      |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.04e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0434    |
|    n_updates            | 510       |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.121     |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 895       |
|    iterations           | 62        |
|    time_elapsed         | 6         |
|    total_timesteps      | 6200      |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.03e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0434    |
|    n_updates            | 610       |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.121     |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 898       |
|    iterations           | 72        |
|    time_elapsed         | 8         |
|    total_timesteps      | 7200      |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.17e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0434    |
|    n_updates            | 710       |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.121     |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 900       |
|    iterations           | 82        |
|    time_elapsed         | 9         |
|    total_timesteps      | 8200      |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.36e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.789     |
|    n_updates            | 810       |
|    policy_gradient_loss | 0.746     |
|    value_loss           | 0.121     |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 897       |
|    iterations           | 92        |
|    time_elapsed         | 10        |
|    total_timesteps      | 9200      |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.45e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0434    |
|    n_updates            | 910       |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.121     |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 894       |
|    iterations           | 102       |
|    time_elapsed         | 11        |
|    total_timesteps      | 10200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.35e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0434    |
|    n_updates            | 1010      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 891       |
|    iterations           | 112       |
|    time_elapsed         | 12        |
|    total_timesteps      | 11200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.42e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | -0.703    |
|    n_updates            | 1110      |
|    policy_gradient_loss | -0.746    |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 889       |
|    iterations           | 122       |
|    time_elapsed         | 13        |
|    total_timesteps      | 12200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.16e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.789     |
|    n_updates            | 1210      |
|    policy_gradient_loss | 0.746     |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 887       |
|    iterations           | 132       |
|    time_elapsed         | 14        |
|    total_timesteps      | 13200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -5.96e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0434    |
|    n_updates            | 1310      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 885       |
|    iterations           | 142       |
|    time_elapsed         | 16        |
|    total_timesteps      | 14200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -5.85e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | -0.702    |
|    n_updates            | 1410      |
|    policy_gradient_loss | -0.746    |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 884       |
|    iterations           | 152       |
|    time_elapsed         | 17        |
|    total_timesteps      | 15200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -5.69e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0435    |
|    n_updates            | 1510      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 883       |
|    iterations           | 162       |
|    time_elapsed         | 18        |
|    total_timesteps      | 16200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -5.97e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.789     |
|    n_updates            | 1610      |
|    policy_gradient_loss | 0.746     |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 884       |
|    iterations           | 172       |
|    time_elapsed         | 19        |
|    total_timesteps      | 17200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.16e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | -0.702    |
|    n_updates            | 1710      |
|    policy_gradient_loss | -0.746    |
|    value_loss           | 0.12      |
---------------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 1        |
|    ep_rew_mean          | -5       |
| ti

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 886       |
|    iterations           | 182       |
|    time_elapsed         | 20        |
|    total_timesteps      | 18200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.16e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.789     |
|    n_updates            | 1810      |
|    policy_gradient_loss | 0.746     |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 887       |
|    iterations           | 192       |
|    time_elapsed         | 21        |
|    total_timesteps      | 19200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.29e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0435    |
|    n_updates            | 1910      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 888       |
|    iterations           | 202       |
|    time_elapsed         | 22        |
|    total_timesteps      | 20200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.14e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0435    |
|    n_updates            | 2010      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 889       |
|    iterations           | 212       |
|    time_elapsed         | 23        |
|    total_timesteps      | 21200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.04e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0435    |
|    n_updates            | 2110      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 890       |
|    iterations           | 222       |
|    time_elapsed         | 24        |
|    total_timesteps      | 22200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -5.93e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0435    |
|    n_updates            | 2210      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 890       |
|    iterations           | 232       |
|    time_elapsed         | 26        |
|    total_timesteps      | 23200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -5.81e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | -0.702    |
|    n_updates            | 2310      |
|    policy_gradient_loss | -0.746    |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 891       |
|    iterations           | 242       |
|    time_elapsed         | 27        |
|    total_timesteps      | 24200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -5.62e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0436    |
|    n_updates            | 2410      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 891       |
|    iterations           | 252       |
|    time_elapsed         | 28        |
|    total_timesteps      | 25200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -5.61e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0436    |
|    n_updates            | 2510      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 891       |
|    iterations           | 262       |
|    time_elapsed         | 29        |
|    total_timesteps      | 26200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -5.44e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.79      |
|    n_updates            | 2610      |
|    policy_gradient_loss | 0.746     |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 1        |
|    ep_rew_mean          | -5       |
| time/                   |          |
|    fps                  | 892      |
|    iterations           | 272      |
|    time_elapsed         | 30       |
|    total_timesteps      | 27200    |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | -5.6e-07 |
|    explained_variance   | nan      |
|    learning_rate        | 0.1      |
|    loss                 | -0.702   |
|    n_updates            | 2710     |
|    policy_gradient_loss | -0.746   |
|    value_loss           | 0.12     |
--------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/              

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 892       |
|    iterations           | 282       |
|    time_elapsed         | 31        |
|    total_timesteps      | 28200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -5.43e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.79      |
|    n_updates            | 2810      |
|    policy_gradient_loss | 0.746     |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 892       |
|    iterations           | 292       |
|    time_elapsed         | 32        |
|    total_timesteps      | 29200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -5.51e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0436    |
|    n_updates            | 2910      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 893       |
|    iterations           | 302       |
|    time_elapsed         | 33        |
|    total_timesteps      | 30200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -5.24e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0436    |
|    n_updates            | 3010      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 893       |
|    iterations           | 312       |
|    time_elapsed         | 34        |
|    total_timesteps      | 31200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -5.33e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.79      |
|    n_updates            | 3110      |
|    policy_gradient_loss | 0.746     |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 1        |
|    ep_rew_mean          | -5       |
| time/                   |          |
|    fps                  | 894      |
|    iterations           | 322      |
|    time_elapsed         | 36       |
|    total_timesteps      | 32200    |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | -5.6e-07 |
|    explained_variance   | nan      |
|    learning_rate        | 0.1      |
|    loss                 | 0.0437   |
|    n_updates            | 3210     |
|    policy_gradient_loss | 0        |
|    value_loss           | 0.12     |
--------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/              

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 894       |
|    iterations           | 332       |
|    time_elapsed         | 37        |
|    total_timesteps      | 33200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -5.73e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0437    |
|    n_updates            | 3310      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 895       |
|    iterations           | 342       |
|    time_elapsed         | 38        |
|    total_timesteps      | 34200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -5.72e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0437    |
|    n_updates            | 3410      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 895       |
|    iterations           | 352       |
|    time_elapsed         | 39        |
|    total_timesteps      | 35200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -5.59e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | -0.702    |
|    n_updates            | 3510      |
|    policy_gradient_loss | -0.746    |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 895       |
|    iterations           | 362       |
|    time_elapsed         | 40        |
|    total_timesteps      | 36200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -5.61e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0437    |
|    n_updates            | 3610      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 895       |
|    iterations           | 372       |
|    time_elapsed         | 41        |
|    total_timesteps      | 37200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -5.61e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0437    |
|    n_updates            | 3710      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 895       |
|    iterations           | 382       |
|    time_elapsed         | 42        |
|    total_timesteps      | 38200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -5.62e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0437    |
|    n_updates            | 3810      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 1        |
|    ep_rew_mean          | -5       |
| time/                   |          |
|    fps                  | 896      |
|    iterations           | 392      |
|    time_elapsed         | 43       |
|    total_timesteps      | 39200    |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | -5.6e-07 |
|    explained_variance   | nan      |
|    learning_rate        | 0.1      |
|    loss                 | 0.79     |
|    n_updates            | 3910     |
|    policy_gradient_loss | 0.746    |
|    value_loss           | 0.12     |
--------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/              

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 896       |
|    iterations           | 402       |
|    time_elapsed         | 44        |
|    total_timesteps      | 40200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -5.61e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.79      |
|    n_updates            | 4010      |
|    policy_gradient_loss | 0.746     |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 1        |
|    ep_rew_mean          | -5       |
| time/                   |          |
|    fps                  | 896      |
|    iterations           | 412      |
|    time_elapsed         | 45       |
|    total_timesteps      | 41200    |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | -5.6e-07 |
|    explained_variance   | nan      |
|    learning_rate        | 0.1      |
|    loss                 | 0.0438   |
|    n_updates            | 4110     |
|    policy_gradient_loss | 0        |
|    value_loss           | 0.12     |
--------------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 1        |
|    ep_rew_mean          | -5       |
| time/                  

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 897       |
|    iterations           | 422       |
|    time_elapsed         | 47        |
|    total_timesteps      | 42200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -5.52e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.79      |
|    n_updates            | 4210      |
|    policy_gradient_loss | 0.746     |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 897       |
|    iterations           | 432       |
|    time_elapsed         | 48        |
|    total_timesteps      | 43200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -5.71e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0438    |
|    n_updates            | 4310      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 897       |
|    iterations           | 442       |
|    time_elapsed         | 49        |
|    total_timesteps      | 44200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -5.81e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | -0.702    |
|    n_updates            | 4410      |
|    policy_gradient_loss | -0.746    |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 897       |
|    iterations           | 452       |
|    time_elapsed         | 50        |
|    total_timesteps      | 45200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -5.74e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.79      |
|    n_updates            | 4510      |
|    policy_gradient_loss | 0.746     |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 898       |
|    iterations           | 462       |
|    time_elapsed         | 51        |
|    total_timesteps      | 46200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -5.61e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.79      |
|    n_updates            | 4610      |
|    policy_gradient_loss | 0.746     |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 898       |
|    iterations           | 472       |
|    time_elapsed         | 52        |
|    total_timesteps      | 47200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -5.55e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | -0.702    |
|    n_updates            | 4710      |
|    policy_gradient_loss | -0.746    |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 898       |
|    iterations           | 482       |
|    time_elapsed         | 53        |
|    total_timesteps      | 48200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -5.47e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0439    |
|    n_updates            | 4810      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 898       |
|    iterations           | 492       |
|    time_elapsed         | 54        |
|    total_timesteps      | 49200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -5.58e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.79      |
|    n_updates            | 4910      |
|    policy_gradient_loss | 0.746     |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 1        |
|    ep_rew_mean          | -5       |
| time/                   |          |
|    fps                  | 898      |
|    iterations           | 502      |
|    time_elapsed         | 55       |
|    total_timesteps      | 50200    |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | -5.8e-07 |
|    explained_variance   | nan      |
|    learning_rate        | 0.1      |
|    loss                 | 0.0439   |
|    n_updates            | 5010     |
|    policy_gradient_loss | 0        |
|    value_loss           | 0.12     |
--------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/              

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 898       |
|    iterations           | 512       |
|    time_elapsed         | 56        |
|    total_timesteps      | 51200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -5.81e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.79      |
|    n_updates            | 5110      |
|    policy_gradient_loss | 0.746     |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 898       |
|    iterations           | 522       |
|    time_elapsed         | 58        |
|    total_timesteps      | 52200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.31e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | -0.702    |
|    n_updates            | 5210      |
|    policy_gradient_loss | -0.746    |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 898       |
|    iterations           | 532       |
|    time_elapsed         | 59        |
|    total_timesteps      | 53200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.21e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.044     |
|    n_updates            | 5310      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 898       |
|    iterations           | 542       |
|    time_elapsed         | 60        |
|    total_timesteps      | 54200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.53e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.79      |
|    n_updates            | 5410      |
|    policy_gradient_loss | 0.746     |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 898       |
|    iterations           | 552       |
|    time_elapsed         | 61        |
|    total_timesteps      | 55200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.99e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | -0.702    |
|    n_updates            | 5510      |
|    policy_gradient_loss | -0.746    |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 898       |
|    iterations           | 562       |
|    time_elapsed         | 62        |
|    total_timesteps      | 56200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.71e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.79      |
|    n_updates            | 5610      |
|    policy_gradient_loss | 0.746     |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 898       |
|    iterations           | 572       |
|    time_elapsed         | 63        |
|    total_timesteps      | 57200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -7.02e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.044     |
|    n_updates            | 5710      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 898       |
|    iterations           | 582       |
|    time_elapsed         | 64        |
|    total_timesteps      | 58200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -7.21e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0441    |
|    n_updates            | 5810      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 898       |
|    iterations           | 592       |
|    time_elapsed         | 65        |
|    total_timesteps      | 59200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -7.39e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | -0.702    |
|    n_updates            | 5910      |
|    policy_gradient_loss | -0.746    |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 898       |
|    iterations           | 602       |
|    time_elapsed         | 67        |
|    total_timesteps      | 60200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.82e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | -0.702    |
|    n_updates            | 6010      |
|    policy_gradient_loss | -0.746    |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 1        |
|    ep_rew_mean          | -5       |
| time/                   |          |
|    fps                  | 898      |
|    iterations           | 612      |
|    time_elapsed         | 68       |
|    total_timesteps      | 61200    |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | -7e-07   |
|    explained_variance   | nan      |
|    learning_rate        | 0.1      |
|    loss                 | 0.79     |
|    n_updates            | 6110     |
|    policy_gradient_loss | 0.746    |
|    value_loss           | 0.12     |
--------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/              

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 898       |
|    iterations           | 622       |
|    time_elapsed         | 69        |
|    total_timesteps      | 62200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -7.64e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.79      |
|    n_updates            | 6210      |
|    policy_gradient_loss | 0.746     |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 898       |
|    iterations           | 632       |
|    time_elapsed         | 70        |
|    total_timesteps      | 63200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.06e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0441    |
|    n_updates            | 6310      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 897       |
|    iterations           | 642       |
|    time_elapsed         | 71        |
|    total_timesteps      | 64200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -7.83e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | -0.702    |
|    n_updates            | 6410      |
|    policy_gradient_loss | -0.746    |
|    value_loss           | 0.12      |
---------------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 1        |
|    ep_rew_mean          | -5       |
| ti

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 898       |
|    iterations           | 652       |
|    time_elapsed         | 72        |
|    total_timesteps      | 65200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -7.62e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0442    |
|    n_updates            | 6510      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 1        |
|    ep_rew_mean          | -5       |
| ti

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 898       |
|    iterations           | 662       |
|    time_elapsed         | 73        |
|    total_timesteps      | 66200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -7.21e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.79      |
|    n_updates            | 6610      |
|    policy_gradient_loss | 0.746     |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 898       |
|    iterations           | 672       |
|    time_elapsed         | 74        |
|    total_timesteps      | 67200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -7.12e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | -0.702    |
|    n_updates            | 6710      |
|    policy_gradient_loss | -0.746    |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 898       |
|    iterations           | 682       |
|    time_elapsed         | 75        |
|    total_timesteps      | 68200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.79e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.79      |
|    n_updates            | 6810      |
|    policy_gradient_loss | 0.746     |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 898       |
|    iterations           | 692       |
|    time_elapsed         | 76        |
|    total_timesteps      | 69200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.91e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0442    |
|    n_updates            | 6910      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 898       |
|    iterations           | 702       |
|    time_elapsed         | 78        |
|    total_timesteps      | 70200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -7.13e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0442    |
|    n_updates            | 7010      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 899       |
|    iterations           | 712       |
|    time_elapsed         | 79        |
|    total_timesteps      | 71200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -7.33e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.79      |
|    n_updates            | 7110      |
|    policy_gradient_loss | 0.746     |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 899       |
|    iterations           | 722       |
|    time_elapsed         | 80        |
|    total_timesteps      | 72200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -7.55e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0443    |
|    n_updates            | 7210      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 899       |
|    iterations           | 732       |
|    time_elapsed         | 81        |
|    total_timesteps      | 73200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.02e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.79      |
|    n_updates            | 7310      |
|    policy_gradient_loss | 0.746     |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 899       |
|    iterations           | 742       |
|    time_elapsed         | 82        |
|    total_timesteps      | 74200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.24e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0443    |
|    n_updates            | 7410      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 899       |
|    iterations           | 752       |
|    time_elapsed         | 83        |
|    total_timesteps      | 75200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.31e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.79      |
|    n_updates            | 7510      |
|    policy_gradient_loss | 0.746     |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 899       |
|    iterations           | 762       |
|    time_elapsed         | 84        |
|    total_timesteps      | 76200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.89e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.79      |
|    n_updates            | 7610      |
|    policy_gradient_loss | 0.746     |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 899       |
|    iterations           | 772       |
|    time_elapsed         | 85        |
|    total_timesteps      | 77200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -9.85e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0443    |
|    n_updates            | 7710      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 899       |
|    iterations           | 782       |
|    time_elapsed         | 86        |
|    total_timesteps      | 78200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.87e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0443    |
|    n_updates            | 7810      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 899       |
|    iterations           | 792       |
|    time_elapsed         | 88        |
|    total_timesteps      | 79200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.43e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | -0.702    |
|    n_updates            | 7910      |
|    policy_gradient_loss | -0.746    |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 900       |
|    iterations           | 802       |
|    time_elapsed         | 89        |
|    total_timesteps      | 80200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.46e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | -0.702    |
|    n_updates            | 8010      |
|    policy_gradient_loss | -0.746    |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 900       |
|    iterations           | 812       |
|    time_elapsed         | 90        |
|    total_timesteps      | 81200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.02e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.79      |
|    n_updates            | 8110      |
|    policy_gradient_loss | 0.746     |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 900       |
|    iterations           | 822       |
|    time_elapsed         | 91        |
|    total_timesteps      | 82200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -7.67e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | -0.702    |
|    n_updates            | 8210      |
|    policy_gradient_loss | -0.746    |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 900       |
|    iterations           | 832       |
|    time_elapsed         | 92        |
|    total_timesteps      | 83200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.87e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0444    |
|    n_updates            | 8310      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 900       |
|    iterations           | 842       |
|    time_elapsed         | 93        |
|    total_timesteps      | 84200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -7.05e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0444    |
|    n_updates            | 8410      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 1        |
|    ep_rew_mean          | -5       |
| time/                   |          |
|    fps                  | 900      |
|    iterations           | 852      |
|    time_elapsed         | 94       |
|    total_timesteps      | 85200    |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | -7.2e-07 |
|    explained_variance   | nan      |
|    learning_rate        | 0.1      |
|    loss                 | 0.0444   |
|    n_updates            | 8510     |
|    policy_gradient_loss | 0        |
|    value_loss           | 0.12     |
--------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/              

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 900       |
|    iterations           | 862       |
|    time_elapsed         | 95        |
|    total_timesteps      | 86200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -7.66e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0444    |
|    n_updates            | 8610      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 900       |
|    iterations           | 872       |
|    time_elapsed         | 96        |
|    total_timesteps      | 87200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -7.66e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0445    |
|    n_updates            | 8710      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 900       |
|    iterations           | 882       |
|    time_elapsed         | 97        |
|    total_timesteps      | 88200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -7.43e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0445    |
|    n_updates            | 8810      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 900       |
|    iterations           | 892       |
|    time_elapsed         | 99        |
|    total_timesteps      | 89200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.66e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0445    |
|    n_updates            | 8910      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 900       |
|    iterations           | 902       |
|    time_elapsed         | 100       |
|    total_timesteps      | 90200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.63e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | -0.701    |
|    n_updates            | 9010      |
|    policy_gradient_loss | -0.746    |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 900       |
|    iterations           | 912       |
|    time_elapsed         | 101       |
|    total_timesteps      | 91200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.99e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.79      |
|    n_updates            | 9110      |
|    policy_gradient_loss | 0.746     |
|    value_loss           | 0.12      |
---------------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 1        |
|    ep_rew_mean          | -5       |
| ti

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 900       |
|    iterations           | 922       |
|    time_elapsed         | 102       |
|    total_timesteps      | 92200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.64e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0445    |
|    n_updates            | 9210      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 901       |
|    iterations           | 932       |
|    time_elapsed         | 103       |
|    total_timesteps      | 93200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.56e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | -0.701    |
|    n_updates            | 9310      |
|    policy_gradient_loss | -0.746    |
|    value_loss           | 0.12      |
---------------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 1        |
|    ep_rew_mean          | -5       |
| ti

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 901       |
|    iterations           | 942       |
|    time_elapsed         | 104       |
|    total_timesteps      | 94200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.58e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0446    |
|    n_updates            | 9410      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 1        |
|    ep_rew_mean          | -5       |
| ti

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 901       |
|    iterations           | 952       |
|    time_elapsed         | 105       |
|    total_timesteps      | 95200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.76e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0446    |
|    n_updates            | 9510      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 901       |
|    iterations           | 962       |
|    time_elapsed         | 106       |
|    total_timesteps      | 96200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.58e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | -0.701    |
|    n_updates            | 9610      |
|    policy_gradient_loss | -0.746    |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 901       |
|    iterations           | 972       |
|    time_elapsed         | 107       |
|    total_timesteps      | 97200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.27e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0446    |
|    n_updates            | 9710      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 901       |
|    iterations           | 982       |
|    time_elapsed         | 108       |
|    total_timesteps      | 98200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.43e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.0446    |
|    n_updates            | 9810      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |
| time/                   |           |
|    fps                  | 901       |
|    iterations           | 992       |
|    time_elapsed         | 110       |
|    total_timesteps      | 99200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.31e-07 |
|    explained_variance   | nan       |
|    learning_rate        | 0.1       |
|    loss                 | 0.791     |
|    n_updates            | 9910      |
|    policy_gradient_loss | 0.746     |
|    value_loss           | 0.12      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1         |
|    ep_rew_mean          | -5        |


<stable_baselines3.ppo.ppo.PPO at 0x7f45b8257a30>

In [None]:
model = PPO("MlpPolicy", custom_gym_env, verbose=1, tensorboard_log=log_path, learning_rate=0.1, device='auto', batch_size=10, n_steps=10)

In [None]:
model.learn(total_timesteps=100000)

In [None]:
model = PPO("MlpPolicy", custom_gym_env, verbose=1, tensorboard_log=log_path, learning_rate=0.1, device='auto', batch_size=100, n_steps=100)

In [None]:
model.learn(total_timesteps=100000)

In [None]:
model = PPO("MlpPolicy", custom_gym_env, verbose=1, tensorboard_log=log_path, learning_rate=0.1, device='auto', batch_size=1000, n_steps=1000)

In [None]:
model.learn(total_timesteps=100000)

In [None]:
model = PPO("MlpPolicy", custom_gym_env, verbose=1, tensorboard_log=log_path, learning_rate=0.001, device='auto', batch_size=10, n_steps=10)

In [None]:
model.learn(total_timesteps=100000)

In [None]:
model = PPO("MlpPolicy", custom_gym_env, verbose=1, tensorboard_log=log_path, learning_rate=0.001, device='auto', batch_size=100, n_steps=100)

In [None]:
model.learn(total_timesteps=100000)

In [None]:
model = PPO("MlpPolicy", custom_gym_env, verbose=1, tensorboard_log=log_path, learning_rate=0.001, device='auto', batch_size=1000, n_steps=1000)

In [None]:
model.learn(total_timesteps=100000)

In [None]:
model = PPO("MlpPolicy", custom_gym_env, verbose=1, tensorboard_log=log_path, learning_rate=0.00001, device='auto', batch_size=10, n_steps=10)

In [None]:
model.learn(total_timesteps=100000)

In [None]:
model = PPO("MlpPolicy", custom_gym_env, verbose=1, tensorboard_log=log_path, learning_rate=0.00001, device='auto', batch_size=100, n_steps=100)

In [None]:
model.learn(total_timesteps=100000)

In [None]:
model = PPO("MlpPolicy", custom_gym_env, verbose=1, tensorboard_log=log_path, learning_rate=0.00001, device='auto', batch_size=1000, n_steps=1000)

In [None]:
model.learn(total_timesteps=100000)

In [16]:
model.save('PPO')

In [17]:
evaluate_policy(model, custom_gym_env, n_eval_episodes=10, render=False)



(-1055.0, 0.0)

In [18]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cpu
