In [28]:
# Importing data manipulation/visualization packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Import ML framework library
import torch as th
import torch.nn as nn
import torch.nn.init as init

# Importing gym packages
import gymnasium as gym
from gymnasium import spaces

# Importing IRL libraries
from stable_baselines3.ppo import MlpPolicy
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3 import PPO

# Importing imitation library
from imitation.algorithms.adversarial.airl import AIRL
from imitation.util import util
from imitation.data import rollout
from imitation.data.wrappers import RolloutInfoWrapper
from imitation.util.util import make_vec_env
from imitation.rewards.reward_nets import BasicShapedRewardNet
from imitation.rewards.reward_nets import RewardNet
from imitation.util.networks import RunningNorm
from imitation.util import networks, util

# Import miscellaneuous packages
import random
from scipy.stats import norm
from scipy.stats import bernoulli
from torch.distributions import Categorical

In [29]:
# Setting the seed
SEED = 42
np.random.seed(SEED)
th.manual_seed(SEED)
if th.cuda.is_available():
    th.cuda.manual_seed_all(SEED)

In [30]:
# Arbitrary weights for the importance of the engagement level and section number
a, b = 1, 0.5

# Arbitary thresholds for what to what scores should be considered extremely interested, mildy interested, and not interested
theta_1 = 0.9
theta_2 = 0.4

# Define actions
ACTION_WAIT = 0
ACTION_READ_FREE = 1
ACTION_READ_PAY = 2

# Define sections
BEGINNING = 0
MIDDLE = 1
END = 2

# Gamma decay for hours waited (in days)
gamma = 0.1

# Number of chapters
NUM_CHAPTERS = 24

In [None]:
# Helper function to calculate how interested an agent is
def interest_score (engagement_level, section_number):
    norm_section_number = (section_number - 1) / (NUM_CHAPTERS - 1)
    return a * engagement_level + b * norm_section_number

In [None]:
# Helper function to draw from Bernoulli (in my case, want to draw to choose between 2 values)
def probabilistic_choice(options):
    actions, probabilities = zip(*options)
    return random.choices(actions, weights=probabilities, k=1)[0]

In [None]:
def find_section(chapter):
    if chapter <= 1/3 * NUM_CHAPTERS:
        return BEGINNING
    elif 1/3 * NUM_CHAPTERS < chapter <= 2/3 * NUM_CHAPTERS:
        return MIDDLE
    else:
        return END


In [None]:
def true_reward (state, action):
	
	section_number, engagement_level, time, price, wff, wff_hours_required, wff_hours_waited = state
	section = find_section(section_number)
	score = interest_score(engagement_level, section_number)
	
	if section == BEGINNING:
		if score > theta_1:
			if price == 0:
				if action == ACTION_READ_FREE:
					return 9
				else:
					return -1
		elif theta_2 < score < theta_1:
			if price == 0:
				if action == ACTION_READ_FREE:
					return 7
				else:
					return -1
		else:
			if price == 0:
				if action == ACTION_READ_FREE:
					return 5
				else:
					return -1
	elif section == MIDDLE:
		if wff:
			return 6 * (1 - gamma) ** (wff_hours_waited / 24)
		else:
			if score > theta_1:
				if price == 1:
					if action == ACTION_READ_PAY:
						return 10
					else:
						return -1
			elif theta_2 < score < theta_1:
				if price == 0:
					if action == ACTION_READ_PAY:
						return 8
					else:
						return -1
			else:
				if price == 0:
					if action == ACTION_READ_PAY:
						return 6
					else:
						return -1
	else:
		if score > theta_1:
			if price == 1:
				if action == ACTION_READ_PAY:
					return 11
				else:
					return -1
		elif theta_2 < score < theta_1:
			if price == 1:
				if action == ACTION_READ_PAY:
					return 9
				else:
					return -1
		else:
			if price == 1:
				if action == ACTION_READ_PAY:
					return 7
				else:
					return -1
	return -5

In [35]:
class EBookEnv(gym.Env):
    def __init__(self):
        super(EBookEnv).__init__()

        # state =  [section_number, engagement_level, time, price, wff, wff_hours_required, wff_hours_waited]
        self.observation_space = spaces.Box(low=0, high=1, shape=(7,), dtype=np.float32)

        # action = [wait, read_without_payment, read_with_payment]
        self.action_space = spaces.Discrete(3)

        self.state = None
        self.times_bought = 0

    def reset(self, seed=None, options=None):
        # Resetting the state so that the reader begins at section 1, at a 0.8 engagement level, time 0, price 0, wff 0, and the wff hours stuff 0
        self.state = np.array([1, 0.8, 0, 0, 0, 0, 0], dtype=np.float32)
        self.times_bought = 0
        return self.state, {}

    def step(self, action):
        # Calculate the reward
        reward = true_reward(self.state, action)

        section_number, engagement_level, time, price, wff, wff_hours_required, wff_hours_waited = self.state
        section = find_section(section_number)

        if section == BEGINNING:
            if action == ACTION_READ_FREE:
                section_number += 1
                time = 0
            engagement_level = norm.cdf(np.random.normal(0, 1)) # Draw the engagement index from a N~(0, 1) and normalize to [0, 1]
            if find_section(section_number) == BEGINNING:
                price = 0
            else:
                price = 1
        elif section == MIDDLE:
            if wff == 0:
                if action == ACTION_READ_PAY:
                    section_number += 1
                    if find_section(section_number) == MIDDLE:
                        wff = np.random.choice([0, 1], p=[0.5, 0.5])
                        if wff:
                            wff_hours_required = np.random.randint(1, 4)
                            wff_hours_waited = 0
                    engagement_level = norm.cdf(np.random.normal(0, 1)) # Draw the engagement index from a N~(0, 1) and normalize to [0, 1]
                    time = 0
                price = 1
                time += 1
            else:
                if ACTION_READ_PAY:
                    section_number += 1
                    wff = 0
                    wff_hours_required = 0
                    wff_hours_waited = 0
                    time += 1
                    engagement_level = norm.cdf(np.random.normal(0, 1)) # Draw the engagement index from a N~(0, 1) and normalize to [0, 1]
                    price = 1
                else:
                    wff_hours_waited += 1
                    time += 1
                    if wff_hours_waited == wff_hours_required:
                        wff = 0
        else:
            if action == ACTION_READ_PAY:
                section_number += 1
                time = 0
            price = 1
            engagement_level = norm.cdf(np.random.normal(0, 1)) # Draw the engagement index from a N~(0, 1) and normalize to [0, 1]
            time += 1
        # Save the new state
        self.state = np.array([section_number, engagement_level, time, price, wff, wff_hours_required, wff_hours_waited], dtype=np.float32)

        # The terminating condition is when you reach the end of a book or if the time interval gets very large
        done = time >= 120 or section_number > NUM_CHAPTERS

        truncated = False
        info = {"obs": self.state, "rews": reward}
        return self.state, float(reward), done, truncated, info

    def render(self, mode='human'):
        pass


In [36]:
# Register the gym for compatability with OpenAI Gym
gym.register(id='EBookEnv-v1', entry_point=lambda: EBookEnv())
venv = util.make_vec_env("EBookEnv-v1", rng=np.random.default_rng(SEED), n_envs=1, post_wrappers=[lambda env, _: RolloutInfoWrapper(env)])

  logger.warn(f"Overriding environment {new_spec.id} already in registry.")


In [37]:
expert = PPO("MlpPolicy", venv, verbose=1)
expert.learn(total_timesteps=500000)

Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 68.1     |
|    ep_rew_mean     | 81.8     |
| time/              |          |
|    fps             | 5073     |
|    iterations      | 1        |
|    time_elapsed    | 0        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 61.1        |
|    ep_rew_mean          | 88.7        |
| time/                   |             |
|    fps                  | 3608        |
|    iterations           | 2           |
|    time_elapsed         | 1           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.013774436 |
|    clip_fraction        | 0.195       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.09       |
|    explained_variance   | -0.000417   |
|    learning

<stable_baselines3.ppo.ppo.PPO at 0x37991b880>

In [38]:
# Collect 500 trajectories of the expert behavior 
trajectories = rollout.rollout(
    expert,
    venv,
    rollout.make_sample_until(min_episodes=50000),
    rng=np.random.default_rng(SEED),
)


In [None]:
learner = PPO(
    env=venv,
    policy=MlpPolicy,
    batch_size=64,
    ent_coef=0.0,
    learning_rate=0.0005,
    gamma=0.95,
    clip_range=0.1,
    vf_coef=0.1,
    n_epochs=5,
    seed=SEED,
)

"""
    Initialize a reward network. 
"""
reward_net = BasicShapedRewardNet(
    observation_space=venv.observation_space,
    action_space=venv.action_space,
    normalize_input_layer=RunningNorm,
)

# Intialize parameters for AIRL model
airl_trainer = AIRL(
    demonstrations=trajectories,
    demo_batch_size=2048,
    gen_replay_buffer_capacity=512,
    n_disc_updates_per_round=16,
    venv=venv,
    gen_algo=learner,
    reward_net=reward_net,
    allow_variable_horizon=True
)

Running with `allow_variable_horizon` set to True. Some algorithms are biased towards shorter or longer episodes, which may significantly confound results. Additionally, even unbiased algorithms can exploit the information leak from the termination condition, producing spuriously high performance. See https://imitation.readthedocs.io/en/latest/getting-started/variable-horizon.html for more information.


In [40]:
venv.seed(SEED)

airl_trainer.train(500000)  # Train for 2_000_000 steps to match expert by learning a reward and a policy

round:   0%|          | 0/244 [00:00<?, ?it/s]

------------------------------------------
| raw/                        |          |
|    gen/rollout/ep_len_mean  | 67.6     |
|    gen/rollout/ep_rew_mean  | 80.5     |
|    gen/time/fps             | 2264     |
|    gen/time/iterations      | 1        |
|    gen/time/time_elapsed    | 0        |
|    gen/time/total_timesteps | 2048     |
------------------------------------------
--------------------------------------------------
| raw/                                |          |
|    disc/disc_acc                    | 0.5      |
|    disc/disc_acc_expert             | 1        |
|    disc/disc_acc_gen                | 0        |
|    disc/disc_entropy                | 0.565    |
|    disc/disc_loss                   | 0.813    |
|    disc/disc_proportion_expert_pred | 1        |
|    disc/disc_proportion_expert_true | 0.5      |
|    disc/global_step                 | 1        |
|    disc/n_expert                    | 2.05e+03 |
|    disc/n_generated                 | 2.05e+03 |
-

round:   0%|          | 1/244 [00:01<06:32,  1.62s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 66.4        |
|    gen/rollout/ep_rew_mean         | 82.8        |
|    gen/rollout/ep_rew_wrapped_mean | 55.8        |
|    gen/time/fps                    | 2271        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 4096        |
|    gen/train/approx_kl             | 0.004087215 |
|    gen/train/clip_fraction         | 0.275       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -1.09       |
|    gen/train/explained_variance    | -0.013      |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 0.585       |
|    gen/train/n_updates             | 5           |
|    gen/train/policy_gradient_loss  | -0.00756    |
|    gen/train/value_loss            | 21.5   

round:   1%|          | 2/244 [00:03<06:07,  1.52s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 65.7         |
|    gen/rollout/ep_rew_mean         | 82.4         |
|    gen/rollout/ep_rew_wrapped_mean | 18.3         |
|    gen/time/fps                    | 2354         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 6144         |
|    gen/train/approx_kl             | 0.0067637516 |
|    gen/train/clip_fraction         | 0.343        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -1.09        |
|    gen/train/explained_variance    | -3.6         |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.0921       |
|    gen/train/n_updates             | 10           |
|    gen/train/policy_gradient_loss  | -0.00851     |
|    gen/train/value_loss   

round:   1%|          | 3/244 [00:04<06:05,  1.52s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 66.4         |
|    gen/rollout/ep_rew_mean         | 79.9         |
|    gen/rollout/ep_rew_wrapped_mean | 1.62         |
|    gen/time/fps                    | 2290         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 8192         |
|    gen/train/approx_kl             | 0.0059800865 |
|    gen/train/clip_fraction         | 0.245        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -1.08        |
|    gen/train/explained_variance    | -0.338       |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.0809       |
|    gen/train/n_updates             | 15           |
|    gen/train/policy_gradient_loss  | -0.0123      |
|    gen/train/value_loss   

round:   2%|▏         | 4/244 [00:06<05:55,  1.48s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 65.4         |
|    gen/rollout/ep_rew_mean         | 78           |
|    gen/rollout/ep_rew_wrapped_mean | -24.4        |
|    gen/time/fps                    | 2356         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 10240        |
|    gen/train/approx_kl             | 0.0065967073 |
|    gen/train/clip_fraction         | 0.331        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -1.07        |
|    gen/train/explained_variance    | 0.0901       |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.379        |
|    gen/train/n_updates             | 20           |
|    gen/train/policy_gradient_loss  | -0.0176      |
|    gen/train/value_loss   

round:   2%|▏         | 5/244 [00:07<05:46,  1.45s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 63.3        |
|    gen/rollout/ep_rew_mean         | 77.7        |
|    gen/rollout/ep_rew_wrapped_mean | -41.3       |
|    gen/time/fps                    | 1985        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 1           |
|    gen/time/total_timesteps        | 12288       |
|    gen/train/approx_kl             | 0.005220633 |
|    gen/train/clip_fraction         | 0.319       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -1.06       |
|    gen/train/explained_variance    | 0.266       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 0.618       |
|    gen/train/n_updates             | 25          |
|    gen/train/policy_gradient_loss  | -0.0198     |
|    gen/train/value_loss            | 8.27   

round:   2%|▏         | 6/244 [00:09<06:43,  1.69s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 57           |
|    gen/rollout/ep_rew_mean         | 91.4         |
|    gen/rollout/ep_rew_wrapped_mean | -50.9        |
|    gen/time/fps                    | 2317         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 14336        |
|    gen/train/approx_kl             | 0.0046315575 |
|    gen/train/clip_fraction         | 0.363        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -1.04        |
|    gen/train/explained_variance    | 0.332        |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 1.21         |
|    gen/train/n_updates             | 30           |
|    gen/train/policy_gradient_loss  | -0.0202      |
|    gen/train/value_loss   

round:   3%|▎         | 7/244 [00:11<06:20,  1.61s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 54.2         |
|    gen/rollout/ep_rew_mean         | 96.5         |
|    gen/rollout/ep_rew_wrapped_mean | -52.1        |
|    gen/time/fps                    | 2306         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 16384        |
|    gen/train/approx_kl             | 0.0043758126 |
|    gen/train/clip_fraction         | 0.366        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -1           |
|    gen/train/explained_variance    | 0.413        |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 1.23         |
|    gen/train/n_updates             | 35           |
|    gen/train/policy_gradient_loss  | -0.0219      |
|    gen/train/value_loss   

round:   3%|▎         | 8/244 [00:12<06:03,  1.54s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 51           |
|    gen/rollout/ep_rew_mean         | 103          |
|    gen/rollout/ep_rew_wrapped_mean | -54.7        |
|    gen/time/fps                    | 2349         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 18432        |
|    gen/train/approx_kl             | 0.0047064973 |
|    gen/train/clip_fraction         | 0.367        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.97        |
|    gen/train/explained_variance    | 0.497        |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.797        |
|    gen/train/n_updates             | 40           |
|    gen/train/policy_gradient_loss  | -0.0244      |
|    gen/train/value_loss   

round:   4%|▎         | 9/244 [00:14<06:21,  1.62s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 47.4         |
|    gen/rollout/ep_rew_mean         | 111          |
|    gen/rollout/ep_rew_wrapped_mean | -54.3        |
|    gen/time/fps                    | 1255         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 1            |
|    gen/time/total_timesteps        | 20480        |
|    gen/train/approx_kl             | 0.0051368657 |
|    gen/train/clip_fraction         | 0.389        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.927       |
|    gen/train/explained_variance    | 0.528        |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 1.36         |
|    gen/train/n_updates             | 45           |
|    gen/train/policy_gradient_loss  | -0.0241      |
|    gen/train/value_loss   

round:   4%|▍         | 10/244 [00:16<07:24,  1.90s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 43.3        |
|    gen/rollout/ep_rew_mean         | 122         |
|    gen/rollout/ep_rew_wrapped_mean | -53.2       |
|    gen/time/fps                    | 1709        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 1           |
|    gen/time/total_timesteps        | 22528       |
|    gen/train/approx_kl             | 0.006085702 |
|    gen/train/clip_fraction         | 0.479       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.892      |
|    gen/train/explained_variance    | 0.535       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 1.65        |
|    gen/train/n_updates             | 50          |
|    gen/train/policy_gradient_loss  | -0.0273     |
|    gen/train/value_loss            | 16     

round:   5%|▍         | 11/244 [00:18<07:26,  1.92s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 39.1        |
|    gen/rollout/ep_rew_mean         | 128         |
|    gen/rollout/ep_rew_wrapped_mean | -47.5       |
|    gen/time/fps                    | 2263        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 24576       |
|    gen/train/approx_kl             | 0.009718313 |
|    gen/train/clip_fraction         | 0.483       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.832      |
|    gen/train/explained_variance    | 0.563       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 1.02        |
|    gen/train/n_updates             | 55          |
|    gen/train/policy_gradient_loss  | -0.0296     |
|    gen/train/value_loss            | 16.2   

round:   5%|▍         | 12/244 [00:20<07:27,  1.93s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 35.7        |
|    gen/rollout/ep_rew_mean         | 130         |
|    gen/rollout/ep_rew_wrapped_mean | -40         |
|    gen/time/fps                    | 2261        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 26624       |
|    gen/train/approx_kl             | 0.009187728 |
|    gen/train/clip_fraction         | 0.472       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.754      |
|    gen/train/explained_variance    | 0.587       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 1.65        |
|    gen/train/n_updates             | 60          |
|    gen/train/policy_gradient_loss  | -0.0282     |
|    gen/train/value_loss            | 15.7   

round:   5%|▌         | 13/244 [00:22<07:03,  1.83s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 31.8        |
|    gen/rollout/ep_rew_mean         | 138         |
|    gen/rollout/ep_rew_wrapped_mean | -32.7       |
|    gen/time/fps                    | 2209        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 28672       |
|    gen/train/approx_kl             | 0.014871398 |
|    gen/train/clip_fraction         | 0.486       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.656      |
|    gen/train/explained_variance    | 0.672       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 1.12        |
|    gen/train/n_updates             | 65          |
|    gen/train/policy_gradient_loss  | -0.0314     |
|    gen/train/value_loss            | 11.7   

round:   6%|▌         | 14/244 [00:23<06:47,  1.77s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 29.6         |
|    gen/rollout/ep_rew_mean         | 143          |
|    gen/rollout/ep_rew_wrapped_mean | -22.1        |
|    gen/time/fps                    | 2280         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 30720        |
|    gen/train/approx_kl             | 0.0144522935 |
|    gen/train/clip_fraction         | 0.319        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.535       |
|    gen/train/explained_variance    | 0.66         |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.811        |
|    gen/train/n_updates             | 70           |
|    gen/train/policy_gradient_loss  | -0.0248      |
|    gen/train/value_loss   

round:   6%|▌         | 15/244 [00:25<06:24,  1.68s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 28.5        |
|    gen/rollout/ep_rew_mean         | 144         |
|    gen/rollout/ep_rew_wrapped_mean | -16.7       |
|    gen/time/fps                    | 2301        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 32768       |
|    gen/train/approx_kl             | 0.010639688 |
|    gen/train/clip_fraction         | 0.307       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.439      |
|    gen/train/explained_variance    | 0.67        |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 0.753       |
|    gen/train/n_updates             | 75          |
|    gen/train/policy_gradient_loss  | -0.0251     |
|    gen/train/value_loss            | 6.98   

round:   7%|▋         | 16/244 [00:26<06:06,  1.61s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 26.8        |
|    gen/rollout/ep_rew_mean         | 147         |
|    gen/rollout/ep_rew_wrapped_mean | -13.8       |
|    gen/time/fps                    | 2230        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 34816       |
|    gen/train/approx_kl             | 0.007421992 |
|    gen/train/clip_fraction         | 0.294       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.359      |
|    gen/train/explained_variance    | 0.522       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 0.561       |
|    gen/train/n_updates             | 80          |
|    gen/train/policy_gradient_loss  | -0.0247     |
|    gen/train/value_loss            | 6.77   

round:   7%|▋         | 17/244 [00:28<05:55,  1.57s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 25.9        |
|    gen/rollout/ep_rew_mean         | 148         |
|    gen/rollout/ep_rew_wrapped_mean | -9.35       |
|    gen/time/fps                    | 2284        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 36864       |
|    gen/train/approx_kl             | 0.011435461 |
|    gen/train/clip_fraction         | 0.263       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.27       |
|    gen/train/explained_variance    | 0.273       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 0.701       |
|    gen/train/n_updates             | 85          |
|    gen/train/policy_gradient_loss  | -0.0246     |
|    gen/train/value_loss            | 5.83   

round:   7%|▋         | 18/244 [00:30<06:19,  1.68s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 25          |
|    gen/rollout/ep_rew_mean         | 150         |
|    gen/rollout/ep_rew_wrapped_mean | -7.48       |
|    gen/time/fps                    | 2305        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 38912       |
|    gen/train/approx_kl             | 0.020591624 |
|    gen/train/clip_fraction         | 0.152       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.18       |
|    gen/train/explained_variance    | 0.174       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 0.446       |
|    gen/train/n_updates             | 90          |
|    gen/train/policy_gradient_loss  | -0.0162     |
|    gen/train/value_loss            | 4.54   

round:   8%|▊         | 19/244 [00:31<06:00,  1.60s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 24.4        |
|    gen/rollout/ep_rew_mean         | 149         |
|    gen/rollout/ep_rew_wrapped_mean | -4.25       |
|    gen/time/fps                    | 2317        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 40960       |
|    gen/train/approx_kl             | 0.011994448 |
|    gen/train/clip_fraction         | 0.0578      |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.107      |
|    gen/train/explained_variance    | 0.303       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 0.177       |
|    gen/train/n_updates             | 95          |
|    gen/train/policy_gradient_loss  | -0.01       |
|    gen/train/value_loss            | 2      

round:   8%|▊         | 20/244 [00:33<05:45,  1.54s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 24.2         |
|    gen/rollout/ep_rew_mean         | 149          |
|    gen/rollout/ep_rew_wrapped_mean | -2.15        |
|    gen/time/fps                    | 2321         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 43008        |
|    gen/train/approx_kl             | 0.0027149483 |
|    gen/train/clip_fraction         | 0.0153       |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.0729      |
|    gen/train/explained_variance    | 0.1          |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.0805       |
|    gen/train/n_updates             | 100          |
|    gen/train/policy_gradient_loss  | -0.00537     |
|    gen/train/value_loss   

round:   9%|▊         | 21/244 [00:34<05:39,  1.52s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 24.2         |
|    gen/rollout/ep_rew_mean         | 147          |
|    gen/rollout/ep_rew_wrapped_mean | -1.38        |
|    gen/time/fps                    | 2304         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 45056        |
|    gen/train/approx_kl             | 0.0011365565 |
|    gen/train/clip_fraction         | 0.0113       |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.0459      |
|    gen/train/explained_variance    | -0.000605    |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.134        |
|    gen/train/n_updates             | 105          |
|    gen/train/policy_gradient_loss  | -0.00368     |
|    gen/train/value_loss   

round:   9%|▉         | 22/244 [00:35<05:32,  1.50s/it]

------------------------------------------------------
| raw/                               |               |
|    gen/rollout/ep_len_mean         | 24.1          |
|    gen/rollout/ep_rew_mean         | 148           |
|    gen/rollout/ep_rew_wrapped_mean | -0.999        |
|    gen/time/fps                    | 2305          |
|    gen/time/iterations             | 1             |
|    gen/time/time_elapsed           | 0             |
|    gen/time/total_timesteps        | 47104         |
|    gen/train/approx_kl             | 0.00045594145 |
|    gen/train/clip_fraction         | 0.00605       |
|    gen/train/clip_range            | 0.1           |
|    gen/train/entropy_loss          | -0.0372       |
|    gen/train/explained_variance    | 0.127         |
|    gen/train/learning_rate         | 0.0005        |
|    gen/train/loss                  | 0.0234        |
|    gen/train/n_updates             | 110           |
|    gen/train/policy_gradient_loss  | -0.00286      |
|    gen/t

round:   9%|▉         | 23/244 [00:37<05:24,  1.47s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 24           |
|    gen/rollout/ep_rew_mean         | 148          |
|    gen/rollout/ep_rew_wrapped_mean | -0.706       |
|    gen/time/fps                    | 2298         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 49152        |
|    gen/train/approx_kl             | 0.0010327271 |
|    gen/train/clip_fraction         | 0.00586      |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.0246      |
|    gen/train/explained_variance    | 0.0757       |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.0888       |
|    gen/train/n_updates             | 115          |
|    gen/train/policy_gradient_loss  | -0.0025      |
|    gen/train/value_loss   

round:  10%|▉         | 24/244 [00:39<05:46,  1.57s/it]

------------------------------------------------------
| raw/                               |               |
|    gen/rollout/ep_len_mean         | 24.1          |
|    gen/rollout/ep_rew_mean         | 150           |
|    gen/rollout/ep_rew_wrapped_mean | -0.451        |
|    gen/time/fps                    | 2238          |
|    gen/time/iterations             | 1             |
|    gen/time/time_elapsed           | 0             |
|    gen/time/total_timesteps        | 51200         |
|    gen/train/approx_kl             | 0.00025990486 |
|    gen/train/clip_fraction         | 0.00215       |
|    gen/train/clip_range            | 0.1           |
|    gen/train/entropy_loss          | -0.0182       |
|    gen/train/explained_variance    | -0.0593       |
|    gen/train/learning_rate         | 0.0005        |
|    gen/train/loss                  | 0.00507       |
|    gen/train/n_updates             | 120           |
|    gen/train/policy_gradient_loss  | -0.0012       |
|    gen/t

round:  10%|█         | 25/244 [00:40<05:42,  1.57s/it]

------------------------------------------------------
| raw/                               |               |
|    gen/rollout/ep_len_mean         | 24            |
|    gen/rollout/ep_rew_mean         | 151           |
|    gen/rollout/ep_rew_wrapped_mean | -0.312        |
|    gen/time/fps                    | 2297          |
|    gen/time/iterations             | 1             |
|    gen/time/time_elapsed           | 0             |
|    gen/time/total_timesteps        | 53248         |
|    gen/train/approx_kl             | 0.00043834822 |
|    gen/train/clip_fraction         | 0.00322       |
|    gen/train/clip_range            | 0.1           |
|    gen/train/entropy_loss          | -0.0126       |
|    gen/train/explained_variance    | 0.0264        |
|    gen/train/learning_rate         | 0.0005        |
|    gen/train/loss                  | 0.00494       |
|    gen/train/n_updates             | 125           |
|    gen/train/policy_gradient_loss  | -0.00118      |
|    gen/t

round:  11%|█         | 26/244 [00:42<05:31,  1.52s/it]

------------------------------------------------------
| raw/                               |               |
|    gen/rollout/ep_len_mean         | 24            |
|    gen/rollout/ep_rew_mean         | 151           |
|    gen/rollout/ep_rew_wrapped_mean | -0.323        |
|    gen/time/fps                    | 2308          |
|    gen/time/iterations             | 1             |
|    gen/time/time_elapsed           | 0             |
|    gen/time/total_timesteps        | 55296         |
|    gen/train/approx_kl             | 0.00054093136 |
|    gen/train/clip_fraction         | 0.00176       |
|    gen/train/clip_range            | 0.1           |
|    gen/train/entropy_loss          | -0.00854      |
|    gen/train/explained_variance    | -0.0472       |
|    gen/train/learning_rate         | 0.0005        |
|    gen/train/loss                  | 0.00749       |
|    gen/train/n_updates             | 130           |
|    gen/train/policy_gradient_loss  | -0.00112      |
|    gen/t

round:  11%|█         | 27/244 [00:43<05:24,  1.50s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 24.6         |
|    gen/rollout/ep_rew_mean         | 150          |
|    gen/rollout/ep_rew_wrapped_mean | -0.146       |
|    gen/time/fps                    | 2277         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 57344        |
|    gen/train/approx_kl             | 0.0043532536 |
|    gen/train/clip_fraction         | 0.022        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.0261      |
|    gen/train/explained_variance    | 0.107        |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.00169      |
|    gen/train/n_updates             | 135          |
|    gen/train/policy_gradient_loss  | 0.00615      |
|    gen/train/value_loss   

round:  11%|█▏        | 28/244 [00:45<05:27,  1.52s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 24.3         |
|    gen/rollout/ep_rew_mean         | 149          |
|    gen/rollout/ep_rew_wrapped_mean | -2.21        |
|    gen/time/fps                    | 2278         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 59392        |
|    gen/train/approx_kl             | 0.0058286423 |
|    gen/train/clip_fraction         | 0.0523       |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.0512      |
|    gen/train/explained_variance    | 0.0704       |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.145        |
|    gen/train/n_updates             | 140          |
|    gen/train/policy_gradient_loss  | -0.00416     |
|    gen/train/value_loss   

round:  12%|█▏        | 29/244 [00:46<05:21,  1.50s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 24          |
|    gen/rollout/ep_rew_mean         | 148         |
|    gen/rollout/ep_rew_wrapped_mean | -0.903      |
|    gen/time/fps                    | 2292        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 61440       |
|    gen/train/approx_kl             | 0.034620043 |
|    gen/train/clip_fraction         | 0.0436      |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.00907    |
|    gen/train/explained_variance    | 0.172       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 0.0236      |
|    gen/train/n_updates             | 145         |
|    gen/train/policy_gradient_loss  | -0.0117     |
|    gen/train/value_loss            | 0.291  

round:  12%|█▏        | 30/244 [00:48<05:48,  1.63s/it]

------------------------------------------------------
| raw/                               |               |
|    gen/rollout/ep_len_mean         | 24            |
|    gen/rollout/ep_rew_mean         | 148           |
|    gen/rollout/ep_rew_wrapped_mean | 0.0276        |
|    gen/time/fps                    | 2299          |
|    gen/time/iterations             | 1             |
|    gen/time/time_elapsed           | 0             |
|    gen/time/total_timesteps        | 63488         |
|    gen/train/approx_kl             | 1.9387226e-06 |
|    gen/train/clip_fraction         | 0.000488      |
|    gen/train/clip_range            | 0.1           |
|    gen/train/entropy_loss          | -0.00775      |
|    gen/train/explained_variance    | -0.766        |
|    gen/train/learning_rate         | 0.0005        |
|    gen/train/loss                  | 0.00348       |
|    gen/train/n_updates             | 150           |
|    gen/train/policy_gradient_loss  | -0.000339     |
|    gen/t

round:  13%|█▎        | 31/244 [00:50<05:38,  1.59s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 25.8       |
|    gen/rollout/ep_rew_mean         | 147        |
|    gen/rollout/ep_rew_wrapped_mean | -0.246     |
|    gen/time/fps                    | 2244       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 0          |
|    gen/time/total_timesteps        | 65536      |
|    gen/train/approx_kl             | 0.03405232 |
|    gen/train/clip_fraction         | 0.0896     |
|    gen/train/clip_range            | 0.1        |
|    gen/train/entropy_loss          | -0.0912    |
|    gen/train/explained_variance    | -0.228     |
|    gen/train/learning_rate         | 0.0005     |
|    gen/train/loss                  | -0.0136    |
|    gen/train/n_updates             | 155        |
|    gen/train/policy_gradient_loss  | 0.151      |
|    gen/train/value_loss            | 0.0466     |
------------

round:  13%|█▎        | 32/244 [00:51<05:36,  1.59s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 25.2        |
|    gen/rollout/ep_rew_mean         | 145         |
|    gen/rollout/ep_rew_wrapped_mean | -11.4       |
|    gen/time/fps                    | 1530        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 1           |
|    gen/time/total_timesteps        | 67584       |
|    gen/train/approx_kl             | 0.036906727 |
|    gen/train/clip_fraction         | 0.219       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.156      |
|    gen/train/explained_variance    | 0.000638    |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 1.74        |
|    gen/train/n_updates             | 160         |
|    gen/train/policy_gradient_loss  | -0.019      |
|    gen/train/value_loss            | 14     

round:  14%|█▎        | 33/244 [00:53<05:54,  1.68s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 24.4        |
|    gen/rollout/ep_rew_mean         | 147         |
|    gen/rollout/ep_rew_wrapped_mean | -5.32       |
|    gen/time/fps                    | 2320        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 69632       |
|    gen/train/approx_kl             | 0.049608663 |
|    gen/train/clip_fraction         | 0.0855      |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.052      |
|    gen/train/explained_variance    | 0.18        |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 0.563       |
|    gen/train/n_updates             | 165         |
|    gen/train/policy_gradient_loss  | -0.0139     |
|    gen/train/value_loss            | 3.76   

round:  14%|█▍        | 34/244 [00:54<05:33,  1.59s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 24.1        |
|    gen/rollout/ep_rew_mean         | 154         |
|    gen/rollout/ep_rew_wrapped_mean | -1.05       |
|    gen/time/fps                    | 2317        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 71680       |
|    gen/train/approx_kl             | 0.010906193 |
|    gen/train/clip_fraction         | 0.0416      |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.0333     |
|    gen/train/explained_variance    | -0.183      |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 0.0423      |
|    gen/train/n_updates             | 170         |
|    gen/train/policy_gradient_loss  | -0.00674    |
|    gen/train/value_loss            | 0.909  

round:  14%|█▍        | 35/244 [00:56<05:19,  1.53s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 24           |
|    gen/rollout/ep_rew_mean         | 150          |
|    gen/rollout/ep_rew_wrapped_mean | -0.581       |
|    gen/time/fps                    | 2327         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 73728        |
|    gen/train/approx_kl             | 0.0031489688 |
|    gen/train/clip_fraction         | 0.00273      |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.00721     |
|    gen/train/explained_variance    | -0.334       |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.0534       |
|    gen/train/n_updates             | 175          |
|    gen/train/policy_gradient_loss  | -0.00315     |
|    gen/train/value_loss   

round:  15%|█▍        | 36/244 [00:58<05:32,  1.60s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 24           |
|    gen/rollout/ep_rew_mean         | 150          |
|    gen/rollout/ep_rew_wrapped_mean | -0.213       |
|    gen/time/fps                    | 2329         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 75776        |
|    gen/train/approx_kl             | 0.0001255683 |
|    gen/train/clip_fraction         | 0.000391     |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.00401     |
|    gen/train/explained_variance    | 0.236        |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.0116       |
|    gen/train/n_updates             | 180          |
|    gen/train/policy_gradient_loss  | -0.000387    |
|    gen/train/value_loss   

round:  15%|█▌        | 37/244 [00:59<05:21,  1.56s/it]

------------------------------------------------------
| raw/                               |               |
|    gen/rollout/ep_len_mean         | 24            |
|    gen/rollout/ep_rew_mean         | 147           |
|    gen/rollout/ep_rew_wrapped_mean | -0.0502       |
|    gen/time/fps                    | 2336          |
|    gen/time/iterations             | 1             |
|    gen/time/time_elapsed           | 0             |
|    gen/time/total_timesteps        | 77824         |
|    gen/train/approx_kl             | 1.5832484e-08 |
|    gen/train/clip_fraction         | 0             |
|    gen/train/clip_range            | 0.1           |
|    gen/train/entropy_loss          | -0.00361      |
|    gen/train/explained_variance    | -0.634        |
|    gen/train/learning_rate         | 0.0005        |
|    gen/train/loss                  | 0.00167       |
|    gen/train/n_updates             | 185           |
|    gen/train/policy_gradient_loss  | -1.45e-05     |
|    gen/t

round:  16%|█▌        | 38/244 [01:00<05:09,  1.50s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 24.1         |
|    gen/rollout/ep_rew_mean         | 147          |
|    gen/rollout/ep_rew_wrapped_mean | -0.202       |
|    gen/time/fps                    | 2330         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 79872        |
|    gen/train/approx_kl             | 0.0019277639 |
|    gen/train/clip_fraction         | 0.00762      |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.0103      |
|    gen/train/explained_variance    | -0.0057      |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.0129       |
|    gen/train/n_updates             | 190          |
|    gen/train/policy_gradient_loss  | -0.00091     |
|    gen/train/value_loss   

round:  16%|█▌        | 39/244 [01:02<05:00,  1.47s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 24           |
|    gen/rollout/ep_rew_mean         | 151          |
|    gen/rollout/ep_rew_wrapped_mean | -0.634       |
|    gen/time/fps                    | 2215         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 81920        |
|    gen/train/approx_kl             | 0.0112457145 |
|    gen/train/clip_fraction         | 0.018        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.004       |
|    gen/train/explained_variance    | -0.0259      |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.00608      |
|    gen/train/n_updates             | 195          |
|    gen/train/policy_gradient_loss  | -0.00398     |
|    gen/train/value_loss   

round:  16%|█▋        | 40/244 [01:03<04:55,  1.45s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 24           |
|    gen/rollout/ep_rew_mean         | 147          |
|    gen/rollout/ep_rew_wrapped_mean | -0.255       |
|    gen/time/fps                    | 2339         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 83968        |
|    gen/train/approx_kl             | 0.0003709652 |
|    gen/train/clip_fraction         | 0.000391     |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.00234     |
|    gen/train/explained_variance    | -0.435       |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.00403      |
|    gen/train/n_updates             | 200          |
|    gen/train/policy_gradient_loss  | -0.000366    |
|    gen/train/value_loss   

round:  17%|█▋        | 41/244 [01:05<04:49,  1.43s/it]

------------------------------------------------------
| raw/                               |               |
|    gen/rollout/ep_len_mean         | 24            |
|    gen/rollout/ep_rew_mean         | 150           |
|    gen/rollout/ep_rew_wrapped_mean | -0.0249       |
|    gen/time/fps                    | 2331          |
|    gen/time/iterations             | 1             |
|    gen/time/time_elapsed           | 0             |
|    gen/time/total_timesteps        | 86016         |
|    gen/train/approx_kl             | 0.00026501756 |
|    gen/train/clip_fraction         | 0.000391      |
|    gen/train/clip_range            | 0.1           |
|    gen/train/entropy_loss          | -0.00195      |
|    gen/train/explained_variance    | 0.0673        |
|    gen/train/learning_rate         | 0.0005        |
|    gen/train/loss                  | 0.00588       |
|    gen/train/n_updates             | 205           |
|    gen/train/policy_gradient_loss  | -0.000298     |
|    gen/t

round:  17%|█▋        | 42/244 [01:06<05:06,  1.52s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 24           |
|    gen/rollout/ep_rew_mean         | 149          |
|    gen/rollout/ep_rew_wrapped_mean | -0.0896      |
|    gen/time/fps                    | 2342         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 88064        |
|    gen/train/approx_kl             | 9.444193e-08 |
|    gen/train/clip_fraction         | 0            |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.00204     |
|    gen/train/explained_variance    | -3.44        |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.00114      |
|    gen/train/n_updates             | 210          |
|    gen/train/policy_gradient_loss  | -4.28e-05    |
|    gen/train/value_loss   

round:  18%|█▊        | 43/244 [01:08<04:56,  1.48s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 24           |
|    gen/rollout/ep_rew_mean         | 149          |
|    gen/rollout/ep_rew_wrapped_mean | -0.14        |
|    gen/time/fps                    | 2343         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 90112        |
|    gen/train/approx_kl             | 0.0005879389 |
|    gen/train/clip_fraction         | 0.000879     |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.00165     |
|    gen/train/explained_variance    | -0.206       |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.00367      |
|    gen/train/n_updates             | 215          |
|    gen/train/policy_gradient_loss  | -0.000564    |
|    gen/train/value_loss   

round:  18%|█▊        | 44/244 [01:09<04:53,  1.47s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 149       |
|    gen/rollout/ep_rew_wrapped_mean | -0.0201   |
|    gen/time/fps                    | 2333      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 92160     |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.00138  |
|    gen/train/explained_variance    | -0.137    |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00103   |
|    gen/train/n_updates             | 220       |
|    gen/train/policy_gradient_loss  | -5.31e-06 |
|    gen/train/value_loss            | 0.016     |
-------------------------------

round:  18%|█▊        | 45/244 [01:10<04:47,  1.44s/it]

------------------------------------------------------
| raw/                               |               |
|    gen/rollout/ep_len_mean         | 24            |
|    gen/rollout/ep_rew_mean         | 145           |
|    gen/rollout/ep_rew_wrapped_mean | -0.0611       |
|    gen/time/fps                    | 2314          |
|    gen/time/iterations             | 1             |
|    gen/time/time_elapsed           | 0             |
|    gen/time/total_timesteps        | 94208         |
|    gen/train/approx_kl             | 5.2386895e-10 |
|    gen/train/clip_fraction         | 0             |
|    gen/train/clip_range            | 0.1           |
|    gen/train/entropy_loss          | -0.00136      |
|    gen/train/explained_variance    | 0.0545        |
|    gen/train/learning_rate         | 0.0005        |
|    gen/train/loss                  | 0.000916      |
|    gen/train/n_updates             | 225           |
|    gen/train/policy_gradient_loss  | -2.99e-06     |
|    gen/t

round:  19%|█▉        | 46/244 [01:12<04:41,  1.42s/it]

------------------------------------------------------
| raw/                               |               |
|    gen/rollout/ep_len_mean         | 24            |
|    gen/rollout/ep_rew_mean         | 149           |
|    gen/rollout/ep_rew_wrapped_mean | -0.0927       |
|    gen/time/fps                    | 2298          |
|    gen/time/iterations             | 1             |
|    gen/time/time_elapsed           | 0             |
|    gen/time/total_timesteps        | 96256         |
|    gen/train/approx_kl             | 7.4214768e-09 |
|    gen/train/clip_fraction         | 0             |
|    gen/train/clip_range            | 0.1           |
|    gen/train/entropy_loss          | -0.00149      |
|    gen/train/explained_variance    | 0.0126        |
|    gen/train/learning_rate         | 0.0005        |
|    gen/train/loss                  | 0.00403       |
|    gen/train/n_updates             | 230           |
|    gen/train/policy_gradient_loss  | -7.44e-06     |
|    gen/t

round:  19%|█▉        | 47/244 [01:13<04:48,  1.47s/it]

------------------------------------------------------
| raw/                               |               |
|    gen/rollout/ep_len_mean         | 24            |
|    gen/rollout/ep_rew_mean         | 151           |
|    gen/rollout/ep_rew_wrapped_mean | -0.0762       |
|    gen/time/fps                    | 2290          |
|    gen/time/iterations             | 1             |
|    gen/time/time_elapsed           | 0             |
|    gen/time/total_timesteps        | 98304         |
|    gen/train/approx_kl             | 2.5436748e-08 |
|    gen/train/clip_fraction         | 0             |
|    gen/train/clip_range            | 0.1           |
|    gen/train/entropy_loss          | -0.00147      |
|    gen/train/explained_variance    | -0.116        |
|    gen/train/learning_rate         | 0.0005        |
|    gen/train/loss                  | 0.00157       |
|    gen/train/n_updates             | 235           |
|    gen/train/policy_gradient_loss  | -3.05e-05     |
|    gen/t

round:  20%|█▉        | 48/244 [01:16<05:38,  1.73s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 148       |
|    gen/rollout/ep_rew_wrapped_mean | -0.0301   |
|    gen/time/fps                    | 2275      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 100352    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.00137  |
|    gen/train/explained_variance    | 0.294     |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00151   |
|    gen/train/n_updates             | 240       |
|    gen/train/policy_gradient_loss  | -5.45e-07 |
|    gen/train/value_loss            | 0.0193    |
-------------------------------

round:  20%|██        | 49/244 [01:17<05:21,  1.65s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 152       |
|    gen/rollout/ep_rew_wrapped_mean | -0.142    |
|    gen/time/fps                    | 2307      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 102400    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.00143  |
|    gen/train/explained_variance    | 0.114     |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00235   |
|    gen/train/n_updates             | 245       |
|    gen/train/policy_gradient_loss  | -2.85e-07 |
|    gen/train/value_loss            | 0.0208    |
-------------------------------

round:  20%|██        | 50/244 [01:19<05:09,  1.59s/it]

------------------------------------------------------
| raw/                               |               |
|    gen/rollout/ep_len_mean         | 24            |
|    gen/rollout/ep_rew_mean         | 152           |
|    gen/rollout/ep_rew_wrapped_mean | -0.144        |
|    gen/time/fps                    | 2282          |
|    gen/time/iterations             | 1             |
|    gen/time/time_elapsed           | 0             |
|    gen/time/total_timesteps        | 104448        |
|    gen/train/approx_kl             | 0.00032810474 |
|    gen/train/clip_fraction         | 0.000391      |
|    gen/train/clip_range            | 0.1           |
|    gen/train/entropy_loss          | -0.00143      |
|    gen/train/explained_variance    | -0.0104       |
|    gen/train/learning_rate         | 0.0005        |
|    gen/train/loss                  | 0.0026        |
|    gen/train/n_updates             | 250           |
|    gen/train/policy_gradient_loss  | -0.000313     |
|    gen/t

round:  21%|██        | 51/244 [01:20<05:00,  1.56s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 24           |
|    gen/rollout/ep_rew_mean         | 150          |
|    gen/rollout/ep_rew_wrapped_mean | 0.0305       |
|    gen/time/fps                    | 2295         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 106496       |
|    gen/train/approx_kl             | 1.542503e-09 |
|    gen/train/clip_fraction         | 0            |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.00117     |
|    gen/train/explained_variance    | 0.254        |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.00135      |
|    gen/train/n_updates             | 255          |
|    gen/train/policy_gradient_loss  | -2.06e-06    |
|    gen/train/value_loss   

round:  21%|██▏       | 52/244 [01:22<04:50,  1.51s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 24.1         |
|    gen/rollout/ep_rew_mean         | 147          |
|    gen/rollout/ep_rew_wrapped_mean | 0.115        |
|    gen/time/fps                    | 2309         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 108544       |
|    gen/train/approx_kl             | 0.0010990204 |
|    gen/train/clip_fraction         | 0.00713      |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.00721     |
|    gen/train/explained_variance    | -0.0325      |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.00192      |
|    gen/train/n_updates             | 260          |
|    gen/train/policy_gradient_loss  | -0.000915    |
|    gen/train/value_loss   

round:  22%|██▏       | 53/244 [01:23<04:42,  1.48s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 24          |
|    gen/rollout/ep_rew_mean         | 148         |
|    gen/rollout/ep_rew_wrapped_mean | -0.567      |
|    gen/time/fps                    | 2191        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 110592      |
|    gen/train/approx_kl             | 0.008923505 |
|    gen/train/clip_fraction         | 0.00781     |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.00219    |
|    gen/train/explained_variance    | -0.0247     |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 0.163       |
|    gen/train/n_updates             | 265         |
|    gen/train/policy_gradient_loss  | -0.0026     |
|    gen/train/value_loss            | 0.616  

round:  22%|██▏       | 54/244 [01:25<05:05,  1.61s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 24           |
|    gen/rollout/ep_rew_mean         | 150          |
|    gen/rollout/ep_rew_wrapped_mean | -0.117       |
|    gen/time/fps                    | 2240         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 112640       |
|    gen/train/approx_kl             | 0.0004806472 |
|    gen/train/clip_fraction         | 0.000391     |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.00118     |
|    gen/train/explained_variance    | -0.289       |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.00259      |
|    gen/train/n_updates             | 270          |
|    gen/train/policy_gradient_loss  | -0.000287    |
|    gen/train/value_loss   

round:  23%|██▎       | 55/244 [01:26<04:54,  1.56s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 24           |
|    gen/rollout/ep_rew_mean         | 152          |
|    gen/rollout/ep_rew_wrapped_mean | -0.16        |
|    gen/time/fps                    | 2306         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 114688       |
|    gen/train/approx_kl             | 0.0005180559 |
|    gen/train/clip_fraction         | 0.000391     |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.00123     |
|    gen/train/explained_variance    | -0.00798     |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.0464       |
|    gen/train/n_updates             | 275          |
|    gen/train/policy_gradient_loss  | -0.000316    |
|    gen/train/value_loss   

round:  23%|██▎       | 56/244 [01:28<04:45,  1.52s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 24           |
|    gen/rollout/ep_rew_mean         | 148          |
|    gen/rollout/ep_rew_wrapped_mean | 0.0106       |
|    gen/time/fps                    | 2147         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 116736       |
|    gen/train/approx_kl             | 6.220653e-07 |
|    gen/train/clip_fraction         | 0            |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.00191     |
|    gen/train/explained_variance    | 0.0196       |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.00444      |
|    gen/train/n_updates             | 280          |
|    gen/train/policy_gradient_loss  | -2.15e-05    |
|    gen/train/value_loss   

round:  23%|██▎       | 57/244 [01:29<04:42,  1.51s/it]

------------------------------------------------------
| raw/                               |               |
|    gen/rollout/ep_len_mean         | 24            |
|    gen/rollout/ep_rew_mean         | 149           |
|    gen/rollout/ep_rew_wrapped_mean | -0.0411       |
|    gen/time/fps                    | 2316          |
|    gen/time/iterations             | 1             |
|    gen/time/time_elapsed           | 0             |
|    gen/time/total_timesteps        | 118784        |
|    gen/train/approx_kl             | 3.1199306e-08 |
|    gen/train/clip_fraction         | 0             |
|    gen/train/clip_range            | 0.1           |
|    gen/train/entropy_loss          | -0.00069      |
|    gen/train/explained_variance    | -0.348        |
|    gen/train/learning_rate         | 0.0005        |
|    gen/train/loss                  | 0.0035        |
|    gen/train/n_updates             | 285           |
|    gen/train/policy_gradient_loss  | -5.03e-05     |
|    gen/t

round:  24%|██▍       | 58/244 [01:31<04:34,  1.48s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 149       |
|    gen/rollout/ep_rew_wrapped_mean | -0.223    |
|    gen/time/fps                    | 2313      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 120832    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000536 |
|    gen/train/explained_variance    | -0.485    |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00141   |
|    gen/train/n_updates             | 290       |
|    gen/train/policy_gradient_loss  | -1.48e-06 |
|    gen/train/value_loss            | 0.0175    |
-------------------------------

round:  24%|██▍       | 59/244 [01:32<04:29,  1.46s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 148       |
|    gen/rollout/ep_rew_wrapped_mean | 0.113     |
|    gen/time/fps                    | 2206      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 122880    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000534 |
|    gen/train/explained_variance    | -1.19     |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00287   |
|    gen/train/n_updates             | 295       |
|    gen/train/policy_gradient_loss  | -1.07e-06 |
|    gen/train/value_loss            | 0.032     |
-------------------------------

round:  25%|██▍       | 60/244 [01:34<04:48,  1.57s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 24           |
|    gen/rollout/ep_rew_mean         | 151          |
|    gen/rollout/ep_rew_wrapped_mean | -0.161       |
|    gen/time/fps                    | 2313         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 124928       |
|    gen/train/approx_kl             | 4.947651e-10 |
|    gen/train/clip_fraction         | 0            |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.000573    |
|    gen/train/explained_variance    | -1.58        |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.00204      |
|    gen/train/n_updates             | 300          |
|    gen/train/policy_gradient_loss  | -3.88e-06    |
|    gen/train/value_loss   

round:  25%|██▌       | 61/244 [01:35<04:39,  1.53s/it]

------------------------------------------------------
| raw/                               |               |
|    gen/rollout/ep_len_mean         | 24            |
|    gen/rollout/ep_rew_mean         | 151           |
|    gen/rollout/ep_rew_wrapped_mean | 0.0654        |
|    gen/time/fps                    | 2297          |
|    gen/time/iterations             | 1             |
|    gen/time/time_elapsed           | 0             |
|    gen/time/total_timesteps        | 126976        |
|    gen/train/approx_kl             | 8.1490725e-10 |
|    gen/train/clip_fraction         | 0             |
|    gen/train/clip_range            | 0.1           |
|    gen/train/entropy_loss          | -0.000544     |
|    gen/train/explained_variance    | 0.185         |
|    gen/train/learning_rate         | 0.0005        |
|    gen/train/loss                  | 0.00142       |
|    gen/train/n_updates             | 305           |
|    gen/train/policy_gradient_loss  | -9.29e-06     |
|    gen/t

round:  25%|██▌       | 62/244 [01:37<04:31,  1.49s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 150       |
|    gen/rollout/ep_rew_wrapped_mean | 0.00539   |
|    gen/time/fps                    | 2141      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 129024    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000484 |
|    gen/train/explained_variance    | -0.756    |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00199   |
|    gen/train/n_updates             | 310       |
|    gen/train/policy_gradient_loss  | -3.56e-06 |
|    gen/train/value_loss            | 0.0231    |
-------------------------------

round:  26%|██▌       | 63/244 [01:38<04:30,  1.50s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 152       |
|    gen/rollout/ep_rew_wrapped_mean | -0.133    |
|    gen/time/fps                    | 2314      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 131072    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.0005   |
|    gen/train/explained_variance    | 0.0186    |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.0021    |
|    gen/train/n_updates             | 315       |
|    gen/train/policy_gradient_loss  | -4.31e-07 |
|    gen/train/value_loss            | 0.0215    |
-------------------------------

round:  26%|██▌       | 64/244 [01:40<04:24,  1.47s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 151       |
|    gen/rollout/ep_rew_wrapped_mean | 0.139     |
|    gen/time/fps                    | 2309      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 133120    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000491 |
|    gen/train/explained_variance    | -0.0521   |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00148   |
|    gen/train/n_updates             | 320       |
|    gen/train/policy_gradient_loss  | -2.96e-06 |
|    gen/train/value_loss            | 0.0173    |
-------------------------------

round:  27%|██▋       | 65/244 [01:41<04:19,  1.45s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 149       |
|    gen/rollout/ep_rew_wrapped_mean | 0.0239    |
|    gen/time/fps                    | 2198      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 135168    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000492 |
|    gen/train/explained_variance    | -0.511    |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00115   |
|    gen/train/n_updates             | 325       |
|    gen/train/policy_gradient_loss  | -4.89e-06 |
|    gen/train/value_loss            | 0.0242    |
-------------------------------

round:  27%|██▋       | 66/244 [01:43<04:38,  1.57s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 149       |
|    gen/rollout/ep_rew_wrapped_mean | -0.00175  |
|    gen/time/fps                    | 2240      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 137216    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000506 |
|    gen/train/explained_variance    | 0.105     |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00234   |
|    gen/train/n_updates             | 330       |
|    gen/train/policy_gradient_loss  | -2e-06    |
|    gen/train/value_loss            | 0.0204    |
-------------------------------

round:  27%|██▋       | 67/244 [01:44<04:36,  1.56s/it]

------------------------------------------------------
| raw/                               |               |
|    gen/rollout/ep_len_mean         | 24            |
|    gen/rollout/ep_rew_mean         | 153           |
|    gen/rollout/ep_rew_wrapped_mean | -0.00894      |
|    gen/time/fps                    | 2309          |
|    gen/time/iterations             | 1             |
|    gen/time/time_elapsed           | 0             |
|    gen/time/total_timesteps        | 139264        |
|    gen/train/approx_kl             | 2.6193447e-10 |
|    gen/train/clip_fraction         | 0             |
|    gen/train/clip_range            | 0.1           |
|    gen/train/entropy_loss          | -0.000545     |
|    gen/train/explained_variance    | -0.832        |
|    gen/train/learning_rate         | 0.0005        |
|    gen/train/loss                  | 0.00153       |
|    gen/train/n_updates             | 335           |
|    gen/train/policy_gradient_loss  | -4.71e-06     |
|    gen/t

round:  28%|██▊       | 68/244 [01:46<04:26,  1.52s/it]

------------------------------------------------------
| raw/                               |               |
|    gen/rollout/ep_len_mean         | 24            |
|    gen/rollout/ep_rew_mean         | 150           |
|    gen/rollout/ep_rew_wrapped_mean | -0.0954       |
|    gen/time/fps                    | 2164          |
|    gen/time/iterations             | 1             |
|    gen/time/time_elapsed           | 0             |
|    gen/time/total_timesteps        | 141312        |
|    gen/train/approx_kl             | 1.1641532e-10 |
|    gen/train/clip_fraction         | 0             |
|    gen/train/clip_range            | 0.1           |
|    gen/train/entropy_loss          | -0.00054      |
|    gen/train/explained_variance    | -3.51         |
|    gen/train/learning_rate         | 0.0005        |
|    gen/train/loss                  | 0.000837      |
|    gen/train/n_updates             | 340           |
|    gen/train/policy_gradient_loss  | -6.16e-06     |
|    gen/t

round:  28%|██▊       | 69/244 [01:47<04:22,  1.50s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 150       |
|    gen/rollout/ep_rew_wrapped_mean | -0.0682   |
|    gen/time/fps                    | 2313      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 143360    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000517 |
|    gen/train/explained_variance    | -0.0208   |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00115   |
|    gen/train/n_updates             | 345       |
|    gen/train/policy_gradient_loss  | -1.42e-06 |
|    gen/train/value_loss            | 0.0187    |
-------------------------------

round:  29%|██▊       | 70/244 [01:49<04:16,  1.48s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 151       |
|    gen/rollout/ep_rew_wrapped_mean | 0.0474    |
|    gen/time/fps                    | 2310      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 145408    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000566 |
|    gen/train/explained_variance    | -0.802    |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00083   |
|    gen/train/n_updates             | 350       |
|    gen/train/policy_gradient_loss  | -2.84e-06 |
|    gen/train/value_loss            | 0.0107    |
-------------------------------

round:  29%|██▉       | 71/244 [01:50<04:12,  1.46s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 149       |
|    gen/rollout/ep_rew_wrapped_mean | -0.0529   |
|    gen/time/fps                    | 2172      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 147456    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000573 |
|    gen/train/explained_variance    | -1.41     |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.000998  |
|    gen/train/n_updates             | 355       |
|    gen/train/policy_gradient_loss  | -2.54e-06 |
|    gen/train/value_loss            | 0.0094    |
-------------------------------

round:  30%|██▉       | 72/244 [01:52<04:36,  1.61s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 24           |
|    gen/rollout/ep_rew_mean         | 151          |
|    gen/rollout/ep_rew_wrapped_mean | 0.0122       |
|    gen/time/fps                    | 2274         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 149504       |
|    gen/train/approx_kl             | 3.812602e-09 |
|    gen/train/clip_fraction         | 0            |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.000625    |
|    gen/train/explained_variance    | -0.222       |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.00246      |
|    gen/train/n_updates             | 360          |
|    gen/train/policy_gradient_loss  | -3.77e-06    |
|    gen/train/value_loss   

round:  30%|██▉       | 73/244 [01:54<04:26,  1.56s/it]

------------------------------------------------------
| raw/                               |               |
|    gen/rollout/ep_len_mean         | 24            |
|    gen/rollout/ep_rew_mean         | 145           |
|    gen/rollout/ep_rew_wrapped_mean | 0.0241        |
|    gen/time/fps                    | 2249          |
|    gen/time/iterations             | 1             |
|    gen/time/time_elapsed           | 0             |
|    gen/time/total_timesteps        | 151552        |
|    gen/train/approx_kl             | 1.0884833e-08 |
|    gen/train/clip_fraction         | 0             |
|    gen/train/clip_range            | 0.1           |
|    gen/train/entropy_loss          | -0.000516     |
|    gen/train/explained_variance    | -0.174        |
|    gen/train/learning_rate         | 0.0005        |
|    gen/train/loss                  | 0.00197       |
|    gen/train/n_updates             | 365           |
|    gen/train/policy_gradient_loss  | -3.21e-05     |
|    gen/t

round:  30%|███       | 74/244 [01:55<04:26,  1.57s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 24           |
|    gen/rollout/ep_rew_mean         | 152          |
|    gen/rollout/ep_rew_wrapped_mean | 0.00979      |
|    gen/time/fps                    | 2106         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 153600       |
|    gen/train/approx_kl             | 1.193257e-09 |
|    gen/train/clip_fraction         | 0            |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.000444    |
|    gen/train/explained_variance    | -1.03        |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.00241      |
|    gen/train/n_updates             | 370          |
|    gen/train/policy_gradient_loss  | -4.95e-06    |
|    gen/train/value_loss   

round:  31%|███       | 75/244 [01:57<04:23,  1.56s/it]

------------------------------------------------------
| raw/                               |               |
|    gen/rollout/ep_len_mean         | 24.1          |
|    gen/rollout/ep_rew_mean         | 153           |
|    gen/rollout/ep_rew_wrapped_mean | -0.0984       |
|    gen/time/fps                    | 2252          |
|    gen/time/iterations             | 1             |
|    gen/time/time_elapsed           | 0             |
|    gen/time/total_timesteps        | 155648        |
|    gen/train/approx_kl             | 0.00074589276 |
|    gen/train/clip_fraction         | 0.00889       |
|    gen/train/clip_range            | 0.1           |
|    gen/train/entropy_loss          | -0.00865      |
|    gen/train/explained_variance    | 0.353         |
|    gen/train/learning_rate         | 0.0005        |
|    gen/train/loss                  | 0.00659       |
|    gen/train/n_updates             | 375           |
|    gen/train/policy_gradient_loss  | -0.000662     |
|    gen/t

round:  31%|███       | 76/244 [01:58<04:16,  1.53s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 24         |
|    gen/rollout/ep_rew_mean         | 152        |
|    gen/rollout/ep_rew_wrapped_mean | -0.619     |
|    gen/time/fps                    | 2308       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 0          |
|    gen/time/total_timesteps        | 157696     |
|    gen/train/approx_kl             | 0.01939363 |
|    gen/train/clip_fraction         | 0.0262     |
|    gen/train/clip_range            | 0.1        |
|    gen/train/entropy_loss          | -0.00223   |
|    gen/train/explained_variance    | -0.163     |
|    gen/train/learning_rate         | 0.0005     |
|    gen/train/loss                  | 0.0344     |
|    gen/train/n_updates             | 380        |
|    gen/train/policy_gradient_loss  | -0.003     |
|    gen/train/value_loss            | 0.379      |
------------

round:  32%|███▏      | 77/244 [02:00<04:10,  1.50s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 24           |
|    gen/rollout/ep_rew_mean         | 151          |
|    gen/rollout/ep_rew_wrapped_mean | -0.143       |
|    gen/time/fps                    | 2154         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 159744       |
|    gen/train/approx_kl             | 0.0006898261 |
|    gen/train/clip_fraction         | 0.000391     |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.000435    |
|    gen/train/explained_variance    | -0.489       |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.00164      |
|    gen/train/n_updates             | 385          |
|    gen/train/policy_gradient_loss  | -0.000352    |
|    gen/train/value_loss   

round:  32%|███▏      | 78/244 [02:01<04:28,  1.62s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 150       |
|    gen/rollout/ep_rew_wrapped_mean | 0.021     |
|    gen/time/fps                    | 2275      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 161792    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000293 |
|    gen/train/explained_variance    | 0.163     |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00336   |
|    gen/train/n_updates             | 390       |
|    gen/train/policy_gradient_loss  | -3.5e-08  |
|    gen/train/value_loss            | 0.0261    |
-------------------------------

round:  32%|███▏      | 79/244 [02:03<04:19,  1.57s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 151       |
|    gen/rollout/ep_rew_wrapped_mean | 0.096     |
|    gen/time/fps                    | 2306      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 163840    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000293 |
|    gen/train/explained_variance    | -0.632    |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00216   |
|    gen/train/n_updates             | 395       |
|    gen/train/policy_gradient_loss  | -3.28e-07 |
|    gen/train/value_loss            | 0.023     |
-------------------------------

round:  33%|███▎      | 80/244 [02:04<04:11,  1.53s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 150       |
|    gen/rollout/ep_rew_wrapped_mean | -0.0652   |
|    gen/time/fps                    | 2159      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 165888    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000281 |
|    gen/train/explained_variance    | 0.0633    |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00133   |
|    gen/train/n_updates             | 400       |
|    gen/train/policy_gradient_loss  | -2.19e-07 |
|    gen/train/value_loss            | 0.0169    |
-------------------------------

round:  33%|███▎      | 81/244 [02:06<04:08,  1.52s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 146       |
|    gen/rollout/ep_rew_wrapped_mean | 0.0728    |
|    gen/time/fps                    | 2300      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 167936    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000289 |
|    gen/train/explained_variance    | -0.25     |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00186   |
|    gen/train/n_updates             | 405       |
|    gen/train/policy_gradient_loss  | -1.63e-06 |
|    gen/train/value_loss            | 0.0215    |
-------------------------------

round:  34%|███▎      | 82/244 [02:07<04:02,  1.50s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 152       |
|    gen/rollout/ep_rew_wrapped_mean | -0.0852   |
|    gen/time/fps                    | 2293      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 169984    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000292 |
|    gen/train/explained_variance    | -1.75     |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00164   |
|    gen/train/n_updates             | 410       |
|    gen/train/policy_gradient_loss  | -1.93e-06 |
|    gen/train/value_loss            | 0.0185    |
-------------------------------

round:  34%|███▍      | 83/244 [02:09<04:02,  1.51s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 150       |
|    gen/rollout/ep_rew_wrapped_mean | 0.182     |
|    gen/time/fps                    | 2282      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 172032    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000282 |
|    gen/train/explained_variance    | -1.75     |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00101   |
|    gen/train/n_updates             | 415       |
|    gen/train/policy_gradient_loss  | -1.43e-06 |
|    gen/train/value_loss            | 0.0221    |
-------------------------------

round:  34%|███▍      | 84/244 [02:11<04:18,  1.62s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 148       |
|    gen/rollout/ep_rew_wrapped_mean | -0.0613   |
|    gen/time/fps                    | 2287      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 174080    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000288 |
|    gen/train/explained_variance    | 0.000291  |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00102   |
|    gen/train/n_updates             | 420       |
|    gen/train/policy_gradient_loss  | -4.98e-07 |
|    gen/train/value_loss            | 0.0125    |
-------------------------------

round:  35%|███▍      | 85/244 [02:12<04:12,  1.59s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 151       |
|    gen/rollout/ep_rew_wrapped_mean | 0.0265    |
|    gen/time/fps                    | 2305      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 176128    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.00028  |
|    gen/train/explained_variance    | -0.59     |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00125   |
|    gen/train/n_updates             | 425       |
|    gen/train/policy_gradient_loss  | -2.98e-07 |
|    gen/train/value_loss            | 0.012     |
-------------------------------

round:  35%|███▌      | 86/244 [02:14<04:03,  1.54s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 148       |
|    gen/rollout/ep_rew_wrapped_mean | -0.0726   |
|    gen/time/fps                    | 1972      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 1         |
|    gen/time/total_timesteps        | 178176    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000294 |
|    gen/train/explained_variance    | -0.568    |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.000826  |
|    gen/train/n_updates             | 430       |
|    gen/train/policy_gradient_loss  | -6.93e-07 |
|    gen/train/value_loss            | 0.0119    |
-------------------------------

round:  36%|███▌      | 87/244 [02:15<04:10,  1.59s/it]

------------------------------------------------------
| raw/                               |               |
|    gen/rollout/ep_len_mean         | 24            |
|    gen/rollout/ep_rew_mean         | 152           |
|    gen/rollout/ep_rew_wrapped_mean | 0.0897        |
|    gen/time/fps                    | 2301          |
|    gen/time/iterations             | 1             |
|    gen/time/time_elapsed           | 0             |
|    gen/time/total_timesteps        | 180224        |
|    gen/train/approx_kl             | 1.1350494e-09 |
|    gen/train/clip_fraction         | 0             |
|    gen/train/clip_range            | 0.1           |
|    gen/train/entropy_loss          | -0.000377     |
|    gen/train/explained_variance    | 0.102         |
|    gen/train/learning_rate         | 0.0005        |
|    gen/train/loss                  | 0.00169       |
|    gen/train/n_updates             | 435           |
|    gen/train/policy_gradient_loss  | -4.83e-06     |
|    gen/t

round:  36%|███▌      | 88/244 [02:17<04:03,  1.56s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 24          |
|    gen/rollout/ep_rew_mean         | 150         |
|    gen/rollout/ep_rew_wrapped_mean | -0.153      |
|    gen/time/fps                    | 2306        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 182272      |
|    gen/train/approx_kl             | 0.000912284 |
|    gen/train/clip_fraction         | 0.000391    |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.00035    |
|    gen/train/explained_variance    | -0.921      |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 0.00237     |
|    gen/train/n_updates             | 440         |
|    gen/train/policy_gradient_loss  | -0.000281   |
|    gen/train/value_loss            | 0.0382 

round:  36%|███▋      | 89/244 [02:18<04:00,  1.55s/it]

------------------------------------------------------
| raw/                               |               |
|    gen/rollout/ep_len_mean         | 24.3          |
|    gen/rollout/ep_rew_mean         | 151           |
|    gen/rollout/ep_rew_wrapped_mean | -0.0475       |
|    gen/time/fps                    | 2318          |
|    gen/time/iterations             | 1             |
|    gen/time/time_elapsed           | 0             |
|    gen/time/total_timesteps        | 184320        |
|    gen/train/approx_kl             | 0.00074592314 |
|    gen/train/clip_fraction         | 0.00254       |
|    gen/train/clip_range            | 0.1           |
|    gen/train/entropy_loss          | -0.00252      |
|    gen/train/explained_variance    | 0.132         |
|    gen/train/learning_rate         | 0.0005        |
|    gen/train/loss                  | -0.00233      |
|    gen/train/n_updates             | 445           |
|    gen/train/policy_gradient_loss  | -0.000254     |
|    gen/t

round:  37%|███▋      | 90/244 [02:20<04:13,  1.64s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 24.1       |
|    gen/rollout/ep_rew_mean         | 153        |
|    gen/rollout/ep_rew_wrapped_mean | -2.06      |
|    gen/time/fps                    | 2262       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 0          |
|    gen/time/total_timesteps        | 186368     |
|    gen/train/approx_kl             | 0.02820564 |
|    gen/train/clip_fraction         | 0.0516     |
|    gen/train/clip_range            | 0.1        |
|    gen/train/entropy_loss          | -0.00728   |
|    gen/train/explained_variance    | -0.105     |
|    gen/train/learning_rate         | 0.0005     |
|    gen/train/loss                  | 0.175      |
|    gen/train/n_updates             | 450        |
|    gen/train/policy_gradient_loss  | -0.0045    |
|    gen/train/value_loss            | 0.837      |
------------

round:  37%|███▋      | 91/244 [02:22<04:06,  1.61s/it]

------------------------------------------------------
| raw/                               |               |
|    gen/rollout/ep_len_mean         | 24            |
|    gen/rollout/ep_rew_mean         | 150           |
|    gen/rollout/ep_rew_wrapped_mean | -0.307        |
|    gen/time/fps                    | 2311          |
|    gen/time/iterations             | 1             |
|    gen/time/time_elapsed           | 0             |
|    gen/time/total_timesteps        | 188416        |
|    gen/train/approx_kl             | 0.00042505015 |
|    gen/train/clip_fraction         | 0.000488      |
|    gen/train/clip_range            | 0.1           |
|    gen/train/entropy_loss          | -0.00031      |
|    gen/train/explained_variance    | -0.91         |
|    gen/train/learning_rate         | 0.0005        |
|    gen/train/loss                  | 0.0248        |
|    gen/train/n_updates             | 455           |
|    gen/train/policy_gradient_loss  | -0.000706     |
|    gen/t

round:  38%|███▊      | 92/244 [02:23<03:56,  1.55s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 153       |
|    gen/rollout/ep_rew_wrapped_mean | 0.00991   |
|    gen/time/fps                    | 2299      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 190464    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000166 |
|    gen/train/explained_variance    | -0.665    |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00135   |
|    gen/train/n_updates             | 460       |
|    gen/train/policy_gradient_loss  | -1.04e-06 |
|    gen/train/value_loss            | 0.0173    |
-------------------------------

round:  38%|███▊      | 93/244 [02:25<03:48,  1.52s/it]

-------------------------------------------------
| raw/                               |          |
|    gen/rollout/ep_len_mean         | 24       |
|    gen/rollout/ep_rew_mean         | 150      |
|    gen/rollout/ep_rew_wrapped_mean | -0.0845  |
|    gen/time/fps                    | 2209     |
|    gen/time/iterations             | 1        |
|    gen/time/time_elapsed           | 0        |
|    gen/time/total_timesteps        | 192512   |
|    gen/train/approx_kl             | 0.0      |
|    gen/train/clip_fraction         | 0        |
|    gen/train/clip_range            | 0.1      |
|    gen/train/entropy_loss          | -0.00016 |
|    gen/train/explained_variance    | -1.37    |
|    gen/train/learning_rate         | 0.0005   |
|    gen/train/loss                  | 0.00223  |
|    gen/train/n_updates             | 465      |
|    gen/train/policy_gradient_loss  | -4e-07   |
|    gen/train/value_loss            | 0.0188   |
-------------------------------------------------


round:  39%|███▊      | 94/244 [02:26<03:45,  1.51s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 152       |
|    gen/rollout/ep_rew_wrapped_mean | -0.108    |
|    gen/time/fps                    | 2317      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 194560    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000156 |
|    gen/train/explained_variance    | -0.711    |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00176   |
|    gen/train/n_updates             | 470       |
|    gen/train/policy_gradient_loss  | -8.25e-07 |
|    gen/train/value_loss            | 0.0171    |
-------------------------------

round:  39%|███▉      | 95/244 [02:28<03:42,  1.49s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 151       |
|    gen/rollout/ep_rew_wrapped_mean | 0.0937    |
|    gen/time/fps                    | 2297      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 196608    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000161 |
|    gen/train/explained_variance    | -1.26     |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00164   |
|    gen/train/n_updates             | 475       |
|    gen/train/policy_gradient_loss  | -1.26e-06 |
|    gen/train/value_loss            | 0.0188    |
-------------------------------

round:  39%|███▉      | 96/244 [02:29<04:01,  1.63s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 150       |
|    gen/rollout/ep_rew_wrapped_mean | -0.126    |
|    gen/time/fps                    | 2285      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 198656    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000167 |
|    gen/train/explained_variance    | 0.114     |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00128   |
|    gen/train/n_updates             | 480       |
|    gen/train/policy_gradient_loss  | -6.43e-07 |
|    gen/train/value_loss            | 0.0129    |
-------------------------------

round:  40%|███▉      | 97/244 [02:31<03:51,  1.58s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 149       |
|    gen/rollout/ep_rew_wrapped_mean | 0.0206    |
|    gen/time/fps                    | 2310      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 200704    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000151 |
|    gen/train/explained_variance    | -0.0602   |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00147   |
|    gen/train/n_updates             | 485       |
|    gen/train/policy_gradient_loss  | -6.33e-07 |
|    gen/train/value_loss            | 0.0151    |
-------------------------------

round:  40%|████      | 98/244 [02:32<03:47,  1.56s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 150       |
|    gen/rollout/ep_rew_wrapped_mean | 0.192     |
|    gen/time/fps                    | 2263      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 202752    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000146 |
|    gen/train/explained_variance    | -0.362    |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00308   |
|    gen/train/n_updates             | 490       |
|    gen/train/policy_gradient_loss  | -2.98e-07 |
|    gen/train/value_loss            | 0.0255    |
-------------------------------

round:  41%|████      | 99/244 [02:34<03:41,  1.52s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 149       |
|    gen/rollout/ep_rew_wrapped_mean | -0.159    |
|    gen/time/fps                    | 2314      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 204800    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000163 |
|    gen/train/explained_variance    | 0.0786    |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00215   |
|    gen/train/n_updates             | 495       |
|    gen/train/policy_gradient_loss  | -2.13e-07 |
|    gen/train/value_loss            | 0.0263    |
-------------------------------

round:  41%|████      | 100/244 [02:35<03:40,  1.53s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 151       |
|    gen/rollout/ep_rew_wrapped_mean | 0.119     |
|    gen/time/fps                    | 2289      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 206848    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000152 |
|    gen/train/explained_variance    | -0.387    |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00281   |
|    gen/train/n_updates             | 500       |
|    gen/train/policy_gradient_loss  | -1.37e-06 |
|    gen/train/value_loss            | 0.0309    |
-------------------------------

round:  41%|████▏     | 101/244 [02:37<03:34,  1.50s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 148       |
|    gen/rollout/ep_rew_wrapped_mean | 0.0408    |
|    gen/time/fps                    | 2311      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 208896    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000151 |
|    gen/train/explained_variance    | -1.77     |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00249   |
|    gen/train/n_updates             | 505       |
|    gen/train/policy_gradient_loss  | -1e-06    |
|    gen/train/value_loss            | 0.03      |
-------------------------------

round:  42%|████▏     | 102/244 [02:39<03:51,  1.63s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 150       |
|    gen/rollout/ep_rew_wrapped_mean | -0.141    |
|    gen/time/fps                    | 2253      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 210944    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000155 |
|    gen/train/explained_variance    | -0.857    |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00209   |
|    gen/train/n_updates             | 510       |
|    gen/train/policy_gradient_loss  | -8.26e-07 |
|    gen/train/value_loss            | 0.024     |
-------------------------------

round:  42%|████▏     | 103/244 [02:40<03:42,  1.58s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 147       |
|    gen/rollout/ep_rew_wrapped_mean | 0.174     |
|    gen/time/fps                    | 2317      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 212992    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000142 |
|    gen/train/explained_variance    | 0.109     |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00165   |
|    gen/train/n_updates             | 515       |
|    gen/train/policy_gradient_loss  | -5.93e-07 |
|    gen/train/value_loss            | 0.0234    |
-------------------------------

round:  43%|████▎     | 104/244 [02:42<03:38,  1.56s/it]

-------------------------------------------------
| raw/                               |          |
|    gen/rollout/ep_len_mean         | 24       |
|    gen/rollout/ep_rew_mean         | 148      |
|    gen/rollout/ep_rew_wrapped_mean | -0.0567  |
|    gen/time/fps                    | 2296     |
|    gen/time/iterations             | 1        |
|    gen/time/time_elapsed           | 0        |
|    gen/time/total_timesteps        | 215040   |
|    gen/train/approx_kl             | 0.0      |
|    gen/train/clip_fraction         | 0        |
|    gen/train/clip_range            | 0.1      |
|    gen/train/entropy_loss          | -0.00014 |
|    gen/train/explained_variance    | -1.8     |
|    gen/train/learning_rate         | 0.0005   |
|    gen/train/loss                  | 0.000645 |
|    gen/train/n_updates             | 520      |
|    gen/train/policy_gradient_loss  | -7.6e-07 |
|    gen/train/value_loss            | 0.0122   |
-------------------------------------------------


round:  43%|████▎     | 105/244 [02:43<03:31,  1.52s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 152       |
|    gen/rollout/ep_rew_wrapped_mean | -0.00663  |
|    gen/time/fps                    | 2261      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 217088    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000153 |
|    gen/train/explained_variance    | -0.0183   |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.000896  |
|    gen/train/n_updates             | 525       |
|    gen/train/policy_gradient_loss  | -6.74e-08 |
|    gen/train/value_loss            | 0.011     |
-------------------------------

round:  43%|████▎     | 106/244 [02:45<03:31,  1.53s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 152       |
|    gen/rollout/ep_rew_wrapped_mean | 0.198     |
|    gen/time/fps                    | 2295      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 219136    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000137 |
|    gen/train/explained_variance    | -0.124    |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.000566  |
|    gen/train/n_updates             | 530       |
|    gen/train/policy_gradient_loss  | -1.06e-06 |
|    gen/train/value_loss            | 0.0105    |
-------------------------------

round:  44%|████▍     | 107/244 [02:46<03:25,  1.50s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 153       |
|    gen/rollout/ep_rew_wrapped_mean | -0.0459   |
|    gen/time/fps                    | 2314      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 221184    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000137 |
|    gen/train/explained_variance    | -2.35     |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.000578  |
|    gen/train/n_updates             | 535       |
|    gen/train/policy_gradient_loss  | -1.02e-06 |
|    gen/train/value_loss            | 0.0109    |
-------------------------------

round:  44%|████▍     | 108/244 [02:48<03:43,  1.65s/it]

-------------------------------------------------
| raw/                               |          |
|    gen/rollout/ep_len_mean         | 24       |
|    gen/rollout/ep_rew_mean         | 153      |
|    gen/rollout/ep_rew_wrapped_mean | -0.075   |
|    gen/time/fps                    | 2312     |
|    gen/time/iterations             | 1        |
|    gen/time/time_elapsed           | 0        |
|    gen/time/total_timesteps        | 223232   |
|    gen/train/approx_kl             | 0.0      |
|    gen/train/clip_fraction         | 0        |
|    gen/train/clip_range            | 0.1      |
|    gen/train/entropy_loss          | -0.00016 |
|    gen/train/explained_variance    | -0.171   |
|    gen/train/learning_rate         | 0.0005   |
|    gen/train/loss                  | 0.00113  |
|    gen/train/n_updates             | 540      |
|    gen/train/policy_gradient_loss  | -2.2e-07 |
|    gen/train/value_loss            | 0.0132   |
-------------------------------------------------


round:  45%|████▍     | 109/244 [02:50<03:36,  1.61s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 147       |
|    gen/rollout/ep_rew_wrapped_mean | -0.0772   |
|    gen/time/fps                    | 1684      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 1         |
|    gen/time/total_timesteps        | 225280    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000151 |
|    gen/train/explained_variance    | -0.196    |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00435   |
|    gen/train/n_updates             | 545       |
|    gen/train/policy_gradient_loss  | -8.25e-07 |
|    gen/train/value_loss            | 0.0387    |
-------------------------------

round:  45%|████▌     | 110/244 [02:52<03:51,  1.73s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 149       |
|    gen/rollout/ep_rew_wrapped_mean | 0.154     |
|    gen/time/fps                    | 2174      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 227328    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000135 |
|    gen/train/explained_variance    | 0.0229    |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00295   |
|    gen/train/n_updates             | 550       |
|    gen/train/policy_gradient_loss  | -2.05e-07 |
|    gen/train/value_loss            | 0.0358    |
-------------------------------

round:  45%|████▌     | 111/244 [02:53<03:42,  1.67s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 151       |
|    gen/rollout/ep_rew_wrapped_mean | 0.00807   |
|    gen/time/fps                    | 2172      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 229376    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000147 |
|    gen/train/explained_variance    | -0.217    |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00166   |
|    gen/train/n_updates             | 555       |
|    gen/train/policy_gradient_loss  | -1.83e-06 |
|    gen/train/value_loss            | 0.0148    |
-------------------------------

round:  46%|████▌     | 112/244 [02:55<03:32,  1.61s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 152       |
|    gen/rollout/ep_rew_wrapped_mean | -0.0406   |
|    gen/time/fps                    | 2310      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 231424    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000164 |
|    gen/train/explained_variance    | 0.0105    |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00199   |
|    gen/train/n_updates             | 560       |
|    gen/train/policy_gradient_loss  | -6.39e-07 |
|    gen/train/value_loss            | 0.0246    |
-------------------------------

round:  46%|████▋     | 113/244 [02:56<03:23,  1.56s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 149       |
|    gen/rollout/ep_rew_wrapped_mean | 0.0868    |
|    gen/time/fps                    | 2205      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 233472    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000163 |
|    gen/train/explained_variance    | -0.279    |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00142   |
|    gen/train/n_updates             | 565       |
|    gen/train/policy_gradient_loss  | -9.43e-07 |
|    gen/train/value_loss            | 0.0128    |
-------------------------------

round:  47%|████▋     | 114/244 [02:58<03:35,  1.66s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 149       |
|    gen/rollout/ep_rew_wrapped_mean | 0.0597    |
|    gen/time/fps                    | 2300      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 235520    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000148 |
|    gen/train/explained_variance    | -0.254    |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.000681  |
|    gen/train/n_updates             | 570       |
|    gen/train/policy_gradient_loss  | -6.16e-06 |
|    gen/train/value_loss            | 0.00663   |
-------------------------------

round:  47%|████▋     | 115/244 [02:59<03:24,  1.59s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 153       |
|    gen/rollout/ep_rew_wrapped_mean | 0.000847  |
|    gen/time/fps                    | 2153      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 237568    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000118 |
|    gen/train/explained_variance    | -0.0116   |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00198   |
|    gen/train/n_updates             | 575       |
|    gen/train/policy_gradient_loss  | -4.21e-07 |
|    gen/train/value_loss            | 0.0212    |
-------------------------------

round:  48%|████▊     | 116/244 [03:01<03:20,  1.56s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 151       |
|    gen/rollout/ep_rew_wrapped_mean | 0.3       |
|    gen/time/fps                    | 2320      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 239616    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000113 |
|    gen/train/explained_variance    | -0.126    |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00209   |
|    gen/train/n_updates             | 580       |
|    gen/train/policy_gradient_loss  | -3.15e-07 |
|    gen/train/value_loss            | 0.0303    |
-------------------------------

round:  48%|████▊     | 117/244 [03:02<03:13,  1.52s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 150       |
|    gen/rollout/ep_rew_wrapped_mean | -0.085    |
|    gen/time/fps                    | 2200      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 241664    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000121 |
|    gen/train/explained_variance    | -0.269    |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00329   |
|    gen/train/n_updates             | 585       |
|    gen/train/policy_gradient_loss  | -3.33e-08 |
|    gen/train/value_loss            | 0.0269    |
-------------------------------

round:  48%|████▊     | 118/244 [03:04<03:09,  1.51s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 151       |
|    gen/rollout/ep_rew_wrapped_mean | 0.0207    |
|    gen/time/fps                    | 2307      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 243712    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.00012  |
|    gen/train/explained_variance    | -0.297    |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.000936  |
|    gen/train/n_updates             | 590       |
|    gen/train/policy_gradient_loss  | -6.36e-07 |
|    gen/train/value_loss            | 0.0145    |
-------------------------------

round:  49%|████▉     | 119/244 [03:05<03:05,  1.49s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 149       |
|    gen/rollout/ep_rew_wrapped_mean | 0.108     |
|    gen/time/fps                    | 2191      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 245760    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000125 |
|    gen/train/explained_variance    | -0.834    |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.000969  |
|    gen/train/n_updates             | 595       |
|    gen/train/policy_gradient_loss  | -9.06e-07 |
|    gen/train/value_loss            | 0.0127    |
-------------------------------

round:  49%|████▉     | 120/244 [03:07<03:19,  1.61s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 151       |
|    gen/rollout/ep_rew_wrapped_mean | -0.208    |
|    gen/time/fps                    | 2247      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 247808    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000145 |
|    gen/train/explained_variance    | -0.878    |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.000783  |
|    gen/train/n_updates             | 600       |
|    gen/train/policy_gradient_loss  | -3.58e-07 |
|    gen/train/value_loss            | 0.0112    |
-------------------------------

round:  50%|████▉     | 121/244 [03:09<03:13,  1.58s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 149       |
|    gen/rollout/ep_rew_wrapped_mean | -0.0253   |
|    gen/time/fps                    | 2157      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 249856    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000128 |
|    gen/train/explained_variance    | -0.94     |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.000897  |
|    gen/train/n_updates             | 605       |
|    gen/train/policy_gradient_loss  | -2.69e-06 |
|    gen/train/value_loss            | 0.0123    |
-------------------------------

round:  50%|█████     | 122/244 [03:10<03:10,  1.56s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 147       |
|    gen/rollout/ep_rew_wrapped_mean | 0.0343    |
|    gen/time/fps                    | 2307      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 251904    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000127 |
|    gen/train/explained_variance    | -1.03     |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00122   |
|    gen/train/n_updates             | 610       |
|    gen/train/policy_gradient_loss  | -1.11e-06 |
|    gen/train/value_loss            | 0.0121    |
-------------------------------

round:  50%|█████     | 123/244 [03:12<03:03,  1.52s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 24           |
|    gen/rollout/ep_rew_mean         | 149          |
|    gen/rollout/ep_rew_wrapped_mean | -0.0345      |
|    gen/time/fps                    | 2199         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 253952       |
|    gen/train/approx_kl             | 0.0017328679 |
|    gen/train/clip_fraction         | 0.000879     |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.00165     |
|    gen/train/explained_variance    | -0.00681     |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.0193       |
|    gen/train/n_updates             | 615          |
|    gen/train/policy_gradient_loss  | -0.00023     |
|    gen/train/value_loss   

round:  51%|█████     | 124/244 [03:13<03:01,  1.51s/it]

------------------------------------------------------
| raw/                               |               |
|    gen/rollout/ep_len_mean         | 24            |
|    gen/rollout/ep_rew_mean         | 146           |
|    gen/rollout/ep_rew_wrapped_mean | 0.105         |
|    gen/time/fps                    | 2317          |
|    gen/time/iterations             | 1             |
|    gen/time/time_elapsed           | 0             |
|    gen/time/total_timesteps        | 256000        |
|    gen/train/approx_kl             | 1.8715509e-06 |
|    gen/train/clip_fraction         | 0             |
|    gen/train/clip_range            | 0.1           |
|    gen/train/entropy_loss          | -0.00031      |
|    gen/train/explained_variance    | -0.49         |
|    gen/train/learning_rate         | 0.0005        |
|    gen/train/loss                  | 0.000627      |
|    gen/train/n_updates             | 620           |
|    gen/train/policy_gradient_loss  | -0.000219     |
|    gen/t

round:  51%|█████     | 125/244 [03:15<02:56,  1.49s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 152       |
|    gen/rollout/ep_rew_wrapped_mean | -0.108    |
|    gen/time/fps                    | 2157      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 258048    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000123 |
|    gen/train/explained_variance    | -1.58     |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.000998  |
|    gen/train/n_updates             | 625       |
|    gen/train/policy_gradient_loss  | -1.74e-07 |
|    gen/train/value_loss            | 0.0147    |
-------------------------------

round:  52%|█████▏    | 126/244 [03:16<03:10,  1.61s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 24           |
|    gen/rollout/ep_rew_mean         | 151          |
|    gen/rollout/ep_rew_wrapped_mean | 0.0993       |
|    gen/time/fps                    | 2300         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 260096       |
|    gen/train/approx_kl             | 6.548362e-09 |
|    gen/train/clip_fraction         | 0            |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.000184    |
|    gen/train/explained_variance    | -3.89        |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.00054      |
|    gen/train/n_updates             | 630          |
|    gen/train/policy_gradient_loss  | -7.21e-06    |
|    gen/train/value_loss   

round:  52%|█████▏    | 127/244 [03:18<03:03,  1.57s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 24           |
|    gen/rollout/ep_rew_mean         | 152          |
|    gen/rollout/ep_rew_wrapped_mean | 0.123        |
|    gen/time/fps                    | 2107         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 262144       |
|    gen/train/approx_kl             | 1.717126e-08 |
|    gen/train/clip_fraction         | 0            |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.000242    |
|    gen/train/explained_variance    | -0.0973      |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.000815     |
|    gen/train/n_updates             | 635          |
|    gen/train/policy_gradient_loss  | -1.47e-05    |
|    gen/train/value_loss   

round:  52%|█████▏    | 128/244 [03:19<02:59,  1.55s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 149       |
|    gen/rollout/ep_rew_wrapped_mean | 0.036     |
|    gen/time/fps                    | 2297      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 264192    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000145 |
|    gen/train/explained_variance    | -3.57     |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00168   |
|    gen/train/n_updates             | 640       |
|    gen/train/policy_gradient_loss  | -2.07e-07 |
|    gen/train/value_loss            | 0.0164    |
-------------------------------

round:  53%|█████▎    | 129/244 [03:21<02:54,  1.52s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 24           |
|    gen/rollout/ep_rew_mean         | 153          |
|    gen/rollout/ep_rew_wrapped_mean | 0.0509       |
|    gen/time/fps                    | 2172         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 266240       |
|    gen/train/approx_kl             | 7.858034e-10 |
|    gen/train/clip_fraction         | 0            |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.000177    |
|    gen/train/explained_variance    | -0.101       |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.00226      |
|    gen/train/n_updates             | 645          |
|    gen/train/policy_gradient_loss  | -1.71e-06    |
|    gen/train/value_loss   

round:  53%|█████▎    | 130/244 [03:22<02:52,  1.51s/it]

------------------------------------------------------
| raw/                               |               |
|    gen/rollout/ep_len_mean         | 24            |
|    gen/rollout/ep_rew_mean         | 151           |
|    gen/rollout/ep_rew_wrapped_mean | 0.0408        |
|    gen/time/fps                    | 2312          |
|    gen/time/iterations             | 1             |
|    gen/time/time_elapsed           | 0             |
|    gen/time/total_timesteps        | 268288        |
|    gen/train/approx_kl             | 2.0954758e-09 |
|    gen/train/clip_fraction         | 0             |
|    gen/train/clip_range            | 0.1           |
|    gen/train/entropy_loss          | -0.000177     |
|    gen/train/explained_variance    | 0.124         |
|    gen/train/learning_rate         | 0.0005        |
|    gen/train/loss                  | 0.00186       |
|    gen/train/n_updates             | 650           |
|    gen/train/policy_gradient_loss  | -5.56e-06     |
|    gen/t

round:  54%|█████▎    | 131/244 [03:24<03:06,  1.65s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 146       |
|    gen/rollout/ep_rew_wrapped_mean | 0.0495    |
|    gen/time/fps                    | 1112      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 1         |
|    gen/time/total_timesteps        | 270336    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000104 |
|    gen/train/explained_variance    | 0.14      |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00181   |
|    gen/train/n_updates             | 655       |
|    gen/train/policy_gradient_loss  | -4.84e-08 |
|    gen/train/value_loss            | 0.0177    |
-------------------------------

round:  54%|█████▍    | 132/244 [03:28<03:57,  2.12s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 27.4        |
|    gen/rollout/ep_rew_mean         | 147         |
|    gen/rollout/ep_rew_wrapped_mean | -0.0394     |
|    gen/time/fps                    | 2151        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 272384      |
|    gen/train/approx_kl             | 0.000893336 |
|    gen/train/clip_fraction         | 0.000586    |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.00084    |
|    gen/train/explained_variance    | -0.683      |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | -0.00441    |
|    gen/train/n_updates             | 660         |
|    gen/train/policy_gradient_loss  | -8.39e-05   |
|    gen/train/value_loss            | 0.00792

round:  55%|█████▍    | 133/244 [03:29<03:39,  1.97s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 26.2        |
|    gen/rollout/ep_rew_mean         | 147         |
|    gen/rollout/ep_rew_wrapped_mean | -10.3       |
|    gen/time/fps                    | 2020        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 1           |
|    gen/time/total_timesteps        | 274432      |
|    gen/train/approx_kl             | 0.061494805 |
|    gen/train/clip_fraction         | 0.211       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.106      |
|    gen/train/explained_variance    | 0.00916     |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 0.751       |
|    gen/train/n_updates             | 665         |
|    gen/train/policy_gradient_loss  | -0.0131     |
|    gen/train/value_loss            | 14.1   

round:  55%|█████▍    | 134/244 [03:31<03:38,  1.98s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 25.4         |
|    gen/rollout/ep_rew_mean         | 149          |
|    gen/rollout/ep_rew_wrapped_mean | -2.92        |
|    gen/time/fps                    | 181          |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 11           |
|    gen/time/total_timesteps        | 276480       |
|    gen/train/approx_kl             | 0.0026035125 |
|    gen/train/clip_fraction         | 0.103        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.104       |
|    gen/train/explained_variance    | 0.297        |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.549        |
|    gen/train/n_updates             | 670          |
|    gen/train/policy_gradient_loss  | -0.0095      |
|    gen/train/value_loss   

round:  55%|█████▌    | 135/244 [03:44<09:18,  5.12s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 24.6       |
|    gen/rollout/ep_rew_mean         | 148        |
|    gen/rollout/ep_rew_wrapped_mean | -4.42      |
|    gen/time/fps                    | 807        |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 2          |
|    gen/time/total_timesteps        | 278528     |
|    gen/train/approx_kl             | 0.01265759 |
|    gen/train/clip_fraction         | 0.115      |
|    gen/train/clip_range            | 0.1        |
|    gen/train/entropy_loss          | -0.067     |
|    gen/train/explained_variance    | 0.518      |
|    gen/train/learning_rate         | 0.0005     |
|    gen/train/loss                  | 0.208      |
|    gen/train/n_updates             | 675        |
|    gen/train/policy_gradient_loss  | -0.0143    |
|    gen/train/value_loss            | 3.58       |
------------

round:  56%|█████▌    | 136/244 [03:47<08:28,  4.71s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 24.1       |
|    gen/rollout/ep_rew_mean         | 148        |
|    gen/rollout/ep_rew_wrapped_mean | -0.858     |
|    gen/time/fps                    | 2150       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 0          |
|    gen/time/total_timesteps        | 280576     |
|    gen/train/approx_kl             | 0.04408969 |
|    gen/train/clip_fraction         | 0.0575     |
|    gen/train/clip_range            | 0.1        |
|    gen/train/entropy_loss          | -0.00663   |
|    gen/train/explained_variance    | -1.06      |
|    gen/train/learning_rate         | 0.0005     |
|    gen/train/loss                  | 0.169      |
|    gen/train/n_updates             | 680        |
|    gen/train/policy_gradient_loss  | -0.011     |
|    gen/train/value_loss            | 1.3        |
------------

round:  56%|█████▌    | 137/244 [03:49<06:48,  3.82s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 24           |
|    gen/rollout/ep_rew_mean         | 149          |
|    gen/rollout/ep_rew_wrapped_mean | -0.439       |
|    gen/time/fps                    | 2246         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 282624       |
|    gen/train/approx_kl             | 2.591405e-07 |
|    gen/train/clip_fraction         | 0            |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.00133     |
|    gen/train/explained_variance    | -0.803       |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.00428      |
|    gen/train/n_updates             | 685          |
|    gen/train/policy_gradient_loss  | -0.000185    |
|    gen/train/value_loss   

round:  57%|█████▋    | 138/244 [03:52<06:22,  3.61s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 25.3        |
|    gen/rollout/ep_rew_mean         | 150         |
|    gen/rollout/ep_rew_wrapped_mean | -0.0159     |
|    gen/time/fps                    | 2184        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 284672      |
|    gen/train/approx_kl             | 0.004614603 |
|    gen/train/clip_fraction         | 0.163       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.108      |
|    gen/train/explained_variance    | 0.247       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | -0.0293     |
|    gen/train/n_updates             | 690         |
|    gen/train/policy_gradient_loss  | -0.0107     |
|    gen/train/value_loss            | 0.0249 

round:  57%|█████▋    | 139/244 [03:54<05:16,  3.02s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24.5      |
|    gen/rollout/ep_rew_mean         | 148       |
|    gen/rollout/ep_rew_wrapped_mean | -9.84     |
|    gen/time/fps                    | 2287      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 286720    |
|    gen/train/approx_kl             | 0.0394328 |
|    gen/train/clip_fraction         | 0.145     |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.103    |
|    gen/train/explained_variance    | -0.0128   |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 1.25      |
|    gen/train/n_updates             | 695       |
|    gen/train/policy_gradient_loss  | -0.0168   |
|    gen/train/value_loss            | 15.2      |
-------------------------------

round:  57%|█████▋    | 140/244 [03:55<04:26,  2.57s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 24.1         |
|    gen/rollout/ep_rew_mean         | 148          |
|    gen/rollout/ep_rew_wrapped_mean | -1.5         |
|    gen/time/fps                    | 2264         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 288768       |
|    gen/train/approx_kl             | 0.0150158275 |
|    gen/train/clip_fraction         | 0.0362       |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.0249      |
|    gen/train/explained_variance    | -0.226       |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.105        |
|    gen/train/n_updates             | 700          |
|    gen/train/policy_gradient_loss  | -0.00687     |
|    gen/train/value_loss   

round:  58%|█████▊    | 141/244 [03:57<03:50,  2.23s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 24         |
|    gen/rollout/ep_rew_mean         | 147        |
|    gen/rollout/ep_rew_wrapped_mean | -0.706     |
|    gen/time/fps                    | 2303       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 0          |
|    gen/time/total_timesteps        | 290816     |
|    gen/train/approx_kl             | 0.00674335 |
|    gen/train/clip_fraction         | 0.0173     |
|    gen/train/clip_range            | 0.1        |
|    gen/train/entropy_loss          | -0.00581   |
|    gen/train/explained_variance    | 0.265      |
|    gen/train/learning_rate         | 0.0005     |
|    gen/train/loss                  | 0.0266     |
|    gen/train/n_updates             | 705        |
|    gen/train/policy_gradient_loss  | -0.00361   |
|    gen/train/value_loss            | 0.351      |
------------

round:  58%|█████▊    | 142/244 [03:58<03:25,  2.02s/it]

------------------------------------------------------
| raw/                               |               |
|    gen/rollout/ep_len_mean         | 24            |
|    gen/rollout/ep_rew_mean         | 152           |
|    gen/rollout/ep_rew_wrapped_mean | 0.0427        |
|    gen/time/fps                    | 2255          |
|    gen/time/iterations             | 1             |
|    gen/time/time_elapsed           | 0             |
|    gen/time/total_timesteps        | 292864        |
|    gen/train/approx_kl             | 0.00010911113 |
|    gen/train/clip_fraction         | 0.000391      |
|    gen/train/clip_range            | 0.1           |
|    gen/train/entropy_loss          | -0.00216      |
|    gen/train/explained_variance    | -1.71         |
|    gen/train/learning_rate         | 0.0005        |
|    gen/train/loss                  | 0.0264        |
|    gen/train/n_updates             | 710           |
|    gen/train/policy_gradient_loss  | -0.00045      |
|    gen/t

round:  59%|█████▊    | 143/244 [04:00<03:08,  1.86s/it]

------------------------------------------------------
| raw/                               |               |
|    gen/rollout/ep_len_mean         | 24            |
|    gen/rollout/ep_rew_mean         | 152           |
|    gen/rollout/ep_rew_wrapped_mean | 0.0529        |
|    gen/time/fps                    | 2264          |
|    gen/time/iterations             | 1             |
|    gen/time/time_elapsed           | 0             |
|    gen/time/total_timesteps        | 294912        |
|    gen/train/approx_kl             | 1.8044375e-09 |
|    gen/train/clip_fraction         | 0             |
|    gen/train/clip_range            | 0.1           |
|    gen/train/entropy_loss          | -0.00145      |
|    gen/train/explained_variance    | -0.0668       |
|    gen/train/learning_rate         | 0.0005        |
|    gen/train/loss                  | 0.00662       |
|    gen/train/n_updates             | 715           |
|    gen/train/policy_gradient_loss  | -5.11e-06     |
|    gen/t

round:  59%|█████▉    | 144/244 [04:02<03:10,  1.91s/it]

------------------------------------------------------
| raw/                               |               |
|    gen/rollout/ep_len_mean         | 24            |
|    gen/rollout/ep_rew_mean         | 149           |
|    gen/rollout/ep_rew_wrapped_mean | -0.224        |
|    gen/time/fps                    | 2217          |
|    gen/time/iterations             | 1             |
|    gen/time/time_elapsed           | 0             |
|    gen/time/total_timesteps        | 296960        |
|    gen/train/approx_kl             | 7.5053365e-05 |
|    gen/train/clip_fraction         | 0.000977      |
|    gen/train/clip_range            | 0.1           |
|    gen/train/entropy_loss          | -0.00122      |
|    gen/train/explained_variance    | -0.203        |
|    gen/train/learning_rate         | 0.0005        |
|    gen/train/loss                  | 0.0159        |
|    gen/train/n_updates             | 720           |
|    gen/train/policy_gradient_loss  | -0.000156     |
|    gen/t

round:  59%|█████▉    | 145/244 [04:03<02:57,  1.79s/it]

------------------------------------------------------
| raw/                               |               |
|    gen/rollout/ep_len_mean         | 24            |
|    gen/rollout/ep_rew_mean         | 147           |
|    gen/rollout/ep_rew_wrapped_mean | 0.0974        |
|    gen/time/fps                    | 2268          |
|    gen/time/iterations             | 1             |
|    gen/time/time_elapsed           | 0             |
|    gen/time/total_timesteps        | 299008        |
|    gen/train/approx_kl             | 4.2200554e-09 |
|    gen/train/clip_fraction         | 0             |
|    gen/train/clip_range            | 0.1           |
|    gen/train/entropy_loss          | -0.00141      |
|    gen/train/explained_variance    | -0.0739       |
|    gen/train/learning_rate         | 0.0005        |
|    gen/train/loss                  | 0.00144       |
|    gen/train/n_updates             | 725           |
|    gen/train/policy_gradient_loss  | -6.12e-06     |
|    gen/t

round:  60%|█████▉    | 146/244 [04:05<02:47,  1.71s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 24          |
|    gen/rollout/ep_rew_mean         | 149         |
|    gen/rollout/ep_rew_wrapped_mean | -0.18       |
|    gen/time/fps                    | 2247        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 301056      |
|    gen/train/approx_kl             | 0.000514428 |
|    gen/train/clip_fraction         | 0.000391    |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.000927   |
|    gen/train/explained_variance    | 0.00975     |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 0.0876      |
|    gen/train/n_updates             | 730         |
|    gen/train/policy_gradient_loss  | -0.000397   |
|    gen/train/value_loss            | 0.665  

round:  60%|██████    | 147/244 [04:07<02:43,  1.68s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 151       |
|    gen/rollout/ep_rew_wrapped_mean | 0.152     |
|    gen/time/fps                    | 2114      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 303104    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000722 |
|    gen/train/explained_variance    | -1.13     |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00123   |
|    gen/train/n_updates             | 735       |
|    gen/train/policy_gradient_loss  | -2.88e-06 |
|    gen/train/value_loss            | 0.0269    |
-------------------------------

round:  61%|██████    | 148/244 [04:08<02:40,  1.67s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 150       |
|    gen/rollout/ep_rew_wrapped_mean | -0.0157   |
|    gen/time/fps                    | 2257      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 305152    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000717 |
|    gen/train/explained_variance    | 0.0319    |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.002     |
|    gen/train/n_updates             | 740       |
|    gen/train/policy_gradient_loss  | -2.08e-06 |
|    gen/train/value_loss            | 0.0254    |
-------------------------------

round:  61%|██████    | 149/244 [04:10<02:34,  1.62s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24        |
|    gen/rollout/ep_rew_mean         | 149       |
|    gen/rollout/ep_rew_wrapped_mean | 0.102     |
|    gen/time/fps                    | 2103      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 307200    |
|    gen/train/approx_kl             | 0.0       |
|    gen/train/clip_fraction         | 0         |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.000739 |
|    gen/train/explained_variance    | -0.118    |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.00159   |
|    gen/train/n_updates             | 745       |
|    gen/train/policy_gradient_loss  | -5.11e-06 |
|    gen/train/value_loss            | 0.0178    |
-------------------------------

round:  61%|██████▏   | 150/244 [04:12<02:42,  1.72s/it]

------------------------------------------------------
| raw/                               |               |
|    gen/rollout/ep_len_mean         | 24            |
|    gen/rollout/ep_rew_mean         | 150           |
|    gen/rollout/ep_rew_wrapped_mean | -0.0885       |
|    gen/time/fps                    | 2260          |
|    gen/time/iterations             | 1             |
|    gen/time/time_elapsed           | 0             |
|    gen/time/total_timesteps        | 309248        |
|    gen/train/approx_kl             | 0.00050833204 |
|    gen/train/clip_fraction         | 0.000391      |
|    gen/train/clip_range            | 0.1           |
|    gen/train/entropy_loss          | -0.000885     |
|    gen/train/explained_variance    | 0.03          |
|    gen/train/learning_rate         | 0.0005        |
|    gen/train/loss                  | 0.00263       |
|    gen/train/n_updates             | 750           |
|    gen/train/policy_gradient_loss  | -0.000316     |
|    gen/t

round:  62%|██████▏   | 151/244 [04:13<02:33,  1.66s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 24.4         |
|    gen/rollout/ep_rew_mean         | 146          |
|    gen/rollout/ep_rew_wrapped_mean | 0.115        |
|    gen/time/fps                    | 2097         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 311296       |
|    gen/train/approx_kl             | 0.0034006154 |
|    gen/train/clip_fraction         | 0.0362       |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.0276      |
|    gen/train/explained_variance    | -0.174       |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | -0.00151     |
|    gen/train/n_updates             | 755          |
|    gen/train/policy_gradient_loss  | -0.00335     |
|    gen/train/value_loss   

round:  62%|██████▏   | 152/244 [04:15<02:29,  1.62s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 24.2        |
|    gen/rollout/ep_rew_mean         | 150         |
|    gen/rollout/ep_rew_wrapped_mean | -2.49       |
|    gen/time/fps                    | 2242        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 313344      |
|    gen/train/approx_kl             | 0.027557317 |
|    gen/train/clip_fraction         | 0.0551      |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.0356     |
|    gen/train/explained_variance    | -0.0138     |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 0.12        |
|    gen/train/n_updates             | 760         |
|    gen/train/policy_gradient_loss  | -0.0051     |
|    gen/train/value_loss            | 2.37   

round:  63%|██████▎   | 153/244 [04:16<02:23,  1.57s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 38.6      |
|    gen/rollout/ep_rew_mean         | 120       |
|    gen/rollout/ep_rew_wrapped_mean | -0.702    |
|    gen/time/fps                    | 2190      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 315392    |
|    gen/train/approx_kl             | 1.4213376 |
|    gen/train/clip_fraction         | 0.2       |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.0575   |
|    gen/train/explained_variance    | -0.129    |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 0.035     |
|    gen/train/n_updates             | 765       |
|    gen/train/policy_gradient_loss  | -0.0187   |
|    gen/train/value_loss            | 0.565     |
-------------------------------

round:  63%|██████▎   | 154/244 [04:18<02:19,  1.55s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 52.4         |
|    gen/rollout/ep_rew_mean         | 96.5         |
|    gen/rollout/ep_rew_wrapped_mean | -182         |
|    gen/time/fps                    | 2334         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 317440       |
|    gen/train/approx_kl             | 0.0049793804 |
|    gen/train/clip_fraction         | 0.239        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.405       |
|    gen/train/explained_variance    | -0.00774     |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 584          |
|    gen/train/n_updates             | 770          |
|    gen/train/policy_gradient_loss  | -0.0121      |
|    gen/train/value_loss   

round:  64%|██████▎   | 155/244 [04:19<02:17,  1.54s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 66.4         |
|    gen/rollout/ep_rew_mean         | 69.5         |
|    gen/rollout/ep_rew_wrapped_mean | -339         |
|    gen/time/fps                    | 2356         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 319488       |
|    gen/train/approx_kl             | 0.0036487486 |
|    gen/train/clip_fraction         | 0.211        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.407       |
|    gen/train/explained_variance    | 0.0245       |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 390          |
|    gen/train/n_updates             | 775          |
|    gen/train/policy_gradient_loss  | -0.00897     |
|    gen/train/value_loss   

round:  64%|██████▍   | 156/244 [04:21<02:24,  1.64s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 72.8         |
|    gen/rollout/ep_rew_mean         | 59.4         |
|    gen/rollout/ep_rew_wrapped_mean | -470         |
|    gen/time/fps                    | 2290         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 321536       |
|    gen/train/approx_kl             | 0.0034831294 |
|    gen/train/clip_fraction         | 0.212        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.417       |
|    gen/train/explained_variance    | 0.0246       |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 187          |
|    gen/train/n_updates             | 780          |
|    gen/train/policy_gradient_loss  | -0.00834     |
|    gen/train/value_loss   

round:  64%|██████▍   | 157/244 [04:23<02:20,  1.62s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 66.4         |
|    gen/rollout/ep_rew_mean         | 71.7         |
|    gen/rollout/ep_rew_wrapped_mean | -497         |
|    gen/time/fps                    | 2352         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 323584       |
|    gen/train/approx_kl             | 0.0026106108 |
|    gen/train/clip_fraction         | 0.17         |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.415       |
|    gen/train/explained_variance    | 0.0199       |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 84.1         |
|    gen/train/n_updates             | 785          |
|    gen/train/policy_gradient_loss  | -0.00544     |
|    gen/train/value_loss   

round:  65%|██████▍   | 158/244 [04:24<02:14,  1.56s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 61           |
|    gen/rollout/ep_rew_mean         | 82.6         |
|    gen/rollout/ep_rew_wrapped_mean | -372         |
|    gen/time/fps                    | 2246         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 325632       |
|    gen/train/approx_kl             | 0.0032063823 |
|    gen/train/clip_fraction         | 0.213        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.433       |
|    gen/train/explained_variance    | 0.0252       |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 57.8         |
|    gen/train/n_updates             | 790          |
|    gen/train/policy_gradient_loss  | -0.00841     |
|    gen/train/value_loss   

round:  65%|██████▌   | 159/244 [04:26<02:11,  1.55s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 57.4         |
|    gen/rollout/ep_rew_mean         | 88.9         |
|    gen/rollout/ep_rew_wrapped_mean | -295         |
|    gen/time/fps                    | 2300         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 327680       |
|    gen/train/approx_kl             | 0.0036641126 |
|    gen/train/clip_fraction         | 0.217        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.449       |
|    gen/train/explained_variance    | 0.0393       |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 46.4         |
|    gen/train/n_updates             | 795          |
|    gen/train/policy_gradient_loss  | -0.00645     |
|    gen/train/value_loss   

round:  66%|██████▌   | 160/244 [04:27<02:10,  1.55s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 54.3        |
|    gen/rollout/ep_rew_mean         | 96.6        |
|    gen/rollout/ep_rew_wrapped_mean | -256        |
|    gen/time/fps                    | 2310        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 329728      |
|    gen/train/approx_kl             | 0.005854524 |
|    gen/train/clip_fraction         | 0.239       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.455      |
|    gen/train/explained_variance    | 0.0918      |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 39.8        |
|    gen/train/n_updates             | 800         |
|    gen/train/policy_gradient_loss  | -0.000921   |
|    gen/train/value_loss            | 335    

round:  66%|██████▌   | 161/244 [04:29<02:06,  1.53s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 51.2         |
|    gen/rollout/ep_rew_mean         | 101          |
|    gen/rollout/ep_rew_wrapped_mean | -227         |
|    gen/time/fps                    | 2343         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 331776       |
|    gen/train/approx_kl             | 0.0063754367 |
|    gen/train/clip_fraction         | 0.253        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.476       |
|    gen/train/explained_variance    | 0.085        |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 44.8         |
|    gen/train/n_updates             | 805          |
|    gen/train/policy_gradient_loss  | -0.0074      |
|    gen/train/value_loss   

round:  66%|██████▋   | 162/244 [04:31<02:16,  1.67s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 48.3         |
|    gen/rollout/ep_rew_mean         | 109          |
|    gen/rollout/ep_rew_wrapped_mean | -205         |
|    gen/time/fps                    | 2120         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 333824       |
|    gen/train/approx_kl             | 0.0051870756 |
|    gen/train/clip_fraction         | 0.244        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.494       |
|    gen/train/explained_variance    | 0.14         |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 37.4         |
|    gen/train/n_updates             | 810          |
|    gen/train/policy_gradient_loss  | -0.00915     |
|    gen/train/value_loss   

round:  67%|██████▋   | 163/244 [04:32<02:14,  1.66s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 45.5         |
|    gen/rollout/ep_rew_mean         | 113          |
|    gen/rollout/ep_rew_wrapped_mean | -184         |
|    gen/time/fps                    | 2155         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 335872       |
|    gen/train/approx_kl             | 0.0044893385 |
|    gen/train/clip_fraction         | 0.237        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.497       |
|    gen/train/explained_variance    | 0.165        |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 29.8         |
|    gen/train/n_updates             | 815          |
|    gen/train/policy_gradient_loss  | -0.00973     |
|    gen/train/value_loss   

round:  67%|██████▋   | 164/244 [04:34<02:09,  1.62s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 43.6        |
|    gen/rollout/ep_rew_mean         | 111         |
|    gen/rollout/ep_rew_wrapped_mean | -159        |
|    gen/time/fps                    | 2038        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 1           |
|    gen/time/total_timesteps        | 337920      |
|    gen/train/approx_kl             | 0.004403235 |
|    gen/train/clip_fraction         | 0.249       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.51       |
|    gen/train/explained_variance    | 0.171       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 22          |
|    gen/train/n_updates             | 820         |
|    gen/train/policy_gradient_loss  | -0.0124     |
|    gen/train/value_loss            | 254    

round:  68%|██████▊   | 165/244 [04:36<02:14,  1.70s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 41.1        |
|    gen/rollout/ep_rew_mean         | 118         |
|    gen/rollout/ep_rew_wrapped_mean | -143        |
|    gen/time/fps                    | 2040        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 1           |
|    gen/time/total_timesteps        | 339968      |
|    gen/train/approx_kl             | 0.004163822 |
|    gen/train/clip_fraction         | 0.264       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.509      |
|    gen/train/explained_variance    | 0.181       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 23.6        |
|    gen/train/n_updates             | 825         |
|    gen/train/policy_gradient_loss  | -0.0109     |
|    gen/train/value_loss            | 245    

round:  68%|██████▊   | 166/244 [04:37<02:13,  1.71s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 39.2         |
|    gen/rollout/ep_rew_mean         | 122          |
|    gen/rollout/ep_rew_wrapped_mean | -125         |
|    gen/time/fps                    | 2266         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 342016       |
|    gen/train/approx_kl             | 0.0038713817 |
|    gen/train/clip_fraction         | 0.259        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.505       |
|    gen/train/explained_variance    | 0.188        |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 24.7         |
|    gen/train/n_updates             | 830          |
|    gen/train/policy_gradient_loss  | -0.016       |
|    gen/train/value_loss   

round:  68%|██████▊   | 167/244 [04:39<02:06,  1.64s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 38.1        |
|    gen/rollout/ep_rew_mean         | 124         |
|    gen/rollout/ep_rew_wrapped_mean | -114        |
|    gen/time/fps                    | 2171        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 344064      |
|    gen/train/approx_kl             | 0.005219745 |
|    gen/train/clip_fraction         | 0.296       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.498      |
|    gen/train/explained_variance    | 0.219       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 13          |
|    gen/train/n_updates             | 835         |
|    gen/train/policy_gradient_loss  | -0.0153     |
|    gen/train/value_loss            | 187    

round:  69%|██████▉   | 168/244 [04:41<02:11,  1.73s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 36.4        |
|    gen/rollout/ep_rew_mean         | 132         |
|    gen/rollout/ep_rew_wrapped_mean | -108        |
|    gen/time/fps                    | 2257        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 346112      |
|    gen/train/approx_kl             | 0.003797035 |
|    gen/train/clip_fraction         | 0.291       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.491      |
|    gen/train/explained_variance    | 0.281       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 16.6        |
|    gen/train/n_updates             | 840         |
|    gen/train/policy_gradient_loss  | -0.0151     |
|    gen/train/value_loss            | 152    

round:  69%|██████▉   | 169/244 [04:42<02:06,  1.69s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 34.7        |
|    gen/rollout/ep_rew_mean         | 134         |
|    gen/rollout/ep_rew_wrapped_mean | -97.1       |
|    gen/time/fps                    | 2328        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 348160      |
|    gen/train/approx_kl             | 0.004436538 |
|    gen/train/clip_fraction         | 0.282       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.476      |
|    gen/train/explained_variance    | 0.312       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 10.7        |
|    gen/train/n_updates             | 845         |
|    gen/train/policy_gradient_loss  | -0.019      |
|    gen/train/value_loss            | 136    

round:  70%|██████▉   | 170/244 [04:44<02:00,  1.63s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 33.7        |
|    gen/rollout/ep_rew_mean         | 131         |
|    gen/rollout/ep_rew_wrapped_mean | -85.6       |
|    gen/time/fps                    | 2332        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 350208      |
|    gen/train/approx_kl             | 0.004130722 |
|    gen/train/clip_fraction         | 0.294       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.458      |
|    gen/train/explained_variance    | 0.416       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 10.4        |
|    gen/train/n_updates             | 850         |
|    gen/train/policy_gradient_loss  | -0.0203     |
|    gen/train/value_loss            | 108    

round:  70%|███████   | 171/244 [04:45<01:55,  1.59s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 32.3        |
|    gen/rollout/ep_rew_mean         | 138         |
|    gen/rollout/ep_rew_wrapped_mean | -79.8       |
|    gen/time/fps                    | 2296        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 352256      |
|    gen/train/approx_kl             | 0.006579759 |
|    gen/train/clip_fraction         | 0.331       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.438      |
|    gen/train/explained_variance    | 0.371       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 10.8        |
|    gen/train/n_updates             | 855         |
|    gen/train/policy_gradient_loss  | -0.0229     |
|    gen/train/value_loss            | 100    

round:  70%|███████   | 172/244 [04:47<01:51,  1.55s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 30.9         |
|    gen/rollout/ep_rew_mean         | 137          |
|    gen/rollout/ep_rew_wrapped_mean | -70.6        |
|    gen/time/fps                    | 2198         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 354304       |
|    gen/train/approx_kl             | 0.0061589708 |
|    gen/train/clip_fraction         | 0.314        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.408       |
|    gen/train/explained_variance    | 0.44         |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 6.65         |
|    gen/train/n_updates             | 860          |
|    gen/train/policy_gradient_loss  | -0.0184      |
|    gen/train/value_loss   

round:  71%|███████   | 173/244 [04:48<01:48,  1.53s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 29.4         |
|    gen/rollout/ep_rew_mean         | 143          |
|    gen/rollout/ep_rew_wrapped_mean | -58.3        |
|    gen/time/fps                    | 2324         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 356352       |
|    gen/train/approx_kl             | 0.0059358664 |
|    gen/train/clip_fraction         | 0.272        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.373       |
|    gen/train/explained_variance    | 0.377        |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 4.99         |
|    gen/train/n_updates             | 865          |
|    gen/train/policy_gradient_loss  | -0.022       |
|    gen/train/value_loss   

round:  71%|███████▏  | 174/244 [04:50<01:55,  1.66s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 28.6         |
|    gen/rollout/ep_rew_mean         | 144          |
|    gen/rollout/ep_rew_wrapped_mean | -44.9        |
|    gen/time/fps                    | 2307         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 358400       |
|    gen/train/approx_kl             | 0.0064278385 |
|    gen/train/clip_fraction         | 0.268        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.33        |
|    gen/train/explained_variance    | 0.274        |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 6.19         |
|    gen/train/n_updates             | 870          |
|    gen/train/policy_gradient_loss  | -0.0206      |
|    gen/train/value_loss   

round:  72%|███████▏  | 175/244 [04:52<01:49,  1.59s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 27.7        |
|    gen/rollout/ep_rew_mean         | 142         |
|    gen/rollout/ep_rew_wrapped_mean | -38.1       |
|    gen/time/fps                    | 2176        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 360448      |
|    gen/train/approx_kl             | 0.015147862 |
|    gen/train/clip_fraction         | 0.25        |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.283      |
|    gen/train/explained_variance    | 0.363       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 3.75        |
|    gen/train/n_updates             | 875         |
|    gen/train/policy_gradient_loss  | -0.0192     |
|    gen/train/value_loss            | 39.6   

round:  72%|███████▏  | 176/244 [04:53<01:46,  1.56s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 27.2        |
|    gen/rollout/ep_rew_mean         | 144         |
|    gen/rollout/ep_rew_wrapped_mean | -31.3       |
|    gen/time/fps                    | 2303        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 362496      |
|    gen/train/approx_kl             | 0.006066943 |
|    gen/train/clip_fraction         | 0.223       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.252      |
|    gen/train/explained_variance    | 0.219       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 4.62        |
|    gen/train/n_updates             | 880         |
|    gen/train/policy_gradient_loss  | -0.0191     |
|    gen/train/value_loss            | 43.1   

round:  73%|███████▎  | 177/244 [04:55<01:43,  1.54s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 26          |
|    gen/rollout/ep_rew_mean         | 146         |
|    gen/rollout/ep_rew_wrapped_mean | -26.4       |
|    gen/time/fps                    | 2318        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 364544      |
|    gen/train/approx_kl             | 0.015098841 |
|    gen/train/clip_fraction         | 0.213       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.209      |
|    gen/train/explained_variance    | 0.181       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 3.64        |
|    gen/train/n_updates             | 885         |
|    gen/train/policy_gradient_loss  | -0.0185     |
|    gen/train/value_loss            | 36.8   

round:  73%|███████▎  | 178/244 [04:56<01:39,  1.51s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 25.7        |
|    gen/rollout/ep_rew_mean         | 147         |
|    gen/rollout/ep_rew_wrapped_mean | -15.6       |
|    gen/time/fps                    | 2212        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 366592      |
|    gen/train/approx_kl             | 0.012068853 |
|    gen/train/clip_fraction         | 0.156       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.15       |
|    gen/train/explained_variance    | -0.00808    |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 2.73        |
|    gen/train/n_updates             | 890         |
|    gen/train/policy_gradient_loss  | -0.0147     |
|    gen/train/value_loss            | 27.8   

round:  73%|███████▎  | 179/244 [04:58<01:37,  1.50s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 25          |
|    gen/rollout/ep_rew_mean         | 148         |
|    gen/rollout/ep_rew_wrapped_mean | -14         |
|    gen/time/fps                    | 2298        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 368640      |
|    gen/train/approx_kl             | 0.006449322 |
|    gen/train/clip_fraction         | 0.126       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.112      |
|    gen/train/explained_variance    | 0.142       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 1.38        |
|    gen/train/n_updates             | 895         |
|    gen/train/policy_gradient_loss  | -0.0135     |
|    gen/train/value_loss            | 20     

round:  74%|███████▍  | 180/244 [05:00<01:44,  1.63s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 24.3       |
|    gen/rollout/ep_rew_mean         | 150        |
|    gen/rollout/ep_rew_wrapped_mean | -8.82      |
|    gen/time/fps                    | 2280       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 0          |
|    gen/time/total_timesteps        | 370688     |
|    gen/train/approx_kl             | 0.04803267 |
|    gen/train/clip_fraction         | 0.129      |
|    gen/train/clip_range            | 0.1        |
|    gen/train/entropy_loss          | -0.0416    |
|    gen/train/explained_variance    | 0.0359     |
|    gen/train/learning_rate         | 0.0005     |
|    gen/train/loss                  | 1.12       |
|    gen/train/n_updates             | 900        |
|    gen/train/policy_gradient_loss  | -0.0144    |
|    gen/train/value_loss            | 20.1       |
------------

round:  74%|███████▍  | 181/244 [05:01<01:40,  1.60s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 24.1         |
|    gen/rollout/ep_rew_mean         | 148          |
|    gen/rollout/ep_rew_wrapped_mean | -1.08        |
|    gen/time/fps                    | 1414         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 1            |
|    gen/time/total_timesteps        | 372736       |
|    gen/train/approx_kl             | 0.0016899181 |
|    gen/train/clip_fraction         | 0.00547      |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.0204      |
|    gen/train/explained_variance    | -0.286       |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.397        |
|    gen/train/n_updates             | 905          |
|    gen/train/policy_gradient_loss  | 0.00224      |
|    gen/train/value_loss   

round:  75%|███████▍  | 182/244 [05:03<01:52,  1.81s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 24          |
|    gen/rollout/ep_rew_mean         | 151         |
|    gen/rollout/ep_rew_wrapped_mean | -0.913      |
|    gen/time/fps                    | 2233        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 374784      |
|    gen/train/approx_kl             | 0.007219593 |
|    gen/train/clip_fraction         | 0.00479     |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.00347    |
|    gen/train/explained_variance    | -0.175      |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 0.172       |
|    gen/train/n_updates             | 910         |
|    gen/train/policy_gradient_loss  | -0.0014     |
|    gen/train/value_loss            | 2.22   

round:  75%|███████▌  | 183/244 [05:05<01:46,  1.75s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 25.4       |
|    gen/rollout/ep_rew_mean         | 148        |
|    gen/rollout/ep_rew_wrapped_mean | -0.631     |
|    gen/time/fps                    | 2315       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 0          |
|    gen/time/total_timesteps        | 376832     |
|    gen/train/approx_kl             | 0.00571968 |
|    gen/train/clip_fraction         | 0.217      |
|    gen/train/clip_range            | 0.1        |
|    gen/train/entropy_loss          | -0.139     |
|    gen/train/explained_variance    | 0.24       |
|    gen/train/learning_rate         | 0.0005     |
|    gen/train/loss                  | -0.0309    |
|    gen/train/n_updates             | 915        |
|    gen/train/policy_gradient_loss  | -0.0122    |
|    gen/train/value_loss            | 0.0388     |
------------

round:  75%|███████▌  | 184/244 [05:07<01:43,  1.73s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 25         |
|    gen/rollout/ep_rew_mean         | 150        |
|    gen/rollout/ep_rew_wrapped_mean | -15.1      |
|    gen/time/fps                    | 2140       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 0          |
|    gen/time/total_timesteps        | 378880     |
|    gen/train/approx_kl             | 0.01599491 |
|    gen/train/clip_fraction         | 0.0791     |
|    gen/train/clip_range            | 0.1        |
|    gen/train/entropy_loss          | -0.155     |
|    gen/train/explained_variance    | -0.0069    |
|    gen/train/learning_rate         | 0.0005     |
|    gen/train/loss                  | 2.76       |
|    gen/train/n_updates             | 920        |
|    gen/train/policy_gradient_loss  | -0.0131    |
|    gen/train/value_loss            | 39.3       |
------------

round:  76%|███████▌  | 185/244 [05:08<01:38,  1.67s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 24.1      |
|    gen/rollout/ep_rew_mean         | 151       |
|    gen/rollout/ep_rew_wrapped_mean | -8.5      |
|    gen/time/fps                    | 2302      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 0         |
|    gen/time/total_timesteps        | 380928    |
|    gen/train/approx_kl             | 0.0338299 |
|    gen/train/clip_fraction         | 0.0325    |
|    gen/train/clip_range            | 0.1       |
|    gen/train/entropy_loss          | -0.0608   |
|    gen/train/explained_variance    | -0.0441   |
|    gen/train/learning_rate         | 0.0005    |
|    gen/train/loss                  | 1.86      |
|    gen/train/n_updates             | 925       |
|    gen/train/policy_gradient_loss  | -0.0127   |
|    gen/train/value_loss            | 19        |
-------------------------------

round:  76%|███████▌  | 186/244 [05:10<01:41,  1.76s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 24.7         |
|    gen/rollout/ep_rew_mean         | 148          |
|    gen/rollout/ep_rew_wrapped_mean | 0.846        |
|    gen/time/fps                    | 2044         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 1            |
|    gen/time/total_timesteps        | 382976       |
|    gen/train/approx_kl             | 0.0129430005 |
|    gen/train/clip_fraction         | 0.0331       |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.0442      |
|    gen/train/explained_variance    | -0.934       |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.0217       |
|    gen/train/n_updates             | 930          |
|    gen/train/policy_gradient_loss  | 0.00401      |
|    gen/train/value_loss   

round:  77%|███████▋  | 187/244 [05:12<01:36,  1.70s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 24.8        |
|    gen/rollout/ep_rew_mean         | 150         |
|    gen/rollout/ep_rew_wrapped_mean | -9.07       |
|    gen/time/fps                    | 2136        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 385024      |
|    gen/train/approx_kl             | 0.009407067 |
|    gen/train/clip_fraction         | 0.0938      |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.0672     |
|    gen/train/explained_variance    | -0.153      |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 1.23        |
|    gen/train/n_updates             | 935         |
|    gen/train/policy_gradient_loss  | -0.00363    |
|    gen/train/value_loss            | 18.1   

round:  77%|███████▋  | 188/244 [05:13<01:32,  1.66s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 24.2        |
|    gen/rollout/ep_rew_mean         | 151         |
|    gen/rollout/ep_rew_wrapped_mean | -7.96       |
|    gen/time/fps                    | 2302        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 387072      |
|    gen/train/approx_kl             | 0.027005438 |
|    gen/train/clip_fraction         | 0.134       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.0344     |
|    gen/train/explained_variance    | 0.0893      |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 1.35        |
|    gen/train/n_updates             | 940         |
|    gen/train/policy_gradient_loss  | -0.0146     |
|    gen/train/value_loss            | 16.6   

round:  77%|███████▋  | 189/244 [05:15<01:28,  1.61s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 42.3       |
|    gen/rollout/ep_rew_mean         | 133        |
|    gen/rollout/ep_rew_wrapped_mean | -0.754     |
|    gen/time/fps                    | 2321       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 0          |
|    gen/time/total_timesteps        | 389120     |
|    gen/train/approx_kl             | 0.33263418 |
|    gen/train/clip_fraction         | 0.0627     |
|    gen/train/clip_range            | 0.1        |
|    gen/train/entropy_loss          | -0.0391    |
|    gen/train/explained_variance    | -0.556     |
|    gen/train/learning_rate         | 0.0005     |
|    gen/train/loss                  | 0.454      |
|    gen/train/n_updates             | 945        |
|    gen/train/policy_gradient_loss  | -0.00978   |
|    gen/train/value_loss            | 3.6        |
------------

round:  78%|███████▊  | 190/244 [05:16<01:24,  1.56s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 56.8       |
|    gen/rollout/ep_rew_mean         | 119        |
|    gen/rollout/ep_rew_wrapped_mean | -85.4      |
|    gen/time/fps                    | 2160       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 0          |
|    gen/time/total_timesteps        | 391168     |
|    gen/train/approx_kl             | 0.05207441 |
|    gen/train/clip_fraction         | 0.0508     |
|    gen/train/clip_range            | 0.1        |
|    gen/train/entropy_loss          | -0.0713    |
|    gen/train/explained_variance    | -0.00432   |
|    gen/train/learning_rate         | 0.0005     |
|    gen/train/loss                  | 4.03       |
|    gen/train/n_updates             | 950        |
|    gen/train/policy_gradient_loss  | 0.00211    |
|    gen/train/value_loss            | 641        |
------------

round:  78%|███████▊  | 191/244 [05:18<01:22,  1.55s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 66.3        |
|    gen/rollout/ep_rew_mean         | 109         |
|    gen/rollout/ep_rew_wrapped_mean | -144        |
|    gen/time/fps                    | 2317        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 393216      |
|    gen/train/approx_kl             | 0.017091919 |
|    gen/train/clip_fraction         | 0.179       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.306      |
|    gen/train/explained_variance    | 0.31        |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 13.4        |
|    gen/train/n_updates             | 955         |
|    gen/train/policy_gradient_loss  | -0.0068     |
|    gen/train/value_loss            | 232    

round:  79%|███████▊  | 192/244 [05:20<01:29,  1.71s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 42.9        |
|    gen/rollout/ep_rew_mean         | 130         |
|    gen/rollout/ep_rew_wrapped_mean | -172        |
|    gen/time/fps                    | 2318        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 395264      |
|    gen/train/approx_kl             | 0.002154206 |
|    gen/train/clip_fraction         | 0.141       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.33       |
|    gen/train/explained_variance    | 0.688       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 16.4        |
|    gen/train/n_updates             | 960         |
|    gen/train/policy_gradient_loss  | -0.0103     |
|    gen/train/value_loss            | 200    

round:  79%|███████▉  | 193/244 [05:21<01:23,  1.64s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 35.4        |
|    gen/rollout/ep_rew_mean         | 138         |
|    gen/rollout/ep_rew_wrapped_mean | -59.4       |
|    gen/time/fps                    | 2153        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 397312      |
|    gen/train/approx_kl             | 0.010805818 |
|    gen/train/clip_fraction         | 0.266       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.337      |
|    gen/train/explained_variance    | 0.724       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 15          |
|    gen/train/n_updates             | 965         |
|    gen/train/policy_gradient_loss  | -0.0145     |
|    gen/train/value_loss            | 138    

round:  80%|███████▉  | 194/244 [05:23<01:20,  1.61s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 31.4         |
|    gen/rollout/ep_rew_mean         | 143          |
|    gen/rollout/ep_rew_wrapped_mean | -41.4        |
|    gen/time/fps                    | 2317         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 399360       |
|    gen/train/approx_kl             | 0.0046838475 |
|    gen/train/clip_fraction         | 0.208        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.301       |
|    gen/train/explained_variance    | 0.642        |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 5.75         |
|    gen/train/n_updates             | 970          |
|    gen/train/policy_gradient_loss  | -0.0053      |
|    gen/train/value_loss   

round:  80%|███████▉  | 195/244 [05:24<01:17,  1.58s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 29.7        |
|    gen/rollout/ep_rew_mean         | 145         |
|    gen/rollout/ep_rew_wrapped_mean | -32.2       |
|    gen/time/fps                    | 2276        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 401408      |
|    gen/train/approx_kl             | 0.006332065 |
|    gen/train/clip_fraction         | 0.274       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.272      |
|    gen/train/explained_variance    | 0.661       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 3.89        |
|    gen/train/n_updates             | 975         |
|    gen/train/policy_gradient_loss  | -0.0164     |
|    gen/train/value_loss            | 42.5   

round:  80%|████████  | 196/244 [05:26<01:14,  1.55s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 28.6         |
|    gen/rollout/ep_rew_mean         | 142          |
|    gen/rollout/ep_rew_wrapped_mean | -26.3        |
|    gen/time/fps                    | 2189         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 403456       |
|    gen/train/approx_kl             | 0.0042996155 |
|    gen/train/clip_fraction         | 0.245        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.247       |
|    gen/train/explained_variance    | 0.683        |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 3.18         |
|    gen/train/n_updates             | 980          |
|    gen/train/policy_gradient_loss  | -0.0204      |
|    gen/train/value_loss   

round:  81%|████████  | 197/244 [05:27<01:11,  1.53s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 27.2        |
|    gen/rollout/ep_rew_mean         | 149         |
|    gen/rollout/ep_rew_wrapped_mean | -20.9       |
|    gen/time/fps                    | 2288        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 405504      |
|    gen/train/approx_kl             | 0.004154277 |
|    gen/train/clip_fraction         | 0.236       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.219      |
|    gen/train/explained_variance    | 0.615       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 2.94        |
|    gen/train/n_updates             | 985         |
|    gen/train/policy_gradient_loss  | -0.0179     |
|    gen/train/value_loss            | 30     

round:  81%|████████  | 198/244 [05:29<01:15,  1.64s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 25.9         |
|    gen/rollout/ep_rew_mean         | 150          |
|    gen/rollout/ep_rew_wrapped_mean | -14.3        |
|    gen/time/fps                    | 2203         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 407552       |
|    gen/train/approx_kl             | 0.0064952103 |
|    gen/train/clip_fraction         | 0.214        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.176       |
|    gen/train/explained_variance    | 0.519        |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 1.29         |
|    gen/train/n_updates             | 990          |
|    gen/train/policy_gradient_loss  | -0.0208      |
|    gen/train/value_loss   

round:  82%|████████▏ | 199/244 [05:31<01:12,  1.61s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 25.4         |
|    gen/rollout/ep_rew_mean         | 149          |
|    gen/rollout/ep_rew_wrapped_mean | -9.02        |
|    gen/time/fps                    | 2062         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 409600       |
|    gen/train/approx_kl             | 0.0074095135 |
|    gen/train/clip_fraction         | 0.179        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.116       |
|    gen/train/explained_variance    | 0.439        |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 1.78         |
|    gen/train/n_updates             | 995          |
|    gen/train/policy_gradient_loss  | -0.0195      |
|    gen/train/value_loss   

round:  82%|████████▏ | 200/244 [05:32<01:10,  1.60s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 36.5       |
|    gen/rollout/ep_rew_mean         | 131        |
|    gen/rollout/ep_rew_wrapped_mean | -6.68      |
|    gen/time/fps                    | 2346       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 0          |
|    gen/time/total_timesteps        | 411648     |
|    gen/train/approx_kl             | 0.62278533 |
|    gen/train/clip_fraction         | 0.55       |
|    gen/train/clip_range            | 0.1        |
|    gen/train/entropy_loss          | -0.228     |
|    gen/train/explained_variance    | 0.34       |
|    gen/train/learning_rate         | 0.0005     |
|    gen/train/loss                  | 0.403      |
|    gen/train/n_updates             | 1000       |
|    gen/train/policy_gradient_loss  | -0.0299    |
|    gen/train/value_loss            | 6.07       |
------------

round:  82%|████████▏ | 201/244 [05:34<01:07,  1.57s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 46.9         |
|    gen/rollout/ep_rew_mean         | 117          |
|    gen/rollout/ep_rew_wrapped_mean | -176         |
|    gen/time/fps                    | 2321         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 413696       |
|    gen/train/approx_kl             | 0.0067734625 |
|    gen/train/clip_fraction         | 0.267        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.48        |
|    gen/train/explained_variance    | -0.0602      |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 455          |
|    gen/train/n_updates             | 1005         |
|    gen/train/policy_gradient_loss  | -0.0113      |
|    gen/train/value_loss   

round:  83%|████████▎ | 202/244 [05:35<01:04,  1.54s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 50          |
|    gen/rollout/ep_rew_mean         | 116         |
|    gen/rollout/ep_rew_wrapped_mean | -318        |
|    gen/time/fps                    | 2199        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 415744      |
|    gen/train/approx_kl             | 0.004783083 |
|    gen/train/clip_fraction         | 0.252       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.488      |
|    gen/train/explained_variance    | -0.0294     |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 298         |
|    gen/train/n_updates             | 1010        |
|    gen/train/policy_gradient_loss  | -0.00974    |
|    gen/train/value_loss            | 3.37e+0

round:  83%|████████▎ | 203/244 [05:37<01:04,  1.56s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 45.7         |
|    gen/rollout/ep_rew_mean         | 121          |
|    gen/rollout/ep_rew_wrapped_mean | -341         |
|    gen/time/fps                    | 2338         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 417792       |
|    gen/train/approx_kl             | 0.0037761698 |
|    gen/train/clip_fraction         | 0.267        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.5         |
|    gen/train/explained_variance    | 0.054        |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 91.5         |
|    gen/train/n_updates             | 1015         |
|    gen/train/policy_gradient_loss  | -0.0136      |
|    gen/train/value_loss   

round:  84%|████████▎ | 204/244 [05:39<01:07,  1.68s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 44.1         |
|    gen/rollout/ep_rew_mean         | 123          |
|    gen/rollout/ep_rew_wrapped_mean | -250         |
|    gen/time/fps                    | 2303         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 419840       |
|    gen/train/approx_kl             | 0.0044589154 |
|    gen/train/clip_fraction         | 0.244        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.495       |
|    gen/train/explained_variance    | 0.107        |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 55.8         |
|    gen/train/n_updates             | 1020         |
|    gen/train/policy_gradient_loss  | -0.0127      |
|    gen/train/value_loss   

round:  84%|████████▍ | 205/244 [05:40<01:03,  1.63s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 42.1        |
|    gen/rollout/ep_rew_mean         | 127         |
|    gen/rollout/ep_rew_wrapped_mean | -220        |
|    gen/time/fps                    | 2163        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 421888      |
|    gen/train/approx_kl             | 0.004299801 |
|    gen/train/clip_fraction         | 0.27        |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.501      |
|    gen/train/explained_variance    | 0.169       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 72.1        |
|    gen/train/n_updates             | 1025        |
|    gen/train/policy_gradient_loss  | -0.011      |
|    gen/train/value_loss            | 642    

round:  84%|████████▍ | 206/244 [05:42<01:00,  1.60s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 39.2         |
|    gen/rollout/ep_rew_mean         | 132          |
|    gen/rollout/ep_rew_wrapped_mean | -194         |
|    gen/time/fps                    | 2314         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 423936       |
|    gen/train/approx_kl             | 0.0028091082 |
|    gen/train/clip_fraction         | 0.264        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.495       |
|    gen/train/explained_variance    | 0.162        |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 39.2         |
|    gen/train/n_updates             | 1030         |
|    gen/train/policy_gradient_loss  | -0.0118      |
|    gen/train/value_loss   

round:  85%|████████▍ | 207/244 [05:43<00:58,  1.58s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 37           |
|    gen/rollout/ep_rew_mean         | 133          |
|    gen/rollout/ep_rew_wrapped_mean | -157         |
|    gen/time/fps                    | 2319         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 425984       |
|    gen/train/approx_kl             | 0.0026840759 |
|    gen/train/clip_fraction         | 0.233        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.485       |
|    gen/train/explained_variance    | 0.146        |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 53.2         |
|    gen/train/n_updates             | 1035         |
|    gen/train/policy_gradient_loss  | -0.0159      |
|    gen/train/value_loss   

round:  85%|████████▌ | 208/244 [05:45<00:55,  1.54s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 35.6        |
|    gen/rollout/ep_rew_mean         | 136         |
|    gen/rollout/ep_rew_wrapped_mean | -132        |
|    gen/time/fps                    | 2191        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 428032      |
|    gen/train/approx_kl             | 0.002847861 |
|    gen/train/clip_fraction         | 0.236       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.476      |
|    gen/train/explained_variance    | 0.182       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 43.6        |
|    gen/train/n_updates             | 1040        |
|    gen/train/policy_gradient_loss  | -0.015      |
|    gen/train/value_loss            | 368    

round:  86%|████████▌ | 209/244 [05:46<00:53,  1.54s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 34           |
|    gen/rollout/ep_rew_mean         | 137          |
|    gen/rollout/ep_rew_wrapped_mean | -118         |
|    gen/time/fps                    | 2315         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 430080       |
|    gen/train/approx_kl             | 0.0029016312 |
|    gen/train/clip_fraction         | 0.236        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.461       |
|    gen/train/explained_variance    | 0.23         |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 25.7         |
|    gen/train/n_updates             | 1045         |
|    gen/train/policy_gradient_loss  | -0.017       |
|    gen/train/value_loss   

round:  86%|████████▌ | 210/244 [05:48<00:56,  1.65s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 32.6         |
|    gen/rollout/ep_rew_mean         | 139          |
|    gen/rollout/ep_rew_wrapped_mean | -99.6        |
|    gen/time/fps                    | 2258         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 432128       |
|    gen/train/approx_kl             | 0.0024484494 |
|    gen/train/clip_fraction         | 0.208        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.446       |
|    gen/train/explained_variance    | 0.234        |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 17.8         |
|    gen/train/n_updates             | 1050         |
|    gen/train/policy_gradient_loss  | -0.0189      |
|    gen/train/value_loss   

round:  86%|████████▋ | 211/244 [05:50<00:52,  1.60s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 31.9         |
|    gen/rollout/ep_rew_mean         | 143          |
|    gen/rollout/ep_rew_wrapped_mean | -86.2        |
|    gen/time/fps                    | 2160         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 434176       |
|    gen/train/approx_kl             | 0.0028253421 |
|    gen/train/clip_fraction         | 0.215        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.426       |
|    gen/train/explained_variance    | 0.208        |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 30.3         |
|    gen/train/n_updates             | 1055         |
|    gen/train/policy_gradient_loss  | -0.0188      |
|    gen/train/value_loss   

round:  87%|████████▋ | 212/244 [05:51<00:50,  1.58s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 31           |
|    gen/rollout/ep_rew_mean         | 143          |
|    gen/rollout/ep_rew_wrapped_mean | -81.6        |
|    gen/time/fps                    | 2265         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 436224       |
|    gen/train/approx_kl             | 0.0024164985 |
|    gen/train/clip_fraction         | 0.22         |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.409       |
|    gen/train/explained_variance    | 0.222        |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 16.6         |
|    gen/train/n_updates             | 1060         |
|    gen/train/policy_gradient_loss  | -0.0195      |
|    gen/train/value_loss   

round:  87%|████████▋ | 213/244 [05:53<00:48,  1.57s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 29.7         |
|    gen/rollout/ep_rew_mean         | 141          |
|    gen/rollout/ep_rew_wrapped_mean | -72.6        |
|    gen/time/fps                    | 2316         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 438272       |
|    gen/train/approx_kl             | 0.0031591998 |
|    gen/train/clip_fraction         | 0.231        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.385       |
|    gen/train/explained_variance    | 0.226        |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 14.5         |
|    gen/train/n_updates             | 1065         |
|    gen/train/policy_gradient_loss  | -0.0189      |
|    gen/train/value_loss   

round:  88%|████████▊ | 214/244 [05:54<00:46,  1.54s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 28.9        |
|    gen/rollout/ep_rew_mean         | 142         |
|    gen/rollout/ep_rew_wrapped_mean | -57.4       |
|    gen/time/fps                    | 2176        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 440320      |
|    gen/train/approx_kl             | 0.005229678 |
|    gen/train/clip_fraction         | 0.215       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.347      |
|    gen/train/explained_variance    | 0.221       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 14.9        |
|    gen/train/n_updates             | 1070        |
|    gen/train/policy_gradient_loss  | -0.0185     |
|    gen/train/value_loss            | 135    

round:  88%|████████▊ | 215/244 [05:56<00:44,  1.53s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 27.4        |
|    gen/rollout/ep_rew_mean         | 144         |
|    gen/rollout/ep_rew_wrapped_mean | -47.8       |
|    gen/time/fps                    | 2318        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 442368      |
|    gen/train/approx_kl             | 0.007233225 |
|    gen/train/clip_fraction         | 0.222       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.308      |
|    gen/train/explained_variance    | 0.244       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 9.43        |
|    gen/train/n_updates             | 1075        |
|    gen/train/policy_gradient_loss  | -0.02       |
|    gen/train/value_loss            | 87.4   

round:  89%|████████▊ | 216/244 [05:58<00:46,  1.66s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 26.4        |
|    gen/rollout/ep_rew_mean         | 146         |
|    gen/rollout/ep_rew_wrapped_mean | -31.6       |
|    gen/time/fps                    | 2256        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 444416      |
|    gen/train/approx_kl             | 0.010739606 |
|    gen/train/clip_fraction         | 0.225       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.236      |
|    gen/train/explained_variance    | 0.173       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 7.84        |
|    gen/train/n_updates             | 1080        |
|    gen/train/policy_gradient_loss  | -0.0204     |
|    gen/train/value_loss            | 66.5   

round:  89%|████████▉ | 217/244 [05:59<00:43,  1.61s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 25.9         |
|    gen/rollout/ep_rew_mean         | 147          |
|    gen/rollout/ep_rew_wrapped_mean | -22          |
|    gen/time/fps                    | 2159         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 446464       |
|    gen/train/approx_kl             | 0.0049818805 |
|    gen/train/clip_fraction         | 0.145        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.181       |
|    gen/train/explained_variance    | 0.0906       |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 3.25         |
|    gen/train/n_updates             | 1085         |
|    gen/train/policy_gradient_loss  | -0.0147      |
|    gen/train/value_loss   

round:  89%|████████▉ | 218/244 [06:01<00:41,  1.59s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 25          |
|    gen/rollout/ep_rew_mean         | 148         |
|    gen/rollout/ep_rew_wrapped_mean | -16.9       |
|    gen/time/fps                    | 2151        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 448512      |
|    gen/train/approx_kl             | 0.010463179 |
|    gen/train/clip_fraction         | 0.135       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.123      |
|    gen/train/explained_variance    | 0.112       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 3.48        |
|    gen/train/n_updates             | 1090        |
|    gen/train/policy_gradient_loss  | -0.0152     |
|    gen/train/value_loss            | 31.8   

round:  90%|████████▉ | 219/244 [06:03<00:40,  1.60s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 24.3        |
|    gen/rollout/ep_rew_mean         | 150         |
|    gen/rollout/ep_rew_wrapped_mean | -8.2        |
|    gen/time/fps                    | 2300        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 450560      |
|    gen/train/approx_kl             | 0.015406611 |
|    gen/train/clip_fraction         | 0.085       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.0585     |
|    gen/train/explained_variance    | 0.000798    |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 1.88        |
|    gen/train/n_updates             | 1095        |
|    gen/train/policy_gradient_loss  | -0.00879    |
|    gen/train/value_loss            | 22.9   

round:  90%|█████████ | 220/244 [06:04<00:38,  1.60s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 24          |
|    gen/rollout/ep_rew_mean         | 151         |
|    gen/rollout/ep_rew_wrapped_mean | -1.85       |
|    gen/time/fps                    | 2118        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 452608      |
|    gen/train/approx_kl             | 0.013889572 |
|    gen/train/clip_fraction         | 0.0084      |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.0125     |
|    gen/train/explained_variance    | -0.149      |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 0.246       |
|    gen/train/n_updates             | 1100        |
|    gen/train/policy_gradient_loss  | -0.00444    |
|    gen/train/value_loss            | 4.99   

round:  91%|█████████ | 221/244 [06:06<00:36,  1.60s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 24           |
|    gen/rollout/ep_rew_mean         | 145          |
|    gen/rollout/ep_rew_wrapped_mean | -0.146       |
|    gen/time/fps                    | 2283         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 454656       |
|    gen/train/approx_kl             | 0.0005877642 |
|    gen/train/clip_fraction         | 0.000391     |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.00565     |
|    gen/train/explained_variance    | 0.0395       |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.0028       |
|    gen/train/n_updates             | 1105         |
|    gen/train/policy_gradient_loss  | -0.000102    |
|    gen/train/value_loss   

round:  91%|█████████ | 222/244 [06:08<00:37,  1.71s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 29.7       |
|    gen/rollout/ep_rew_mean         | 145        |
|    gen/rollout/ep_rew_wrapped_mean | -0.174     |
|    gen/time/fps                    | 2241       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 0          |
|    gen/time/total_timesteps        | 456704     |
|    gen/train/approx_kl             | 0.37476104 |
|    gen/train/clip_fraction         | 0.232      |
|    gen/train/clip_range            | 0.1        |
|    gen/train/entropy_loss          | -0.106     |
|    gen/train/explained_variance    | 0.112      |
|    gen/train/learning_rate         | 0.0005     |
|    gen/train/loss                  | -0.0208    |
|    gen/train/n_updates             | 1110       |
|    gen/train/policy_gradient_loss  | -0.0169    |
|    gen/train/value_loss            | 0.101      |
------------

round:  91%|█████████▏| 223/244 [06:09<00:34,  1.64s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 32.7         |
|    gen/rollout/ep_rew_mean         | 141          |
|    gen/rollout/ep_rew_wrapped_mean | -76.4        |
|    gen/time/fps                    | 2135         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 458752       |
|    gen/train/approx_kl             | 0.0043783756 |
|    gen/train/clip_fraction         | 0.0879       |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.221       |
|    gen/train/explained_variance    | -0.00994     |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 11.3         |
|    gen/train/n_updates             | 1115         |
|    gen/train/policy_gradient_loss  | -0.00281     |
|    gen/train/value_loss   

round:  92%|█████████▏| 224/244 [06:11<00:32,  1.62s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 31.5        |
|    gen/rollout/ep_rew_mean         | 142         |
|    gen/rollout/ep_rew_wrapped_mean | -110        |
|    gen/time/fps                    | 2313        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 460800      |
|    gen/train/approx_kl             | 0.009693994 |
|    gen/train/clip_fraction         | 0.151       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.22       |
|    gen/train/explained_variance    | 0.389       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 12.3        |
|    gen/train/n_updates             | 1120        |
|    gen/train/policy_gradient_loss  | 0.00321     |
|    gen/train/value_loss            | 149    

round:  92%|█████████▏| 225/244 [06:12<00:30,  1.58s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 29.9       |
|    gen/rollout/ep_rew_mean         | 145        |
|    gen/rollout/ep_rew_wrapped_mean | -83.8      |
|    gen/time/fps                    | 2314       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 0          |
|    gen/time/total_timesteps        | 462848     |
|    gen/train/approx_kl             | 0.01388531 |
|    gen/train/clip_fraction         | 0.15       |
|    gen/train/clip_range            | 0.1        |
|    gen/train/entropy_loss          | -0.203     |
|    gen/train/explained_variance    | 0.524      |
|    gen/train/learning_rate         | 0.0005     |
|    gen/train/loss                  | 8.6        |
|    gen/train/n_updates             | 1125       |
|    gen/train/policy_gradient_loss  | -0.00574   |
|    gen/train/value_loss            | 93         |
------------

round:  93%|█████████▎| 226/244 [06:14<00:27,  1.54s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 29.2       |
|    gen/rollout/ep_rew_mean         | 145        |
|    gen/rollout/ep_rew_wrapped_mean | -60.4      |
|    gen/time/fps                    | 2167       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 0          |
|    gen/time/total_timesteps        | 464896     |
|    gen/train/approx_kl             | 0.00450607 |
|    gen/train/clip_fraction         | 0.139      |
|    gen/train/clip_range            | 0.1        |
|    gen/train/entropy_loss          | -0.189     |
|    gen/train/explained_variance    | 0.507      |
|    gen/train/learning_rate         | 0.0005     |
|    gen/train/loss                  | 8.05       |
|    gen/train/n_updates             | 1130       |
|    gen/train/policy_gradient_loss  | -0.00994   |
|    gen/train/value_loss            | 79.1       |
------------

round:  93%|█████████▎| 227/244 [06:15<00:26,  1.55s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 28.7         |
|    gen/rollout/ep_rew_mean         | 145          |
|    gen/rollout/ep_rew_wrapped_mean | -52.6        |
|    gen/time/fps                    | 2305         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 466944       |
|    gen/train/approx_kl             | 0.0039216075 |
|    gen/train/clip_fraction         | 0.139        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.181       |
|    gen/train/explained_variance    | 0.585        |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 4.12         |
|    gen/train/n_updates             | 1135         |
|    gen/train/policy_gradient_loss  | -0.0116      |
|    gen/train/value_loss   

round:  93%|█████████▎| 228/244 [06:17<00:26,  1.69s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 28           |
|    gen/rollout/ep_rew_mean         | 147          |
|    gen/rollout/ep_rew_wrapped_mean | -47.8        |
|    gen/time/fps                    | 2093         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 468992       |
|    gen/train/approx_kl             | 0.0038976946 |
|    gen/train/clip_fraction         | 0.171        |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.17        |
|    gen/train/explained_variance    | 0.524        |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 4.12         |
|    gen/train/n_updates             | 1140         |
|    gen/train/policy_gradient_loss  | -0.0137      |
|    gen/train/value_loss   

round:  94%|█████████▍| 229/244 [06:19<00:25,  1.68s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 27.5        |
|    gen/rollout/ep_rew_mean         | 145         |
|    gen/rollout/ep_rew_wrapped_mean | -39.8       |
|    gen/time/fps                    | 1867        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 1           |
|    gen/time/total_timesteps        | 471040      |
|    gen/train/approx_kl             | 0.004823502 |
|    gen/train/clip_fraction         | 0.178       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.156      |
|    gen/train/explained_variance    | 0.486       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 3.9         |
|    gen/train/n_updates             | 1145        |
|    gen/train/policy_gradient_loss  | -0.0132     |
|    gen/train/value_loss            | 45.1   

round:  94%|█████████▍| 230/244 [06:21<00:24,  1.72s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 27.1        |
|    gen/rollout/ep_rew_mean         | 147         |
|    gen/rollout/ep_rew_wrapped_mean | -34.7       |
|    gen/time/fps                    | 2287        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 473088      |
|    gen/train/approx_kl             | 0.010841243 |
|    gen/train/clip_fraction         | 0.157       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.148      |
|    gen/train/explained_variance    | 0.551       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 3.15        |
|    gen/train/n_updates             | 1150        |
|    gen/train/policy_gradient_loss  | -0.0141     |
|    gen/train/value_loss            | 30.2   

round:  95%|█████████▍| 231/244 [06:22<00:21,  1.64s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 26.2        |
|    gen/rollout/ep_rew_mean         | 146         |
|    gen/rollout/ep_rew_wrapped_mean | -31.6       |
|    gen/time/fps                    | 2122        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 475136      |
|    gen/train/approx_kl             | 0.008925475 |
|    gen/train/clip_fraction         | 0.151       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.126      |
|    gen/train/explained_variance    | 0.579       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 1.61        |
|    gen/train/n_updates             | 1155        |
|    gen/train/policy_gradient_loss  | -0.0143     |
|    gen/train/value_loss            | 23.6   

round:  95%|█████████▌| 232/244 [06:24<00:19,  1.63s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 25.4        |
|    gen/rollout/ep_rew_mean         | 149         |
|    gen/rollout/ep_rew_wrapped_mean | -21.1       |
|    gen/time/fps                    | 2305        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 477184      |
|    gen/train/approx_kl             | 0.013123005 |
|    gen/train/clip_fraction         | 0.145       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.0831     |
|    gen/train/explained_variance    | 0.222       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 2.41        |
|    gen/train/n_updates             | 1160        |
|    gen/train/policy_gradient_loss  | -0.0125     |
|    gen/train/value_loss            | 25.3   

round:  95%|█████████▌| 233/244 [06:25<00:17,  1.60s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 24.7        |
|    gen/rollout/ep_rew_mean         | 150         |
|    gen/rollout/ep_rew_wrapped_mean | -12.2       |
|    gen/time/fps                    | 2302        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 479232      |
|    gen/train/approx_kl             | 0.008780785 |
|    gen/train/clip_fraction         | 0.108       |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.0771     |
|    gen/train/explained_variance    | 0.071       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 2.45        |
|    gen/train/n_updates             | 1165        |
|    gen/train/policy_gradient_loss  | -0.0122     |
|    gen/train/value_loss            | 22.7   

round:  96%|█████████▌| 234/244 [06:27<00:16,  1.68s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 24.1        |
|    gen/rollout/ep_rew_mean         | 150         |
|    gen/rollout/ep_rew_wrapped_mean | -5.74       |
|    gen/time/fps                    | 2048        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 481280      |
|    gen/train/approx_kl             | 0.055834386 |
|    gen/train/clip_fraction         | 0.0824      |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.0141     |
|    gen/train/explained_variance    | -0.148      |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 1.44        |
|    gen/train/n_updates             | 1170        |
|    gen/train/policy_gradient_loss  | -0.0128     |
|    gen/train/value_loss            | 12.9   

round:  96%|█████████▋| 235/244 [06:29<00:14,  1.65s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 24.1         |
|    gen/rollout/ep_rew_mean         | 151          |
|    gen/rollout/ep_rew_wrapped_mean | -0.108       |
|    gen/time/fps                    | 2246         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 483328       |
|    gen/train/approx_kl             | 0.0011328717 |
|    gen/train/clip_fraction         | 0.0186       |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.0238      |
|    gen/train/explained_variance    | -0.488       |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 0.0446       |
|    gen/train/n_updates             | 1175         |
|    gen/train/policy_gradient_loss  | -0.00199     |
|    gen/train/value_loss   

round:  97%|█████████▋| 236/244 [06:31<00:13,  1.68s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 24          |
|    gen/rollout/ep_rew_mean         | 150         |
|    gen/rollout/ep_rew_wrapped_mean | -1.25       |
|    gen/time/fps                    | 2251        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 485376      |
|    gen/train/approx_kl             | 0.012645533 |
|    gen/train/clip_fraction         | 0.0165      |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.0128     |
|    gen/train/explained_variance    | -0.239      |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 0.401       |
|    gen/train/n_updates             | 1180        |
|    gen/train/policy_gradient_loss  | 0.00179     |
|    gen/train/value_loss            | 3.26   

round:  97%|█████████▋| 237/244 [06:32<00:11,  1.64s/it]

------------------------------------------------------
| raw/                               |               |
|    gen/rollout/ep_len_mean         | 24            |
|    gen/rollout/ep_rew_mean         | 148           |
|    gen/rollout/ep_rew_wrapped_mean | -0.495        |
|    gen/time/fps                    | 2145          |
|    gen/time/iterations             | 1             |
|    gen/time/time_elapsed           | 0             |
|    gen/time/total_timesteps        | 487424        |
|    gen/train/approx_kl             | 1.0430813e-06 |
|    gen/train/clip_fraction         | 0             |
|    gen/train/clip_range            | 0.1           |
|    gen/train/entropy_loss          | -0.00151      |
|    gen/train/explained_variance    | -1.67         |
|    gen/train/learning_rate         | 0.0005        |
|    gen/train/loss                  | 0.00361       |
|    gen/train/n_updates             | 1185          |
|    gen/train/policy_gradient_loss  | -0.000658     |
|    gen/t

round:  98%|█████████▊| 238/244 [06:34<00:09,  1.61s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 24.9         |
|    gen/rollout/ep_rew_mean         | 148          |
|    gen/rollout/ep_rew_wrapped_mean | 0.104        |
|    gen/time/fps                    | 2305         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 489472       |
|    gen/train/approx_kl             | 0.0018479881 |
|    gen/train/clip_fraction         | 0.00273      |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.00328     |
|    gen/train/explained_variance    | -0.363       |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | -0.00133     |
|    gen/train/n_updates             | 1190         |
|    gen/train/policy_gradient_loss  | -0.000138    |
|    gen/train/value_loss   

round:  98%|█████████▊| 239/244 [06:35<00:07,  1.58s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 24.9         |
|    gen/rollout/ep_rew_mean         | 150          |
|    gen/rollout/ep_rew_wrapped_mean | -11.8        |
|    gen/time/fps                    | 2301         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 491520       |
|    gen/train/approx_kl             | 0.0019605886 |
|    gen/train/clip_fraction         | 0.0729       |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.0697      |
|    gen/train/explained_variance    | -0.0149      |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | 1.15         |
|    gen/train/n_updates             | 1195         |
|    gen/train/policy_gradient_loss  | -0.00254     |
|    gen/train/value_loss   

round:  98%|█████████▊| 240/244 [06:37<00:06,  1.66s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 24.3        |
|    gen/rollout/ep_rew_mean         | 150         |
|    gen/rollout/ep_rew_wrapped_mean | -9.31       |
|    gen/time/fps                    | 2110        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 493568      |
|    gen/train/approx_kl             | 0.024674937 |
|    gen/train/clip_fraction         | 0.0856      |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.0229     |
|    gen/train/explained_variance    | 0.211       |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 1.38        |
|    gen/train/n_updates             | 1200        |
|    gen/train/policy_gradient_loss  | -0.0113     |
|    gen/train/value_loss            | 12.3   

round:  99%|█████████▉| 241/244 [06:39<00:04,  1.66s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 24         |
|    gen/rollout/ep_rew_mean         | 152        |
|    gen/rollout/ep_rew_wrapped_mean | -1.47      |
|    gen/time/fps                    | 2175       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 0          |
|    gen/time/total_timesteps        | 495616     |
|    gen/train/approx_kl             | 0.03916193 |
|    gen/train/clip_fraction         | 0.0431     |
|    gen/train/clip_range            | 0.1        |
|    gen/train/entropy_loss          | -0.0039    |
|    gen/train/explained_variance    | -0.495     |
|    gen/train/learning_rate         | 0.0005     |
|    gen/train/loss                  | 0.251      |
|    gen/train/n_updates             | 1205       |
|    gen/train/policy_gradient_loss  | -0.0088    |
|    gen/train/value_loss            | 6.41       |
------------

round:  99%|█████████▉| 242/244 [06:40<00:03,  1.65s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_len_mean         | 24.2         |
|    gen/rollout/ep_rew_mean         | 149          |
|    gen/rollout/ep_rew_wrapped_mean | 0.184        |
|    gen/time/fps                    | 2305         |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 0            |
|    gen/time/total_timesteps        | 497664       |
|    gen/train/approx_kl             | 0.0007068297 |
|    gen/train/clip_fraction         | 0.0231       |
|    gen/train/clip_range            | 0.1          |
|    gen/train/entropy_loss          | -0.0195      |
|    gen/train/explained_variance    | -1.83        |
|    gen/train/learning_rate         | 0.0005       |
|    gen/train/loss                  | -0.0051      |
|    gen/train/n_updates             | 1210         |
|    gen/train/policy_gradient_loss  | -0.00391     |
|    gen/train/value_loss   

round: 100%|█████████▉| 243/244 [06:42<00:01,  1.59s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_len_mean         | 24.1        |
|    gen/rollout/ep_rew_mean         | 149         |
|    gen/rollout/ep_rew_wrapped_mean | -2.8        |
|    gen/time/fps                    | 2170        |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 0           |
|    gen/time/total_timesteps        | 499712      |
|    gen/train/approx_kl             | 0.005610967 |
|    gen/train/clip_fraction         | 0.0384      |
|    gen/train/clip_range            | 0.1         |
|    gen/train/entropy_loss          | -0.00527    |
|    gen/train/explained_variance    | -0.0687     |
|    gen/train/learning_rate         | 0.0005      |
|    gen/train/loss                  | 0.344       |
|    gen/train/n_updates             | 1215        |
|    gen/train/policy_gradient_loss  | -0.00367    |
|    gen/train/value_loss            | 5.16   

round: 100%|██████████| 244/244 [06:43<00:00,  1.65s/it]


In [None]:
# Helper function to calculate the behavorial cloning loss
def behavioral_cloning_loss(expert_trajs, learner_policy, device='cpu'):
    total_loss = 0
    num_samples = 0

    for traj in expert_trajs:
        states = traj.obs  
        expert_actions = traj.acts  
        
        states_tensor = th.tensor(states, dtype=th.float32, device=device)
        
        with th.no_grad():
            learner_actions = learner_policy(states_tensor)[0]

        learner_actions_np = learner_actions.cpu().numpy()
        
        # Calculate the loss (number of mismatched actions)
        loss = np.sum(learner_actions_np != expert_actions)
        total_loss += loss
        num_samples += len(states)
    return total_loss / num_samples  # Average loss

bc_loss = behavioral_cloning_loss(trajectories, learner.policy)
print(f"Behavioral Cloning Loss: {bc_loss}")

  loss = np.sum(learner_actions_np != expert_actions)


Behavioral Cloning Loss: 0.03999539253078045


In [None]:
state = venv.reset()  
state_tensor = th.tensor(state, dtype=th.float32)  

true_distribution = expert.policy.get_distribution(state_tensor)
learned_distribution = learner.policy.get_distribution(state_tensor)

true_action_probs = true_distribution.distribution.probs.detach()
learned_action_probs = learned_distribution.distribution.probs.detach()

true_dist = Categorical(probs=true_action_probs)
learned_dist = Categorical(probs=learned_action_probs)

kl_div = th.distributions.kl.kl_divergence(true_dist, learned_dist)

print(f"Action Probabilities from Expert Policy: {true_action_probs}")
print(f"Action Probabilities from Learned Policy: {learned_action_probs}")
print(f"KL Divergence: {kl_div.item()}")


Action Probabilities from Expert Policy: tensor([[1.7457e-04, 9.9963e-01, 1.9423e-04]])
Action Probabilities from Learned Policy: tensor([[1.9107e-04, 9.7093e-01, 2.8875e-02]])
KL Divergence: 0.02813028357923031


In [None]:
# Collect 500 trajectories of the expert behavior 
test_trajectories = rollout.rollout(
    expert,
    venv,
    rollout.make_sample_until(min_episodes=500),
    rng=np.random.default_rng(SEED),
)

# Helper function to calculate the KL divergence
def total_kl_divergence(expert_trajs, learner, expert, device='cpu'):
    total_kl_divergences = []

    for traj in expert_trajs:
        traj_kl_divergences = []

        states = traj.obs

        states_tensor = th.tensor(states, dtype=th.float32, device=device)
        
        for state_tensor in states_tensor:
            state_tensor = state_tensor.unsqueeze(0)

            true_distribution = expert.policy.get_distribution(state_tensor)
            learned_distribution = learner.policy.get_distribution(state_tensor)

            true_action_probs = true_distribution.distribution.probs.detach()
            learned_action_probs = learned_distribution.distribution.probs.detach()

            print(f"true_action: {th.argmax(true_action_probs).item()}")
            print(f"learned_action: {th.argmax(learned_action_probs).item()}")

            true_dist = Categorical(probs=true_action_probs)
            learned_dist = Categorical(probs=learned_action_probs)

            kl_div = th.distributions.kl.kl_divergence(true_dist, learned_dist).item()
            print(f"kl_div: {kl_div}")
            traj_kl_divergences.append(kl_div)

        total_kl_divergences.append(np.mean(traj_kl_divergences))

    # Return average KL divergence across all trajectories
    return np.mean(total_kl_divergences)


# Compute Total KL Divergence
kl_divergence = total_kl_divergence(test_trajectories, learner, expert)
print(f"Total KL Divergence: {kl_divergence}")


true_action: 1
learned_action: 1
kl_div: 0.00014371622819453478
true_action: 1
learned_action: 1
kl_div: 9.994952415581793e-05
true_action: 1
learned_action: 1
kl_div: 8.010320016182959e-05
true_action: 1
learned_action: 1
kl_div: 0.00017091260815504938
true_action: 1
learned_action: 1
kl_div: 0.0003818089608103037
true_action: 1
learned_action: 1
kl_div: 0.00035817123716697097
true_action: 1
learned_action: 1
kl_div: 0.0015080327866598964
true_action: 1
learned_action: 1
kl_div: 0.0006421628058888018
true_action: 2
learned_action: 2
kl_div: 0.0001216732052853331
true_action: 2
learned_action: 2
kl_div: 2.2269392502494156e-05
true_action: 2
learned_action: 2
kl_div: 4.686740430770442e-05
true_action: 2
learned_action: 2
kl_div: 2.108224362018518e-05
true_action: 2
learned_action: 2
kl_div: 3.9320060750469565e-05
true_action: 2
learned_action: 2
kl_div: 6.200295320013538e-05
true_action: 2
learned_action: 2
kl_div: 2.1005906091886573e-05
true_action: 2
learned_action: 2
kl_div: 6.005744

In [None]:
# Initialize lists to track metrics
bc_losses = []
kl_divergences = []

# Train AIRL with monitoring
num_iterations = 50
steps_per_iteration = 10000

for i in range(num_iterations):
    airl_trainer.train(steps_per_iteration)
    
    # Calculate metrics
    bc_loss = behavioral_cloning_loss(trajectories, learner.policy)
    bc_losses.append(bc_loss)

    kl_divergence = total_kl_divergence(test_trajectories, learner, expert)
    kl_divergences.append(kl_divergence)

    print(f"Iteration {i+1}: BC Loss = {bc_loss}, KL Divergence = {kl_divergence}")

# Plot the metrics
import matplotlib.pyplot as plt

plt.figure(figsize=(10, 5))
plt.plot(range(1, num_iterations+1), bc_losses, label="BC Loss")
plt.plot(range(1, num_iterations+1), kl_divergences, label="KL Divergence")
plt.xlabel("Training Iterations")
plt.ylabel("Metric Value")
plt.title("Training Metrics for AIRL Model")
plt.legend()
plt.show()
