In [1]:
import gymnasium as gym
from stk_actor.wrappers import StuckStopWrapper
import torch
import torch.nn.functional as F

class PreprocessObservationWrapper(gym.ObservationWrapper):
    """Flatten and normalise a mixed ``{'continuous', 'discrete'}`` observation.

    The continuous vector is concatenated with a one-hot encoding of every
    discrete component, then standardised with a mean/std pair loaded from disk.
    """

    def __init__(self, env, mean_path='buffer_mean_2', std_path='buffer_std_2'):
        """
        Args:
            env: The Gym environment to wrap. Its observation space must expose
                a 'continuous' entry (with a ``shape``) and a 'discrete' entry
                that iterates over sub-spaces having an ``n`` attribute.
            mean_path: File holding the normalisation mean, read with ``torch.load``.
            std_path: File holding the normalisation std, read with ``torch.load``.
        """
        super().__init__(env)
        self.observation_space = self._get_flat_observation_space(env.observation_space)
        # Number of classes per discrete component, looked up once instead of
        # on every step.
        self._discrete_sizes = [int(space.n) for space in env.observation_space['discrete']]
        # NOTE(review): torch.load unpickles the file. If these files only hold
        # plain tensors, passing weights_only=True would be safer -- confirm the
        # stored type before changing it.
        # Presumably per-feature statistics matching the flat layout built in
        # `observation` (judging by the file names) -- TODO confirm.
        self.mean = torch.load(mean_path, map_location='cpu')
        self.std = torch.load(std_path, map_location='cpu')

    def _get_flat_observation_space(self, observation_space):
        """
        Create a flat observation space based on the original observation space.

        Args:
            observation_space: Original observation space with 'continuous' and 'discrete' components.

        Returns:
            An unbounded 1-D ``Box`` whose length is the continuous dimension
            plus the sum of the class counts of the discrete components.
        """
        continuous_dim = observation_space['continuous'].shape[0]
        discrete_dims = sum(space.n for space in observation_space['discrete'])
        flat_dim = continuous_dim + discrete_dims
        # NOTE(review): dtype=float declares float64 while `observation` yields
        # float32 values; kept unchanged so the advertised space stays the same.
        return gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(flat_dim,), dtype=float)

    def observation(self, obs):
        """
        Process the observation into a flat, normalised tensor.

        Args:
            obs: The raw observation from the environment, indexable by
                'continuous' and 'discrete'.

        Returns:
            ``(flat - mean) / (std + 1e-8)`` where ``flat`` is the continuous
            part followed by the one-hot encoded discrete parts. The result is
            a torch tensor, not a numpy array.
        """
        continuous_tensor = torch.as_tensor(obs['continuous'], dtype=torch.float32)

        # F.one_hot only accepts int64 indices, so the dtype is forced rather
        # than inherited from whatever integer type the environment emits.
        discrete_tensors = [
            F.one_hot(torch.as_tensor(value, dtype=torch.long), num_classes=size).float()
            for value, size in zip(obs['discrete'], self._discrete_sizes)
        ]

        flat_tensor = torch.cat([continuous_tensor, *discrete_tensors])
        # The epsilon keeps constant features (std == 0) from dividing by zero.
        return (flat_tensor - self.mean) / (self.std + 1e-8)

import gymnasium as gym
from gymnasium import Wrapper

class SkipFirstNStepsWrapper(Wrapper):
    """Advance the wrapped environment by ``n`` random steps on every reset."""

    def __init__(self, env, n):
        """
        Args:
            env: Environment to wrap.
            n: Number of randomly sampled actions to play right after each reset.
        """
        super().__init__(env)
        self.n = n

    def reset(self, **kwargs):
        """Reset the environment, then burn ``n`` steps with sampled actions.

        If an episode ends while skipping, the environment is reset again with
        the same keyword arguments and skipping continues with the steps that
        are left (the counter is not restarted).

        Returns:
            The ``(observation, info)`` pair seen after the last skipped step
            or, when that step ended the episode, after the follow-up reset.
        """
        observation, info = self.env.reset(**kwargs)
        for _skipped in range(self.n):
            random_action = self.env.action_space.sample()
            observation, _reward, terminated, truncated, info = self.env.step(random_action)
            if not (terminated or truncated):
                continue
            observation, info = self.env.reset(**kwargs)
        return observation, info

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Categorical
from typing import Dict, List, Tuple, Union, Type
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Categorical

import gymnasium as gym
from gymnasium import spaces

def get_device(device: Union[torch.device, str] = "auto") -> torch.device:
    """Resolve a device request to a usable ``torch.device``.

    :param device: ``"auto"`` (meaning CUDA), a device string, or a ``torch.device``.
    :return: The requested device, or the CPU device when a CUDA device was
        requested but CUDA is not available.
    """
    requested = torch.device("cuda" if device == "auto" else device)
    wants_cuda = requested.type == torch.device("cuda").type
    if wants_cuda and not torch.cuda.is_available():
        return torch.device("cpu")
    return requested

class BaseFeaturesExtractor(nn.Module):
    """Base module that records an observation space and an output width.

    :param observation_space: Space the extractor is built for (only stored).
    :param features_dim: Number of features produced; must be positive.
    """

    def __init__(self, observation_space: gym.Space, features_dim: int = 0) -> None:
        super().__init__()
        # The default of 0 is deliberately invalid: subclasses must pass a real size.
        assert features_dim > 0
        self._observation_space, self._features_dim = observation_space, features_dim

    @property
    def features_dim(self) -> int:
        """Number of features given at construction time."""
        return self._features_dim

def get_flattened_obs_dim(observation_space: spaces.Space) -> int:
    """Return the length of the flat vector used to represent ``observation_space``.

    ``MultiDiscrete`` spaces are sized as the sum of their ``nvec`` entries;
    every other space is sized with ``spaces.utils.flatdim``.
    """
    if not isinstance(observation_space, spaces.MultiDiscrete):
        return spaces.utils.flatdim(observation_space)
    return sum(observation_space.nvec)

class FlattenExtractor(BaseFeaturesExtractor):
    """Feature extractor that only flattens its input with ``nn.Flatten``.

    :param observation_space: Space whose flattened size becomes ``features_dim``.
    """

    def __init__(self, observation_space: gym.Space) -> None:
        flat_dim = get_flattened_obs_dim(observation_space)
        super().__init__(observation_space, flat_dim)
        self.flatten = nn.Flatten()

    def forward(self, observations: torch.Tensor) -> torch.Tensor:
        """Return ``observations`` passed through ``nn.Flatten``."""
        flattened = self.flatten(observations)
        return flattened
    
class MlpExtractor(nn.Module):
    """Two independent MLP trunks: one for the policy, one for the value function.

    :param feature_dim: Width of the input features.
    :param net_arch: Either a list of hidden widths used for both trunks, or a
        dict with optional ``"pi"`` and ``"vf"`` lists (a missing key means no
        hidden layer for that trunk).
    :param activation_fn: Module class instantiated after every linear layer.
    :param device: Accepted for signature compatibility; not used by this
        implementation, the modules stay on the default device.
    """

    def __init__(
        self,
        feature_dim: int,
        net_arch: Union[List[int], Dict[str, List[int]]],
        activation_fn: Type[nn.Module],
        device: Union[torch.device, str] = "auto",
    ) -> None:
        super().__init__()

        if isinstance(net_arch, dict):
            pi_widths = net_arch.get("pi", [])
            vf_widths = net_arch.get("vf", [])
        else:
            pi_widths = vf_widths = net_arch

        def build_trunk(widths):
            """Return ``(layers, output_width)`` for a Linear+activation stack."""
            layers: List[nn.Module] = []
            in_dim = feature_dim
            for out_dim in widths:
                layers += [nn.Linear(in_dim, out_dim), activation_fn()]
                in_dim = out_dim
            return layers, in_dim

        # The policy trunk is created before the value trunk so parameter
        # initialisation draws from the RNG in the same order as before.
        pi_layers, pi_out = build_trunk(pi_widths)
        vf_layers, vf_out = build_trunk(vf_widths)

        self.latent_dim_pi = pi_out
        self.latent_dim_vf = vf_out
        self.policy_net = nn.Sequential(*pi_layers)
        self.value_net = nn.Sequential(*vf_layers)

    def forward(self, features: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        :return: ``(latent_policy, latent_value)``, the outputs of the policy
            trunk and of the value trunk for the same input.
        """
        latent_pi = self.forward_actor(features)
        latent_vf = self.forward_critic(features)
        return latent_pi, latent_vf

    def forward_actor(self, features: torch.Tensor) -> torch.Tensor:
        """Run ``features`` through the policy trunk only."""
        return self.policy_net(features)

    def forward_critic(self, features: torch.Tensor) -> torch.Tensor:
        """Run ``features`` through the value trunk only."""
        return self.value_net(features)

    
class Policy(nn.Module):
    """Container holding a feature extractor, an MLP extractor and two heads.

    The class defines no ``forward``; callers use the sub-modules directly.
    Elsewhere in this notebook the state dict of this module is loaded into a
    stable-baselines3 PPO policy, so the attribute names and the order in which
    modules are registered should be kept as they are.

    Args:
        observation_space: Space handed to ``FlattenExtractor``.
        action_dims: Iterable of per-dimension action counts; the action head
            outputs ``sum(action_dims)`` logits.
        net_arch: Hidden-layer spec forwarded to ``MlpExtractor`` (a list, or a
            dict with ``"pi"`` / ``"vf"`` lists).
        activation_fn: Activation module class forwarded to ``MlpExtractor``.
    """

    def __init__(self, observation_space, action_dims, net_arch, activation_fn,):
        super().__init__()
        self.features_extractor = FlattenExtractor(observation_space)
        # Both aliases point at the same extractor instance.
        self.pi_features_extractor = self.features_extractor
        self.vf_features_extractor = self.features_extractor
        self.mlp_extractor = MlpExtractor(
            self.features_extractor.features_dim,
            net_arch=net_arch,
            activation_fn=activation_fn,
        )
        # Size the heads from the extractor's reported output widths instead of
        # ``net_arch[-1]``: that expression fails for the dict form of
        # ``net_arch`` (and for an empty list) that MlpExtractor supports, and
        # it is identical for a non-empty list.
        self.action_net = nn.Linear(self.mlp_extractor.latent_dim_pi, sum(action_dims))
        self.value_net = nn.Linear(self.mlp_extractor.latent_dim_vf, 1)


class UnifiedSACPolicy(nn.Module):
    """Multi-head categorical actor built on top of a shared ``Policy`` module.

    Args:
        observation_space: Forwarded to ``Policy``.
        action_dims: Number of choices for each action dimension; also used to
            split the flat logits into one group per dimension.
        net_arch: Forwarded to ``Policy``.
        activation_fn: Forwarded to ``Policy``.
    """

    def __init__(self, observation_space, action_dims, net_arch, activation_fn):
        super().__init__()

        self.shared = Policy(
            observation_space,
            action_dims,
            net_arch=net_arch,
            activation_fn=activation_fn
        )
        self.action_dims = action_dims

    def forward(self, x):
        """Return the concatenated logits of all action dimensions for ``x``."""
        features = self.shared.features_extractor(x)
        latent_pi = self.shared.mlp_extractor.policy_net(features)
        return self.shared.action_net(latent_pi)

    def sample(self, x, deterministic=False):
        """Pick one action per action dimension.

        Args:
            x: Observations accepted by ``forward``.
            deterministic: When true take the arg-max of each logit group,
                otherwise sample from the corresponding categorical distribution.

        Returns:
            A tuple ``(actions, log_probs, probs)``: ``actions`` and
            ``log_probs`` are the per-dimension results stacked along a new
            leading axis, ``probs`` is a list with the softmax of each logit
            group.
        """
        per_head_logits = torch.split(self.forward(x), self.action_dims, dim=-1)
        heads = [Categorical(logits=head_logits) for head_logits in per_head_logits]

        if deterministic:
            chosen = [torch.argmax(head_logits, dim=-1) for head_logits in per_head_logits]
        else:
            # Heads are sampled in order, one draw per head.
            chosen = [head.sample() for head in heads]

        chosen_log_probs = [head.log_prob(action) for head, action in zip(heads, chosen)]
        head_probs = [F.softmax(head_logits, dim=-1) for head_logits in per_head_logits]

        return torch.stack(chosen), torch.stack(chosen_log_probs), head_probs
    
#policy = torch.load('policy_512_512_512_512_SiLU_3_statedict', map_location='cuda')


from stable_baselines3 import PPO, A2C
from stable_baselines3.common.env_util import make_vec_env
import gymnasium as gym
from pystk2_gymnasium import AgentSpec
from bbrl.agents.gymnasium import ParallelGymAgent, make_env
from functools import partial

# Track names. The list is the concatenation of two groups: every name in the
# second group already appears in the first, so those names occur twice.
# In the part of the notebook visible here `tracks` is only referenced from
# commented-out code that assigns `tracks[i % len(tracks)]` to each environment.
tracks = [
    'abyss', 'black_forest', 'candela_city', 'cocoa_temple',
    'cornfield_crossing', 'fortmagma', 'gran_paradiso_island', 'hacienda',
    'lighthouse', 'mines', 'minigolf', 'olivermath',
    'ravenbridge_mansion', 'sandtrack', 'scotland', 'snowmountain',
    'snowtuxpeak', 'stk_enterprise', 'volcano_island', 'xr591',
    'zengarden',
] + [
    # Repeated entries (see the note above).
    'fortmagma',
    'ravenbridge_mansion',
    'snowmountain',
    'cocoa_temple',
    'sandtrack',
    'scotland',
    'stk_enterprise',
    'volcano_island',  # 1104
    'xr591',  # 864
]

# Vectorised training environment: 8 copies of the flattened multi-discrete
# SuperTuxKart task. No seed is passed, so runs are not reproducible.
# Each copy is wrapped, innermost first, by:
#   1. PreprocessObservationWrapper - flattens and normalises observations,
#   2. StuckStopWrapper(n=128)      - imported from stk_actor.wrappers,
#   3. SkipFirstNStepsWrapper(n=19) - plays 19 random steps after every reset.
vec_env = make_vec_env(
    "supertuxkart/flattened_multidiscrete-v0",
    n_envs=8,
    wrapper_class=lambda raw_env: SkipFirstNStepsWrapper(
        StuckStopWrapper(PreprocessObservationWrapper(raw_env), n=128),
        n=19,
    ),
    env_kwargs=dict(
        render_mode=None,
        agent=AgentSpec(use_ai=False, name="walid"),
        laps=1,
        difficulty=2,
        num_kart=12,
    ),
)


# for i,venv in enumerate(vec_env.envs):
#     print(i, tracks[i%len(tracks)])
#     venv.env.default_track = tracks[i%len(tracks)]



# Network shape shared by the pretrained actor and the PPO policy built later.
net_arch = [1024, 1024, 1024]
activation_fn = torch.nn.Tanh
# File containing the state dict of the pretrained actor.
filename = 'policy_normed_1024_1024_1024_Tanh_statedict_2'

# One entry per action dimension: the number of choices in that dimension.
action_dims = [space.n for space in vec_env.action_space]
unified_policy = UnifiedSACPolicy(
    vec_env.observation_space,
    action_dims,
    net_arch=net_arch,
    activation_fn=activation_fn,
)
# weights_only=True restricts unpickling to tensors and plain containers, which
# is all a state dict needs; it avoids running arbitrary pickled code and the
# warning torch.load emits when the flag is left unset.
unified_policy.load_state_dict(
    torch.load(filename, map_location='cpu', weights_only=True)
)


..:: Antarctica Rendering Engine 2.0 ::..


  self.mean = torch.load('buffer_mean_2', map_location='cpu')
  self.std = torch.load('buffer_std_2', map_location='cpu')
  unified_policy.load_state_dict(torch.load(filename, map_location='cpu'))


<All keys matched successfully>

In [2]:
# (n_steps, total_timesteps) pairs; one PPO run is performed per pair.
steps = [(
    2048,
    1_000_000,
)]

# Architecture arguments used both for the fresh model and for the checkpoint.
ppo_policy_kwargs = dict(net_arch=net_arch, activation_fn=activation_fn)

# The checkpoint does not depend on the loop variables, so it is read once.
# A copy of the kwargs is passed so the loader cannot alter the dict that the
# models below receive.
checkpoint_state_dict = PPO.load(
    "ppti_ppo4_2048_batch128_clip01_ent0001",
    custom_objects={'policy_kwargs': dict(ppo_policy_kwargs)},
).policy.state_dict()

for n_steps, total_timesteps in steps:
    model = PPO(
        "MlpPolicy",
        vec_env,
        verbose=1,
        policy_kwargs=ppo_policy_kwargs,
        device='cpu',
        learning_rate=0.0003,
        n_steps=n_steps,
        tensorboard_log="./outputs/",
        clip_range=0.2,
    )
    print('DOING', n_steps, total_timesteps)
    # Warm start from the PPO checkpoint. This cell previously also loaded
    # `unified_policy.shared.state_dict()` just before this line, but that load
    # was overwritten immediately and had no effect on the trained weights.
    # To start from the pretrained actor instead, load that state dict here
    # in place of the checkpoint.
    model.policy.load_state_dict(checkpoint_state_dict)
    model.learn(total_timesteps=total_timesteps, progress_bar=True)
    # NOTE(review): the trained model is not saved; re-enable if it should be kept.
    # model.save(f'ppti_ppo2_{n_steps}_batch128_clip01_ent0001')

    


Using cpu device
..:: Antarctica Rendering Engine 2.0 ::..
..:: Antarctica Rendering Engine 2.0 ::..
..:: Antarctica Rendering Engine 2.0 ::..
..:: Antarctica Rendering Engine 2.0 ::..
..:: Antarctica Rendering Engine 2.0 ::..
..:: Antarctica Rendering Engine 2.0 ::..
..:: Antarctica Rendering Engine 2.0 ::..
DOING 2048 1000000
Logging to ./outputs/PPO_58


Output()

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 655      |
|    ep_rew_mean     | 412      |
| time/              |          |
|    fps             | 47       |
|    iterations      | 1        |
|    time_elapsed    | 347      |
|    total_timesteps | 16384    |
---------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 640       |
|    ep_rew_mean          | 442       |
| time/                   |           |
|    fps                  | 51        |
|    iterations           | 2         |
|    time_elapsed         | 633       |
|    total_timesteps      | 32768     |
| train/                  |           |
|    approx_kl            | 4.4540358 |
|    clip_fraction        | 0.524     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.29     |
|    explained_variance   | 0.833     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.599     |
|    n_updates            | 10        |
|    policy_gradient_loss | -0.0161   |
|    value_loss           | 8.85      |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 641       |
|    ep_rew_mean          | 424       |
| time/                   |           |
|    fps                  | 48        |
|    iterations           | 3         |
|    time_elapsed         | 1021      |
|    total_timesteps      | 49152     |
| train/                  |           |
|    approx_kl            | 4.6397104 |
|    clip_fraction        | 0.463     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.255    |
|    explained_variance   | 0.806     |
|    learning_rate        | 0.0003    |
|    loss                 | 1.23      |
|    n_updates            | 20        |
|    policy_gradient_loss | -0.024    |
|    value_loss           | 11.8      |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 656       |
|    ep_rew_mean          | 454       |
| time/                   |           |
|    fps                  | 47        |
|    iterations           | 4         |
|    time_elapsed         | 1382      |
|    total_timesteps      | 65536     |
| train/                  |           |
|    approx_kl            | 3.5380187 |
|    clip_fraction        | 0.509     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.277    |
|    explained_variance   | 0.842     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.939     |
|    n_updates            | 30        |
|    policy_gradient_loss | 6.01e-05  |
|    value_loss           | 5.98      |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 673       |
|    ep_rew_mean          | 467       |
| time/                   |           |
|    fps                  | 44        |
|    iterations           | 5         |
|    time_elapsed         | 1847      |
|    total_timesteps      | 81920     |
| train/                  |           |
|    approx_kl            | 3.1022773 |
|    clip_fraction        | 0.499     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.303    |
|    explained_variance   | 0.813     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.678     |
|    n_updates            | 40        |
|    policy_gradient_loss | -0.0112   |
|    value_loss           | 7.52      |
---------------------------------------


--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 660      |
|    ep_rew_mean          | 457      |
| time/                   |          |
|    fps                  | 45       |
|    iterations           | 6        |
|    time_elapsed         | 2146     |
|    total_timesteps      | 98304    |
| train/                  |          |
|    approx_kl            | 3.744722 |
|    clip_fraction        | 0.502    |
|    clip_range           | 0.2      |
|    entropy_loss         | -0.309   |
|    explained_variance   | 0.676    |
|    learning_rate        | 0.0003   |
|    loss                 | 1.16     |
|    n_updates            | 50       |
|    policy_gradient_loss | 0.00516  |
|    value_loss           | 17.7     |
--------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 654       |
|    ep_rew_mean          | 436       |
| time/                   |           |
|    fps                  | 46        |
|    iterations           | 7         |
|    time_elapsed         | 2492      |
|    total_timesteps      | 114688    |
| train/                  |           |
|    approx_kl            | 3.4124427 |
|    clip_fraction        | 0.53      |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.319    |
|    explained_variance   | 0.863     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.617     |
|    n_updates            | 60        |
|    policy_gradient_loss | 0.00102   |
|    value_loss           | 6.88      |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 674       |
|    ep_rew_mean          | 437       |
| time/                   |           |
|    fps                  | 46        |
|    iterations           | 8         |
|    time_elapsed         | 2846      |
|    total_timesteps      | 131072    |
| train/                  |           |
|    approx_kl            | 2.7799153 |
|    clip_fraction        | 0.53      |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.328    |
|    explained_variance   | 0.824     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.46      |
|    n_updates            | 70        |
|    policy_gradient_loss | 0.000479  |
|    value_loss           | 6.93      |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 676       |
|    ep_rew_mean          | 439       |
| time/                   |           |
|    fps                  | 46        |
|    iterations           | 9         |
|    time_elapsed         | 3165      |
|    total_timesteps      | 147456    |
| train/                  |           |
|    approx_kl            | 3.1227627 |
|    clip_fraction        | 0.532     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.312    |
|    explained_variance   | 0.819     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.377     |
|    n_updates            | 80        |
|    policy_gradient_loss | -0.00211  |
|    value_loss           | 10.1      |
---------------------------------------


--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 651      |
|    ep_rew_mean          | 412      |
| time/                   |          |
|    fps                  | 47       |
|    iterations           | 10       |
|    time_elapsed         | 3485     |
|    total_timesteps      | 163840   |
| train/                  |          |
|    approx_kl            | 4.315278 |
|    clip_fraction        | 0.532    |
|    clip_range           | 0.2      |
|    entropy_loss         | -0.324   |
|    explained_variance   | 0.831    |
|    learning_rate        | 0.0003   |
|    loss                 | 1.46     |
|    n_updates            | 90       |
|    policy_gradient_loss | -0.0142  |
|    value_loss           | 6.5      |
--------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 646       |
|    ep_rew_mean          | 393       |
| time/                   |           |
|    fps                  | 46        |
|    iterations           | 11        |
|    time_elapsed         | 3857      |
|    total_timesteps      | 180224    |
| train/                  |           |
|    approx_kl            | 2.8284473 |
|    clip_fraction        | 0.494     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.299    |
|    explained_variance   | 0.82      |
|    learning_rate        | 0.0003    |
|    loss                 | 0.4       |
|    n_updates            | 100       |
|    policy_gradient_loss | -0.00905  |
|    value_loss           | 6.43      |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 633       |
|    ep_rew_mean          | 414       |
| time/                   |           |
|    fps                  | 46        |
|    iterations           | 12        |
|    time_elapsed         | 4202      |
|    total_timesteps      | 196608    |
| train/                  |           |
|    approx_kl            | 3.1356606 |
|    clip_fraction        | 0.505     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.297    |
|    explained_variance   | 0.847     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.571     |
|    n_updates            | 110       |
|    policy_gradient_loss | -0.0177   |
|    value_loss           | 7.55      |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 640       |
|    ep_rew_mean          | 409       |
| time/                   |           |
|    fps                  | 47        |
|    iterations           | 13        |
|    time_elapsed         | 4526      |
|    total_timesteps      | 212992    |
| train/                  |           |
|    approx_kl            | 2.1823106 |
|    clip_fraction        | 0.462     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.289    |
|    explained_variance   | 0.773     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.744     |
|    n_updates            | 120       |
|    policy_gradient_loss | -0.0218   |
|    value_loss           | 9.22      |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 648       |
|    ep_rew_mean          | 401       |
| time/                   |           |
|    fps                  | 46        |
|    iterations           | 14        |
|    time_elapsed         | 4948      |
|    total_timesteps      | 229376    |
| train/                  |           |
|    approx_kl            | 2.3294015 |
|    clip_fraction        | 0.535     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.338    |
|    explained_variance   | 0.821     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.436     |
|    n_updates            | 130       |
|    policy_gradient_loss | 0.00217   |
|    value_loss           | 5.83      |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 667       |
|    ep_rew_mean          | 426       |
| time/                   |           |
|    fps                  | 45        |
|    iterations           | 15        |
|    time_elapsed         | 5368      |
|    total_timesteps      | 245760    |
| train/                  |           |
|    approx_kl            | 3.8346918 |
|    clip_fraction        | 0.552     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.354    |
|    explained_variance   | 0.728     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.567     |
|    n_updates            | 140       |
|    policy_gradient_loss | 0.00194   |
|    value_loss           | 8.32      |
---------------------------------------


--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 682      |
|    ep_rew_mean          | 420      |
| time/                   |          |
|    fps                  | 45       |
|    iterations           | 16       |
|    time_elapsed         | 5701     |
|    total_timesteps      | 262144   |
| train/                  |          |
|    approx_kl            | 2.463502 |
|    clip_fraction        | 0.505    |
|    clip_range           | 0.2      |
|    entropy_loss         | -0.324   |
|    explained_variance   | 0.818    |
|    learning_rate        | 0.0003   |
|    loss                 | 0.547    |
|    n_updates            | 150      |
|    policy_gradient_loss | -0.0149  |
|    value_loss           | 6.43     |
--------------------------------------


--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 676      |
|    ep_rew_mean          | 396      |
| time/                   |          |
|    fps                  | 46       |
|    iterations           | 17       |
|    time_elapsed         | 5963     |
|    total_timesteps      | 278528   |
| train/                  |          |
|    approx_kl            | 2.525824 |
|    clip_fraction        | 0.5      |
|    clip_range           | 0.2      |
|    entropy_loss         | -0.316   |
|    explained_variance   | 0.84     |
|    learning_rate        | 0.0003   |
|    loss                 | 0.455    |
|    n_updates            | 160      |
|    policy_gradient_loss | -0.00922 |
|    value_loss           | 6.52     |
--------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 668       |
|    ep_rew_mean          | 384       |
| time/                   |           |
|    fps                  | 47        |
|    iterations           | 18        |
|    time_elapsed         | 6216      |
|    total_timesteps      | 294912    |
| train/                  |           |
|    approx_kl            | 3.0081537 |
|    clip_fraction        | 0.511     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.336    |
|    explained_variance   | 0.825     |
|    learning_rate        | 0.0003    |
|    loss                 | 1.04      |
|    n_updates            | 170       |
|    policy_gradient_loss | -0.02     |
|    value_loss           | 8.25      |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 648       |
|    ep_rew_mean          | 393       |
| time/                   |           |
|    fps                  | 47        |
|    iterations           | 19        |
|    time_elapsed         | 6574      |
|    total_timesteps      | 311296    |
| train/                  |           |
|    approx_kl            | 2.4309387 |
|    clip_fraction        | 0.471     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.291    |
|    explained_variance   | 0.816     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.697     |
|    n_updates            | 180       |
|    policy_gradient_loss | -0.0147   |
|    value_loss           | 8.29      |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 649       |
|    ep_rew_mean          | 390       |
| time/                   |           |
|    fps                  | 47        |
|    iterations           | 20        |
|    time_elapsed         | 6838      |
|    total_timesteps      | 327680    |
| train/                  |           |
|    approx_kl            | 2.6713357 |
|    clip_fraction        | 0.531     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.344    |
|    explained_variance   | 0.856     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.677     |
|    n_updates            | 190       |
|    policy_gradient_loss | -0.000512 |
|    value_loss           | 9.9       |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 630       |
|    ep_rew_mean          | 450       |
| time/                   |           |
|    fps                  | 47        |
|    iterations           | 21        |
|    time_elapsed         | 7233      |
|    total_timesteps      | 344064    |
| train/                  |           |
|    approx_kl            | 4.7500677 |
|    clip_fraction        | 0.521     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.327    |
|    explained_variance   | 0.859     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.535     |
|    n_updates            | 200       |
|    policy_gradient_loss | -0.0237   |
|    value_loss           | 9.93      |
---------------------------------------


--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 647      |
|    ep_rew_mean          | 472      |
| time/                   |          |
|    fps                  | 47       |
|    iterations           | 22       |
|    time_elapsed         | 7575     |
|    total_timesteps      | 360448   |
| train/                  |          |
|    approx_kl            | 2.444137 |
|    clip_fraction        | 0.473    |
|    clip_range           | 0.2      |
|    entropy_loss         | -0.299   |
|    explained_variance   | 0.814    |
|    learning_rate        | 0.0003   |
|    loss                 | 0.694    |
|    n_updates            | 210      |
|    policy_gradient_loss | -0.0218  |
|    value_loss           | 9.96     |
--------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 665       |
|    ep_rew_mean          | 485       |
| time/                   |           |
|    fps                  | 47        |
|    iterations           | 23        |
|    time_elapsed         | 7965      |
|    total_timesteps      | 376832    |
| train/                  |           |
|    approx_kl            | 3.1731205 |
|    clip_fraction        | 0.521     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.337    |
|    explained_variance   | 0.835     |
|    learning_rate        | 0.0003    |
|    loss                 | 1.57      |
|    n_updates            | 220       |
|    policy_gradient_loss | -0.0137   |
|    value_loss           | 15.7      |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 647       |
|    ep_rew_mean          | 480       |
| time/                   |           |
|    fps                  | 47        |
|    iterations           | 24        |
|    time_elapsed         | 8288      |
|    total_timesteps      | 393216    |
| train/                  |           |
|    approx_kl            | 2.7227664 |
|    clip_fraction        | 0.499     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.327    |
|    explained_variance   | 0.806     |
|    learning_rate        | 0.0003    |
|    loss                 | 1.81      |
|    n_updates            | 230       |
|    policy_gradient_loss | -0.0166   |
|    value_loss           | 11        |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 651       |
|    ep_rew_mean          | 475       |
| time/                   |           |
|    fps                  | 47        |
|    iterations           | 25        |
|    time_elapsed         | 8536      |
|    total_timesteps      | 409600    |
| train/                  |           |
|    approx_kl            | 2.8046172 |
|    clip_fraction        | 0.52      |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.336    |
|    explained_variance   | 0.816     |
|    learning_rate        | 0.0003    |
|    loss                 | 1.04      |
|    n_updates            | 240       |
|    policy_gradient_loss | -0.0152   |
|    value_loss           | 7.62      |
---------------------------------------


--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 663      |
|    ep_rew_mean          | 460      |
| time/                   |          |
|    fps                  | 48       |
|    iterations           | 26       |
|    time_elapsed         | 8818     |
|    total_timesteps      | 425984   |
| train/                  |          |
|    approx_kl            | 2.649378 |
|    clip_fraction        | 0.539    |
|    clip_range           | 0.2      |
|    entropy_loss         | -0.359   |
|    explained_variance   | 0.895    |
|    learning_rate        | 0.0003   |
|    loss                 | 0.824    |
|    n_updates            | 250      |
|    policy_gradient_loss | -0.00833 |
|    value_loss           | 6.18     |
--------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 671       |
|    ep_rew_mean          | 433       |
| time/                   |           |
|    fps                  | 47        |
|    iterations           | 27        |
|    time_elapsed         | 9234      |
|    total_timesteps      | 442368    |
| train/                  |           |
|    approx_kl            | 5.3311834 |
|    clip_fraction        | 0.583     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.394    |
|    explained_variance   | 0.884     |
|    learning_rate        | 0.0003    |
|    loss                 | 1.25      |
|    n_updates            | 260       |
|    policy_gradient_loss | -0.0137   |
|    value_loss           | 8.24      |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 676       |
|    ep_rew_mean          | 424       |
| time/                   |           |
|    fps                  | 48        |
|    iterations           | 28        |
|    time_elapsed         | 9507      |
|    total_timesteps      | 458752    |
| train/                  |           |
|    approx_kl            | 2.4461284 |
|    clip_fraction        | 0.532     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.356    |
|    explained_variance   | 0.911     |
|    learning_rate        | 0.0003    |
|    loss                 | 2.32      |
|    n_updates            | 270       |
|    policy_gradient_loss | -0.00199  |
|    value_loss           | 5.68      |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 671       |
|    ep_rew_mean          | 397       |
| time/                   |           |
|    fps                  | 48        |
|    iterations           | 29        |
|    time_elapsed         | 9889      |
|    total_timesteps      | 475136    |
| train/                  |           |
|    approx_kl            | 3.4286602 |
|    clip_fraction        | 0.526     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.345    |
|    explained_variance   | 0.898     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.983     |
|    n_updates            | 280       |
|    policy_gradient_loss | -0.0165   |
|    value_loss           | 6.23      |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 686       |
|    ep_rew_mean          | 401       |
| time/                   |           |
|    fps                  | 48        |
|    iterations           | 30        |
|    time_elapsed         | 10204     |
|    total_timesteps      | 491520    |
| train/                  |           |
|    approx_kl            | 3.3454647 |
|    clip_fraction        | 0.502     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.318    |
|    explained_variance   | 0.835     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.62      |
|    n_updates            | 290       |
|    policy_gradient_loss | -0.0135   |
|    value_loss           | 11.4      |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 667       |
|    ep_rew_mean          | 396       |
| time/                   |           |
|    fps                  | 48        |
|    iterations           | 31        |
|    time_elapsed         | 10513     |
|    total_timesteps      | 507904    |
| train/                  |           |
|    approx_kl            | 2.5643625 |
|    clip_fraction        | 0.512     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.33     |
|    explained_variance   | 0.865     |
|    learning_rate        | 0.0003    |
|    loss                 | 1.36      |
|    n_updates            | 300       |
|    policy_gradient_loss | -0.00562  |
|    value_loss           | 7.58      |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 640       |
|    ep_rew_mean          | 411       |
| time/                   |           |
|    fps                  | 47        |
|    iterations           | 32        |
|    time_elapsed         | 10953     |
|    total_timesteps      | 524288    |
| train/                  |           |
|    approx_kl            | 2.1649806 |
|    clip_fraction        | 0.484     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.316    |
|    explained_variance   | 0.868     |
|    learning_rate        | 0.0003    |
|    loss                 | 3.25      |
|    n_updates            | 310       |
|    policy_gradient_loss | -0.017    |
|    value_loss           | 15.2      |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 651       |
|    ep_rew_mean          | 432       |
| time/                   |           |
|    fps                  | 47        |
|    iterations           | 33        |
|    time_elapsed         | 11403     |
|    total_timesteps      | 540672    |
| train/                  |           |
|    approx_kl            | 3.4589424 |
|    clip_fraction        | 0.491     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.324    |
|    explained_variance   | 0.872     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.648     |
|    n_updates            | 320       |
|    policy_gradient_loss | -0.0175   |
|    value_loss           | 9.46      |
---------------------------------------


--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 661      |
|    ep_rew_mean          | 440      |
| time/                   |          |
|    fps                  | 47       |
|    iterations           | 34       |
|    time_elapsed         | 11768    |
|    total_timesteps      | 557056   |
| train/                  |          |
|    approx_kl            | 3.556283 |
|    clip_fraction        | 0.542    |
|    clip_range           | 0.2      |
|    entropy_loss         | -0.355   |
|    explained_variance   | 0.826    |
|    learning_rate        | 0.0003   |
|    loss                 | 0.593    |
|    n_updates            | 330      |
|    policy_gradient_loss | 0.0168   |
|    value_loss           | 7.02     |
--------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 628       |
|    ep_rew_mean          | 461       |
| time/                   |           |
|    fps                  | 47        |
|    iterations           | 35        |
|    time_elapsed         | 12023     |
|    total_timesteps      | 573440    |
| train/                  |           |
|    approx_kl            | 2.2939377 |
|    clip_fraction        | 0.505     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.346    |
|    explained_variance   | 0.859     |
|    learning_rate        | 0.0003    |
|    loss                 | 9.7       |
|    n_updates            | 340       |
|    policy_gradient_loss | -0.0214   |
|    value_loss           | 9.69      |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 640       |
|    ep_rew_mean          | 467       |
| time/                   |           |
|    fps                  | 47        |
|    iterations           | 36        |
|    time_elapsed         | 12419     |
|    total_timesteps      | 589824    |
| train/                  |           |
|    approx_kl            | 1.7868258 |
|    clip_fraction        | 0.465     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.319    |
|    explained_variance   | 0.834     |
|    learning_rate        | 0.0003    |
|    loss                 | 1.05      |
|    n_updates            | 350       |
|    policy_gradient_loss | -0.03     |
|    value_loss           | 9.78      |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 675       |
|    ep_rew_mean          | 424       |
| time/                   |           |
|    fps                  | 47        |
|    iterations           | 37        |
|    time_elapsed         | 12859     |
|    total_timesteps      | 606208    |
| train/                  |           |
|    approx_kl            | 3.2231314 |
|    clip_fraction        | 0.559     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.361    |
|    explained_variance   | 0.833     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.941     |
|    n_updates            | 360       |
|    policy_gradient_loss | -0.0092   |
|    value_loss           | 6.99      |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 641       |
|    ep_rew_mean          | 417       |
| time/                   |           |
|    fps                  | 47        |
|    iterations           | 38        |
|    time_elapsed         | 13117     |
|    total_timesteps      | 622592    |
| train/                  |           |
|    approx_kl            | 3.3135633 |
|    clip_fraction        | 0.527     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.368    |
|    explained_variance   | 0.79      |
|    learning_rate        | 0.0003    |
|    loss                 | 0.988     |
|    n_updates            | 370       |
|    policy_gradient_loss | -0.0122   |
|    value_loss           | 9.07      |
---------------------------------------


--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 644      |
|    ep_rew_mean          | 416      |
| time/                   |          |
|    fps                  | 47       |
|    iterations           | 39       |
|    time_elapsed         | 13477    |
|    total_timesteps      | 638976   |
| train/                  |          |
|    approx_kl            | 2.866899 |
|    clip_fraction        | 0.521    |
|    clip_range           | 0.2      |
|    entropy_loss         | -0.337   |
|    explained_variance   | 0.855    |
|    learning_rate        | 0.0003   |
|    loss                 | 0.873    |
|    n_updates            | 380      |
|    policy_gradient_loss | -0.0167  |
|    value_loss           | 7.44     |
--------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 662       |
|    ep_rew_mean          | 398       |
| time/                   |           |
|    fps                  | 47        |
|    iterations           | 40        |
|    time_elapsed         | 13811     |
|    total_timesteps      | 655360    |
| train/                  |           |
|    approx_kl            | 2.9740417 |
|    clip_fraction        | 0.53      |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.336    |
|    explained_variance   | 0.873     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.93      |
|    n_updates            | 390       |
|    policy_gradient_loss | -0.0072   |
|    value_loss           | 7.67      |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 669       |
|    ep_rew_mean          | 423       |
| time/                   |           |
|    fps                  | 46        |
|    iterations           | 41        |
|    time_elapsed         | 14294     |
|    total_timesteps      | 671744    |
| train/                  |           |
|    approx_kl            | 2.2717066 |
|    clip_fraction        | 0.495     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.337    |
|    explained_variance   | 0.847     |
|    learning_rate        | 0.0003    |
|    loss                 | 1.09      |
|    n_updates            | 400       |
|    policy_gradient_loss | -0.0231   |
|    value_loss           | 10.7      |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 635       |
|    ep_rew_mean          | 448       |
| time/                   |           |
|    fps                  | 47        |
|    iterations           | 42        |
|    time_elapsed         | 14624     |
|    total_timesteps      | 688128    |
| train/                  |           |
|    approx_kl            | 2.4343662 |
|    clip_fraction        | 0.498     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.333    |
|    explained_variance   | 0.886     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.929     |
|    n_updates            | 410       |
|    policy_gradient_loss | -0.0165   |
|    value_loss           | 9.44      |
---------------------------------------


--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 635      |
|    ep_rew_mean          | 470      |
| time/                   |          |
|    fps                  | 47       |
|    iterations           | 43       |
|    time_elapsed         | 14971    |
|    total_timesteps      | 704512   |
| train/                  |          |
|    approx_kl            | 2.558467 |
|    clip_fraction        | 0.514    |
|    clip_range           | 0.2      |
|    entropy_loss         | -0.349   |
|    explained_variance   | 0.823    |
|    learning_rate        | 0.0003   |
|    loss                 | 1.31     |
|    n_updates            | 420      |
|    policy_gradient_loss | -0.022   |
|    value_loss           | 8.79     |
--------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 629       |
|    ep_rew_mean          | 485       |
| time/                   |           |
|    fps                  | 46        |
|    iterations           | 44        |
|    time_elapsed         | 15348     |
|    total_timesteps      | 720896    |
| train/                  |           |
|    approx_kl            | 2.9897091 |
|    clip_fraction        | 0.515     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.332    |
|    explained_variance   | 0.843     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.731     |
|    n_updates            | 430       |
|    policy_gradient_loss | -0.0177   |
|    value_loss           | 8.41      |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 644       |
|    ep_rew_mean          | 496       |
| time/                   |           |
|    fps                  | 46        |
|    iterations           | 45        |
|    time_elapsed         | 15769     |
|    total_timesteps      | 737280    |
| train/                  |           |
|    approx_kl            | 2.7111878 |
|    clip_fraction        | 0.514     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.358    |
|    explained_variance   | 0.841     |
|    learning_rate        | 0.0003    |
|    loss                 | 1.02      |
|    n_updates            | 440       |
|    policy_gradient_loss | -0.0155   |
|    value_loss           | 8.28      |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 642       |
|    ep_rew_mean          | 499       |
| time/                   |           |
|    fps                  | 46        |
|    iterations           | 46        |
|    time_elapsed         | 16241     |
|    total_timesteps      | 753664    |
| train/                  |           |
|    approx_kl            | 3.1169066 |
|    clip_fraction        | 0.536     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.343    |
|    explained_variance   | 0.827     |
|    learning_rate        | 0.0003    |
|    loss                 | 1.26      |
|    n_updates            | 450       |
|    policy_gradient_loss | -0.00376  |
|    value_loss           | 7.94      |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 647       |
|    ep_rew_mean          | 472       |
| time/                   |           |
|    fps                  | 46        |
|    iterations           | 47        |
|    time_elapsed         | 16553     |
|    total_timesteps      | 770048    |
| train/                  |           |
|    approx_kl            | 1.9805951 |
|    clip_fraction        | 0.511     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.354    |
|    explained_variance   | 0.84      |
|    learning_rate        | 0.0003    |
|    loss                 | 1.03      |
|    n_updates            | 460       |
|    policy_gradient_loss | -0.0151   |
|    value_loss           | 9.9       |
---------------------------------------


--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 644      |
|    ep_rew_mean          | 466      |
| time/                   |          |
|    fps                  | 46       |
|    iterations           | 48       |
|    time_elapsed         | 16861    |
|    total_timesteps      | 786432   |
| train/                  |          |
|    approx_kl            | 2.356236 |
|    clip_fraction        | 0.529    |
|    clip_range           | 0.2      |
|    entropy_loss         | -0.356   |
|    explained_variance   | 0.859    |
|    learning_rate        | 0.0003   |
|    loss                 | 0.947    |
|    n_updates            | 470      |
|    policy_gradient_loss | -0.0182  |
|    value_loss           | 7.42     |
--------------------------------------


--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 642      |
|    ep_rew_mean          | 462      |
| time/                   |          |
|    fps                  | 46       |
|    iterations           | 49       |
|    time_elapsed         | 17312    |
|    total_timesteps      | 802816   |
| train/                  |          |
|    approx_kl            | 2.736772 |
|    clip_fraction        | 0.522    |
|    clip_range           | 0.2      |
|    entropy_loss         | -0.344   |
|    explained_variance   | 0.864    |
|    learning_rate        | 0.0003   |
|    loss                 | 0.597    |
|    n_updates            | 480      |
|    policy_gradient_loss | -0.0203  |
|    value_loss           | 7.94     |
--------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 632       |
|    ep_rew_mean          | 461       |
| time/                   |           |
|    fps                  | 46        |
|    iterations           | 50        |
|    time_elapsed         | 17580     |
|    total_timesteps      | 819200    |
| train/                  |           |
|    approx_kl            | 2.0802338 |
|    clip_fraction        | 0.514     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.351    |
|    explained_variance   | 0.881     |
|    learning_rate        | 0.0003    |
|    loss                 | 1         |
|    n_updates            | 490       |
|    policy_gradient_loss | -0.0191   |
|    value_loss           | 9.2       |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 625       |
|    ep_rew_mean          | 459       |
| time/                   |           |
|    fps                  | 46        |
|    iterations           | 51        |
|    time_elapsed         | 17970     |
|    total_timesteps      | 835584    |
| train/                  |           |
|    approx_kl            | 2.9651086 |
|    clip_fraction        | 0.543     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.357    |
|    explained_variance   | 0.861     |
|    learning_rate        | 0.0003    |
|    loss                 | 6.58      |
|    n_updates            | 500       |
|    policy_gradient_loss | 0.0321    |
|    value_loss           | 9.68      |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 631       |
|    ep_rew_mean          | 444       |
| time/                   |           |
|    fps                  | 46        |
|    iterations           | 52        |
|    time_elapsed         | 18315     |
|    total_timesteps      | 851968    |
| train/                  |           |
|    approx_kl            | 2.1710155 |
|    clip_fraction        | 0.497     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.345    |
|    explained_variance   | 0.835     |
|    learning_rate        | 0.0003    |
|    loss                 | 1.66      |
|    n_updates            | 510       |
|    policy_gradient_loss | -0.0277   |
|    value_loss           | 11.2      |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 636       |
|    ep_rew_mean          | 457       |
| time/                   |           |
|    fps                  | 46        |
|    iterations           | 53        |
|    time_elapsed         | 18575     |
|    total_timesteps      | 868352    |
| train/                  |           |
|    approx_kl            | 3.0851703 |
|    clip_fraction        | 0.554     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.357    |
|    explained_variance   | 0.907     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.735     |
|    n_updates            | 520       |
|    policy_gradient_loss | -0.0109   |
|    value_loss           | 7.31      |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 662       |
|    ep_rew_mean          | 443       |
| time/                   |           |
|    fps                  | 46        |
|    iterations           | 54        |
|    time_elapsed         | 18911     |
|    total_timesteps      | 884736    |
| train/                  |           |
|    approx_kl            | 3.9259624 |
|    clip_fraction        | 0.524     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.373    |
|    explained_variance   | 0.859     |
|    learning_rate        | 0.0003    |
|    loss                 | 1.2       |
|    n_updates            | 530       |
|    policy_gradient_loss | -0.0265   |
|    value_loss           | 13.1      |
---------------------------------------


--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 641      |
|    ep_rew_mean          | 461      |
| time/                   |          |
|    fps                  | 46       |
|    iterations           | 55       |
|    time_elapsed         | 19283    |
|    total_timesteps      | 901120   |
| train/                  |          |
|    approx_kl            | 2.754097 |
|    clip_fraction        | 0.526    |
|    clip_range           | 0.2      |
|    entropy_loss         | -0.353   |
|    explained_variance   | 0.873    |
|    learning_rate        | 0.0003   |
|    loss                 | 1.28     |
|    n_updates            | 540      |
|    policy_gradient_loss | -0.0107  |
|    value_loss           | 7.06     |
--------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 663       |
|    ep_rew_mean          | 453       |
| time/                   |           |
|    fps                  | 46        |
|    iterations           | 56        |
|    time_elapsed         | 19751     |
|    total_timesteps      | 917504    |
| train/                  |           |
|    approx_kl            | 2.3594325 |
|    clip_fraction        | 0.513     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.352    |
|    explained_variance   | 0.893     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.794     |
|    n_updates            | 550       |
|    policy_gradient_loss | -0.0163   |
|    value_loss           | 7.79      |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 656       |
|    ep_rew_mean          | 469       |
| time/                   |           |
|    fps                  | 46        |
|    iterations           | 57        |
|    time_elapsed         | 20172     |
|    total_timesteps      | 933888    |
| train/                  |           |
|    approx_kl            | 2.1566422 |
|    clip_fraction        | 0.48      |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.331    |
|    explained_variance   | 0.879     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.591     |
|    n_updates            | 560       |
|    policy_gradient_loss | -0.0187   |
|    value_loss           | 7.32      |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 665       |
|    ep_rew_mean          | 479       |
| time/                   |           |
|    fps                  | 46        |
|    iterations           | 58        |
|    time_elapsed         | 20476     |
|    total_timesteps      | 950272    |
| train/                  |           |
|    approx_kl            | 2.3556702 |
|    clip_fraction        | 0.497     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.334    |
|    explained_variance   | 0.881     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.864     |
|    n_updates            | 570       |
|    policy_gradient_loss | -0.0207   |
|    value_loss           | 7.14      |
---------------------------------------


--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 658      |
|    ep_rew_mean          | 478      |
| time/                   |          |
|    fps                  | 46       |
|    iterations           | 59       |
|    time_elapsed         | 20904    |
|    total_timesteps      | 966656   |
| train/                  |          |
|    approx_kl            | 2.28696  |
|    clip_fraction        | 0.496    |
|    clip_range           | 0.2      |
|    entropy_loss         | -0.342   |
|    explained_variance   | 0.864    |
|    learning_rate        | 0.0003   |
|    loss                 | 1.16     |
|    n_updates            | 580      |
|    policy_gradient_loss | -0.0221  |
|    value_loss           | 9.17     |
--------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 668       |
|    ep_rew_mean          | 476       |
| time/                   |           |
|    fps                  | 46        |
|    iterations           | 60        |
|    time_elapsed         | 21305     |
|    total_timesteps      | 983040    |
| train/                  |           |
|    approx_kl            | 2.2894773 |
|    clip_fraction        | 0.502     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.351    |
|    explained_variance   | 0.857     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.485     |
|    n_updates            | 590       |
|    policy_gradient_loss | -0.0169   |
|    value_loss           | 9.16      |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 660       |
|    ep_rew_mean          | 519       |
| time/                   |           |
|    fps                  | 45        |
|    iterations           | 61        |
|    time_elapsed         | 21760     |
|    total_timesteps      | 999424    |
| train/                  |           |
|    approx_kl            | 2.6353445 |
|    clip_fraction        | 0.493     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.322    |
|    explained_variance   | 0.876     |
|    learning_rate        | 0.0003    |
|    loss                 | 1.07      |
|    n_updates            | 600       |
|    policy_gradient_loss | -0.0244   |
|    value_loss           | 8.86      |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 651       |
|    ep_rew_mean          | 529       |
| time/                   |           |
|    fps                  | 45        |
|    iterations           | 62        |
|    time_elapsed         | 22146     |
|    total_timesteps      | 1015808   |
| train/                  |           |
|    approx_kl            | 1.8711927 |
|    clip_fraction        | 0.467     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.335    |
|    explained_variance   | 0.861     |
|    learning_rate        | 0.0003    |
|    loss                 | 1         |
|    n_updates            | 610       |
|    policy_gradient_loss | -0.0286   |
|    value_loss           | 20.6      |
---------------------------------------


In [3]:
# Build the checkpoint name once so the saved path and the value echoed as the
# cell output cannot drift apart.
# NOTE(review): the training log in this notebook reports clip_range = 0.2,
# while the name below carries the tag "clip01" -- confirm the tag reflects the
# hyperparameters that were actually used for this run.
checkpoint_name = f'ppti_ppo4_{n_steps}_batch128_clip01_ent0001'
model.save(checkpoint_name)
checkpoint_name

'ppti_ppo4_2048_batch128_clip01_ent0001'