In [None]:
import datetime
import pandas as pd
import numpy as np

from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent
from stable_baselines3.common.logger import configure
from finrl.meta.data_processor import DataProcessor

from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline
from pprint import pprint

import sys
sys.path.append("../FinRL")

import itertools

import wandb
wandb.login(key="fd2d0476c22a97c32eef81b22402a4c73ab32831")


from finrl import config
from finrl import config_tickers
import os
#os.environ["CUDA_VISIBLE_DEVICES"]="0"

from finrl.main import check_and_make_directories
from finrl.config import (
    DATA_SAVE_DIR,
    TRAINED_MODEL_DIR,
    TENSORBOARD_LOG_DIR,
    RESULTS_DIR,
    INDICATORS,
    TRAIN_START_DATE,
    TRAIN_END_DATE,
    TEST_START_DATE,
    TEST_END_DATE,
    TRADE_START_DATE,
    TRADE_END_DATE,
)
check_and_make_directories([DATA_SAVE_DIR, TRAINED_MODEL_DIR, TENSORBOARD_LOG_DIR, RESULTS_DIR])


#超参数优化

sweep_config = {
    'method': 'random'
    }

import math
parameters_dict = {
    'n_steps':{
        'values':[2048, 256, 128,512,1024,4098]
        },
    'total_timesteps':{
        'values':[50000, 100000, 200000,300000,500000]
        },
    'ent_coef': {
                'distribution': 'uniform',
        'min': 0,
        'max': 0.1
        },
    'learning_rate': {
        # a flat distribution between 0 and 0.1
        'distribution': 'uniform',
        'min': 0,
        'max': 0.1
      },
    'batch_size': {
        # integers between 32 and 256
        # with evenly-distributed logarithms 
        'distribution': 'q_log_uniform',
        'q': 1,
        'min': math.log(32),
        'max': math.log(1024),
      }
    }

sweep_config['parameters'] = parameters_dict








TRAIN_START_DATE = '2008-01-01'
TRAIN_END_DATE = '2020-03-01'
TRADE_START_DATE = '2020-03-01'
TRADE_END_DATE = '2022-10-10'


res = pd.read_csv('DOW_8s.csv')   #读取整理好后数据，根据情况进行时间格式转换
#INDICATORS.pop()
INDICATORS
INDICATORS.append('sentiment_score')


#时间格式转换
res['date'] = pd.to_datetime(res['date'],errors='coerce')     #str转date
res['date'] = res['date'].dt.strftime('%Y-%m-%d')   #date转str


train = data_split(res, TRAIN_START_DATE,TRAIN_END_DATE)
trade = data_split(res, TRADE_START_DATE,TRADE_END_DATE)


#建议环境配置
stock_dimension = len(train.tic.unique())
state_space = 1 + 2*stock_dimension + len(INDICATORS)*stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

buy_cost_list = sell_cost_list = [0.001] * stock_dimension
num_stock_shares = [0] * stock_dimension

env_kwargs = {
    "hmax": 20,
    "initial_amount": 1000000,
    "num_stock_shares": num_stock_shares,
    "buy_cost_pct": buy_cost_list,
    "sell_cost_pct": sell_cost_list,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": INDICATORS,
    "action_space": stock_dimension,
    "reward_scaling": 1e-4
}


e_train_gym = StockTradingEnv(df = train, **env_kwargs)


#设置train环境
env_train, _ = e_train_gym.get_sb_env()
print(type(env_train))
sweep_id = wandb.sweep(sweep_config, project="test-project")


def train():
    wandb.init()
    agent = DRLAgent(env = env_train)
    PPO_PARAMS = {
            "ent_coef": wandb.config.ent_coef,
            "n_steps": wandb.config.n_steps,
            "learning_rate": wandb.config.learning_rate,
            "batch_size": wandb.config.batch_size,}


    model_ppo = agent.get_model("ppo",model_kwargs = PPO_PARAMS)




    trained_ppo = agent.train_model(model=model_ppo, 
                             tb_log_name='ppo',
                             total_timesteps=wandb.config.total_timesteps)




    #Set turbulence threshold
    data_risk_indicator = res[(res.date<TRAIN_END_DATE) & (res.date>=TRAIN_START_DATE)]
    insample_risk_indicator = data_risk_indicator.drop_duplicates(subset=['date'])
    insample_risk_indicator.vix.describe() #VIX
    insample_risk_indicator.vix.quantile(0.996)
    insample_risk_indicator.turbulence.describe() #turbulence
    insample_risk_indicator.turbulence.quantile(0.996)



    e_trade_gym = StockTradingEnv(df = trade, turbulence_threshold = 70,risk_indicator_col='vix', **env_kwargs)
    trained_moedl = trained_ppo
    df_account_value, df_actions = DRLAgent.DRL_prediction(
    model=trained_moedl, 
    environment = e_trade_gym)
    #df_account_value.shape #资产格式
    #print(df_account_value.tail()) #资产状态
    #df_actions.tail(200)    #交易操作



    print("==============Get Backtest Results===========")
    now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')

    perf_stats_all = backtest_stats(account_value=df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)



    wandb.log({"Backtest Results": perf_stats_all,
          "action": df_actions,})
    
    
wandb.agent(sweep_id, train, count=50)

wandb.finish()

  'Module "zipline.assets" not found; mutltipliers will not be applied' +
[34m[1mwandb[0m: Currently logged in as: [33mzhilu[0m ([33mfinsrl[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /Users/zhangzhilu/.netrc


             date   tic        open        high         low       close  \
0      2008-01-02  AAPL    7.116786    7.152143    6.876786    5.931611   
1      2008-01-02   CAT   72.559998   72.669998   70.050003   46.539093   
2      2008-01-02   DIS   32.320000   32.630001   31.690001   27.012506   
3      2008-01-02    GS  214.800003  215.050003  206.600006  166.547913   
4      2008-01-02  INTC   26.280001   26.340000   24.950001   16.023220   
...           ...   ...         ...         ...         ...         ...   
24483  2020-02-28    GS  199.089996  202.250000  194.850006  190.099899   
24484  2020-02-28  INTC   54.299999   56.380001   53.599998   51.038109   
24485  2020-02-28   JPM  117.519997  118.489998  112.660004  106.632568   
24486  2020-02-28   MMM  154.089996  156.720001  146.000000  136.575607   
24487  2020-02-28  MSFT  152.410004  163.710007  152.000000  158.378555   

             volume  day      macd     boll_ub     boll_lb      rsi_30  \
0      1.079179e+09  2.0 



Create sweep with ID: wjkvqlsd
Sweep URL: https://wandb.ai/finsrl/test-project/sweeps/wjkvqlsd


[34m[1mwandb[0m: Agent Starting Run: ufb9b2fs with config:
[34m[1mwandb[0m: 	batch_size: 556
[34m[1mwandb[0m: 	ent_coef: 0.010850762418435711
[34m[1mwandb[0m: 	learning_rate: 0.04205927886524091
[34m[1mwandb[0m: 	n_steps: 128
[34m[1mwandb[0m: 	total_timesteps: 300000


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016753713199999964, max=1.0…

{'ent_coef': 0.010850762418435711, 'n_steps': 128, 'learning_rate': 0.04205927886524091, 'batch_size': 556}
Using cpu device


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=128 and n_envs=1)
  f"You have specified a mini-batch size of {batch_size},"


------------------------------------
| time/              |             |
|    fps             | 85          |
|    iterations      | 1           |
|    time_elapsed    | 1           |
|    total_timesteps | 128         |
| train/             |             |
|    reward          | -0.13082159 |
------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 76         |
|    iterations           | 2          |
|    time_elapsed         | 3          |
|    total_timesteps      | 256        |
| train/                  |            |
|    approx_kl            | 3.4071157  |
|    clip_fraction        | 0.848      |
|    clip_range           | 0.2        |
|    entropy_loss         | -11.9      |
|    explained_variance   | 0          |
|    learning_rate        | 0.0421     |
|    loss                 | 0.364      |
|    n_updates            | 10         |
|    policy_gradient_loss | 0.19       |
|    reward

---------------------------------------
| time/                   |           |
|    fps                  | 117       |
|    iterations           | 12        |
|    time_elapsed         | 13        |
|    total_timesteps      | 1536      |
| train/                  |           |
|    approx_kl            | 0.3106264 |
|    clip_fraction        | 0.759     |
|    clip_range           | 0.2       |
|    entropy_loss         | -17.7     |
|    explained_variance   | 3.22e-06  |
|    learning_rate        | 0.0421    |
|    loss                 | 3.75      |
|    n_updates            | 110       |
|    policy_gradient_loss | 0.224     |
|    reward               | 0.743117  |
|    std                  | 2.55      |
|    value_loss           | 8.55      |
---------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 120        |
|    iterations           | 13         |
|    time_elapsed         | 13      

----------------------------------------
| time/                   |            |
|    fps                  | 132        |
|    iterations           | 22         |
|    time_elapsed         | 21         |
|    total_timesteps      | 2816       |
| train/                  |            |
|    approx_kl            | 0.06447627 |
|    clip_fraction        | 0.499      |
|    clip_range           | 0.2        |
|    entropy_loss         | -18.8      |
|    explained_variance   | -3.58e-07  |
|    learning_rate        | 0.0421     |
|    loss                 | 50         |
|    n_updates            | 210        |
|    policy_gradient_loss | 0.0159     |
|    reward               | -1.0285001 |
|    std                  | 2.87       |
|    value_loss           | 109        |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 132        |
|    iterations           | 23         |
|    time_elapse

----------------------------------------
| time/                   |            |
|    fps                  | 124        |
|    iterations           | 32         |
|    time_elapsed         | 32         |
|    total_timesteps      | 4096       |
| train/                  |            |
|    approx_kl            | 0.35250506 |
|    clip_fraction        | 0.59       |
|    clip_range           | 0.2        |
|    entropy_loss         | -21.5      |
|    explained_variance   | -2.23e-05  |
|    learning_rate        | 0.0421     |
|    loss                 | 6.78       |
|    n_updates            | 310        |
|    policy_gradient_loss | 0.0339     |
|    reward               | 0.13382849 |
|    std                  | 4.07       |
|    value_loss           | 15.4       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 125        |
|    iterations           | 33         |
|    time_elapse

----------------------------------------
| time/                   |            |
|    fps                  | 123        |
|    iterations           | 42         |
|    time_elapsed         | 43         |
|    total_timesteps      | 5376       |
| train/                  |            |
|    approx_kl            | 0.26983392 |
|    clip_fraction        | 0.514      |
|    clip_range           | 0.2        |
|    entropy_loss         | -23.6      |
|    explained_variance   | -1.19e-07  |
|    learning_rate        | 0.0421     |
|    loss                 | 9.95       |
|    n_updates            | 410        |
|    policy_gradient_loss | -0.0246    |
|    reward               | -0.5999761 |
|    std                  | 5.75       |
|    value_loss           | 22.1       |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 124         |
|    iterations           | 43          |
|    time_el

----------------------------------------
| time/                   |            |
|    fps                  | 128        |
|    iterations           | 52         |
|    time_elapsed         | 51         |
|    total_timesteps      | 6656       |
| train/                  |            |
|    approx_kl            | 0.31223828 |
|    clip_fraction        | 0.431      |
|    clip_range           | 0.2        |
|    entropy_loss         | -25.6      |
|    explained_variance   | 2.38e-07   |
|    learning_rate        | 0.0421     |
|    loss                 | 5          |
|    n_updates            | 510        |
|    policy_gradient_loss | 0.0314     |
|    reward               | 1.6524693  |
|    std                  | 7.75       |
|    value_loss           | 12.3       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 128        |
|    iterations           | 53         |
|    time_elapse

-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 62          |
|    time_elapsed         | 60          |
|    total_timesteps      | 7936        |
| train/                  |             |
|    approx_kl            | 0.028147798 |
|    clip_fraction        | 0.184       |
|    clip_range           | 0.2         |
|    entropy_loss         | -27.7       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 5.29        |
|    n_updates            | 610         |
|    policy_gradient_loss | -0.0234     |
|    reward               | 3.634245    |
|    std                  | 9.89        |
|    value_loss           | 11          |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 63    

-----------------------------------------
| time/                   |             |
|    fps                  | 128         |
|    iterations           | 72          |
|    time_elapsed         | 71          |
|    total_timesteps      | 9216        |
| train/                  |             |
|    approx_kl            | 0.024087131 |
|    clip_fraction        | 0.191       |
|    clip_range           | 0.2         |
|    entropy_loss         | -27.8       |
|    explained_variance   | 5.96e-08    |
|    learning_rate        | 0.0421      |
|    loss                 | 49.3        |
|    n_updates            | 710         |
|    policy_gradient_loss | -0.000629   |
|    reward               | 0.032276545 |
|    std                  | 10.6        |
|    value_loss           | 100         |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 128          |
|    iterations           | 73 

-----------------------------------------
| time/                   |             |
|    fps                  | 132         |
|    iterations           | 82          |
|    time_elapsed         | 79          |
|    total_timesteps      | 10496       |
| train/                  |             |
|    approx_kl            | 0.027494643 |
|    clip_fraction        | 0.284       |
|    clip_range           | 0.2         |
|    entropy_loss         | -28.3       |
|    explained_variance   | -0.00446    |
|    learning_rate        | 0.0421      |
|    loss                 | 18.3        |
|    n_updates            | 810         |
|    policy_gradient_loss | -0.0197     |
|    reward               | -1.7421726  |
|    std                  | 10.4        |
|    value_loss           | 37.4        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 132        |
|    iterations           | 83       

-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 92          |
|    time_elapsed         | 87          |
|    total_timesteps      | 11776       |
| train/                  |             |
|    approx_kl            | 0.048427526 |
|    clip_fraction        | 0.227       |
|    clip_range           | 0.2         |
|    entropy_loss         | -29.6       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 7.78        |
|    n_updates            | 910         |
|    policy_gradient_loss | -0.00633    |
|    reward               | -2.8049057  |
|    std                  | 12.8        |
|    value_loss           | 16.4        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 93    

----------------------------------------
| time/                   |            |
|    fps                  | 133        |
|    iterations           | 102        |
|    time_elapsed         | 97         |
|    total_timesteps      | 13056      |
| train/                  |            |
|    approx_kl            | 0.17360237 |
|    clip_fraction        | 0.54       |
|    clip_range           | 0.2        |
|    entropy_loss         | -29.9      |
|    explained_variance   | -8.19e-05  |
|    learning_rate        | 0.0421     |
|    loss                 | 12.9       |
|    n_updates            | 1010       |
|    policy_gradient_loss | -0.00652   |
|    reward               | 0.06070407 |
|    std                  | 13.5       |
|    value_loss           | 26.6       |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 103         |
|    time_el

-----------------------------------------
| time/                   |             |
|    fps                  | 135         |
|    iterations           | 112         |
|    time_elapsed         | 105         |
|    total_timesteps      | 14336       |
| train/                  |             |
|    approx_kl            | 2.4808512   |
|    clip_fraction        | 0.605       |
|    clip_range           | 0.2         |
|    entropy_loss         | -31.7       |
|    explained_variance   | 1.19e-07    |
|    learning_rate        | 0.0421      |
|    loss                 | 30.8        |
|    n_updates            | 1110        |
|    policy_gradient_loss | 0.113       |
|    reward               | -0.81993246 |
|    std                  | 15.3        |
|    value_loss           | 63.2        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 136        |
|    iterations           | 113      

----------------------------------------
| time/                   |            |
|    fps                  | 136        |
|    iterations           | 122        |
|    time_elapsed         | 114        |
|    total_timesteps      | 15616      |
| train/                  |            |
|    approx_kl            | 0.0658074  |
|    clip_fraction        | 0.33       |
|    clip_range           | 0.2        |
|    entropy_loss         | -31.6      |
|    explained_variance   | 0          |
|    learning_rate        | 0.0421     |
|    loss                 | 3.59       |
|    n_updates            | 1210       |
|    policy_gradient_loss | -0.0217    |
|    reward               | -1.8054439 |
|    std                  | 16.5       |
|    value_loss           | 10.3       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 136        |
|    iterations           | 123        |
|    time_elapse

----------------------------------------
| time/                   |            |
|    fps                  | 131        |
|    iterations           | 132        |
|    time_elapsed         | 128        |
|    total_timesteps      | 16896      |
| train/                  |            |
|    approx_kl            | 0.05070085 |
|    clip_fraction        | 0.316      |
|    clip_range           | 0.2        |
|    entropy_loss         | -32.4      |
|    explained_variance   | 1.19e-07   |
|    learning_rate        | 0.0421     |
|    loss                 | 8.68       |
|    n_updates            | 1310       |
|    policy_gradient_loss | -0.0201    |
|    reward               | 1.5465604  |
|    std                  | 17.4       |
|    value_loss           | 17.9       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 131        |
|    iterations           | 133        |
|    time_elapse

-----------------------------------------
| time/                   |             |
|    fps                  | 132         |
|    iterations           | 142         |
|    time_elapsed         | 136         |
|    total_timesteps      | 18176       |
| train/                  |             |
|    approx_kl            | 0.016741196 |
|    clip_fraction        | 0.102       |
|    clip_range           | 0.2         |
|    entropy_loss         | -33.2       |
|    explained_variance   | 1.19e-07    |
|    learning_rate        | 0.0421      |
|    loss                 | 33.9        |
|    n_updates            | 1410        |
|    policy_gradient_loss | -0.00978    |
|    reward               | -2.6570573  |
|    std                  | 17.6        |
|    value_loss           | 71.1        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 132         |
|    iterations           | 143   

-----------------------------------------
| time/                   |             |
|    fps                  | 135         |
|    iterations           | 152         |
|    time_elapsed         | 143         |
|    total_timesteps      | 19456       |
| train/                  |             |
|    approx_kl            | 0.020018345 |
|    clip_fraction        | 0.145       |
|    clip_range           | 0.2         |
|    entropy_loss         | -32.5       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 10.8        |
|    n_updates            | 1510        |
|    policy_gradient_loss | 0.00327     |
|    reward               | -0.2839096  |
|    std                  | 17.4        |
|    value_loss           | 38.1        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 136         |
|    iterations           | 153   

-----------------------------------------
| time/                   |             |
|    fps                  | 138         |
|    iterations           | 162         |
|    time_elapsed         | 149         |
|    total_timesteps      | 20736       |
| train/                  |             |
|    approx_kl            | 0.027224248 |
|    clip_fraction        | 0.216       |
|    clip_range           | 0.2         |
|    entropy_loss         | -34         |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 22.5        |
|    n_updates            | 1610        |
|    policy_gradient_loss | -0.011      |
|    reward               | 2.529803    |
|    std                  | 23.5        |
|    value_loss           | 49.6        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 139         |
|    iterations           | 163   

-----------------------------------------
| time/                   |             |
|    fps                  | 138         |
|    iterations           | 172         |
|    time_elapsed         | 159         |
|    total_timesteps      | 22016       |
| train/                  |             |
|    approx_kl            | 0.045297347 |
|    clip_fraction        | 0.177       |
|    clip_range           | 0.2         |
|    entropy_loss         | -32.9       |
|    explained_variance   | -2.38e-07   |
|    learning_rate        | 0.0421      |
|    loss                 | 1.96        |
|    n_updates            | 1710        |
|    policy_gradient_loss | -0.00601    |
|    reward               | -0.67733425 |
|    std                  | 25.9        |
|    value_loss           | 5.39        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 137         |
|    iterations           | 173   

----------------------------------------
| time/                   |            |
|    fps                  | 138        |
|    iterations           | 182        |
|    time_elapsed         | 168        |
|    total_timesteps      | 23296      |
| train/                  |            |
|    approx_kl            | 0.07260006 |
|    clip_fraction        | 0.462      |
|    clip_range           | 0.2        |
|    entropy_loss         | -33.4      |
|    explained_variance   | -1.19e-07  |
|    learning_rate        | 0.0421     |
|    loss                 | 6.89       |
|    n_updates            | 1810       |
|    policy_gradient_loss | -0.000448  |
|    reward               | 2.1586573  |
|    std                  | 31.9       |
|    value_loss           | 14.5       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 138        |
|    iterations           | 183        |
|    time_elapse

----------------------------------------
| time/                   |            |
|    fps                  | 138        |
|    iterations           | 192        |
|    time_elapsed         | 176        |
|    total_timesteps      | 24576      |
| train/                  |            |
|    approx_kl            | 0.03768868 |
|    clip_fraction        | 0.232      |
|    clip_range           | 0.2        |
|    entropy_loss         | -32.6      |
|    explained_variance   | 0          |
|    learning_rate        | 0.0421     |
|    loss                 | 77         |
|    n_updates            | 1910       |
|    policy_gradient_loss | 0.00139    |
|    reward               | 0.03774665 |
|    std                  | 25.3       |
|    value_loss           | 188        |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 139         |
|    iterations           | 193         |
|    time_el

----------------------------------------
| time/                   |            |
|    fps                  | 139        |
|    iterations           | 202        |
|    time_elapsed         | 185        |
|    total_timesteps      | 25856      |
| train/                  |            |
|    approx_kl            | 0.26687247 |
|    clip_fraction        | 0.575      |
|    clip_range           | 0.2        |
|    entropy_loss         | -34.2      |
|    explained_variance   | 0          |
|    learning_rate        | 0.0421     |
|    loss                 | 5.92       |
|    n_updates            | 2010       |
|    policy_gradient_loss | -0.0193    |
|    reward               | -0.8734136 |
|    std                  | 25.3       |
|    value_loss           | 12.6       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 139        |
|    iterations           | 203        |
|    time_elapse

------------------------------------------
| time/                   |              |
|    fps                  | 140          |
|    iterations           | 212          |
|    time_elapsed         | 193          |
|    total_timesteps      | 27136        |
| train/                  |              |
|    approx_kl            | 0.0071809124 |
|    clip_fraction        | 0.0273       |
|    clip_range           | 0.2          |
|    entropy_loss         | -35.6        |
|    explained_variance   | -1.67e-05    |
|    learning_rate        | 0.0421       |
|    loss                 | 22.9         |
|    n_updates            | 2110         |
|    policy_gradient_loss | -0.00137     |
|    reward               | 0.25398934   |
|    std                  | 35.8         |
|    value_loss           | 109          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 140          |
|    iterat

----------------------------------------
| time/                   |            |
|    fps                  | 141        |
|    iterations           | 222        |
|    time_elapsed         | 201        |
|    total_timesteps      | 28416      |
| train/                  |            |
|    approx_kl            | 0.05312089 |
|    clip_fraction        | 0.246      |
|    clip_range           | 0.2        |
|    entropy_loss         | -36.4      |
|    explained_variance   | -2.09e-05  |
|    learning_rate        | 0.0421     |
|    loss                 | 4.19       |
|    n_updates            | 2210       |
|    policy_gradient_loss | -0.018     |
|    reward               | -2.673054  |
|    std                  | 41.4       |
|    value_loss           | 8.87       |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 141         |
|    iterations           | 223         |
|    time_el

-----------------------------------------
| time/                   |             |
|    fps                  | 141         |
|    iterations           | 232         |
|    time_elapsed         | 209         |
|    total_timesteps      | 29696       |
| train/                  |             |
|    approx_kl            | 0.038873024 |
|    clip_fraction        | 0.206       |
|    clip_range           | 0.2         |
|    entropy_loss         | -37.1       |
|    explained_variance   | 1.19e-07    |
|    learning_rate        | 0.0421      |
|    loss                 | 12.1        |
|    n_updates            | 2310        |
|    policy_gradient_loss | -0.00436    |
|    reward               | -0.18594465 |
|    std                  | 49.9        |
|    value_loss           | 25.4        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 141         |
|    iterations           | 233   

-----------------------------------------
| time/                   |             |
|    fps                  | 142         |
|    iterations           | 242         |
|    time_elapsed         | 218         |
|    total_timesteps      | 30976       |
| train/                  |             |
|    approx_kl            | 0.07757908  |
|    clip_fraction        | 0.229       |
|    clip_range           | 0.2         |
|    entropy_loss         | -37.5       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 1.46        |
|    n_updates            | 2410        |
|    policy_gradient_loss | -0.0228     |
|    reward               | -0.26205534 |
|    std                  | 47          |
|    value_loss           | 5.23        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 142         |
|    iterations           | 243   

-----------------------------------------
| time/                   |             |
|    fps                  | 142         |
|    iterations           | 252         |
|    time_elapsed         | 226         |
|    total_timesteps      | 32256       |
| train/                  |             |
|    approx_kl            | 0.020573426 |
|    clip_fraction        | 0.103       |
|    clip_range           | 0.2         |
|    entropy_loss         | -38.7       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 3.86        |
|    n_updates            | 2510        |
|    policy_gradient_loss | -0.00762    |
|    reward               | -2.4170694  |
|    std                  | 49.9        |
|    value_loss           | 9.49        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 142        |
|    iterations           | 253      

-----------------------------------------
| time/                   |             |
|    fps                  | 143         |
|    iterations           | 262         |
|    time_elapsed         | 233         |
|    total_timesteps      | 33536       |
| train/                  |             |
|    approx_kl            | 0.014536001 |
|    clip_fraction        | 0.0547      |
|    clip_range           | 0.2         |
|    entropy_loss         | -39.4       |
|    explained_variance   | -0.00559    |
|    learning_rate        | 0.0421      |
|    loss                 | 99.6        |
|    n_updates            | 2610        |
|    policy_gradient_loss | -0.0185     |
|    reward               | 2.9379585   |
|    std                  | 59.5        |
|    value_loss           | 197         |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 143         |
|    iterations           | 263   

-----------------------------------------
| time/                   |             |
|    fps                  | 142         |
|    iterations           | 272         |
|    time_elapsed         | 244         |
|    total_timesteps      | 34816       |
| train/                  |             |
|    approx_kl            | 0.042479932 |
|    clip_fraction        | 0.275       |
|    clip_range           | 0.2         |
|    entropy_loss         | -40.7       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 10.8        |
|    n_updates            | 2710        |
|    policy_gradient_loss | -0.00929    |
|    reward               | 0.9636458   |
|    std                  | 75.5        |
|    value_loss           | 22.4        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 142         |
|    iterations           | 273   

-----------------------------------------
| time/                   |             |
|    fps                  | 141         |
|    iterations           | 282         |
|    time_elapsed         | 254         |
|    total_timesteps      | 36096       |
| train/                  |             |
|    approx_kl            | 0.014309237 |
|    clip_fraction        | 0.0961      |
|    clip_range           | 0.2         |
|    entropy_loss         | -42.3       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 6.4         |
|    n_updates            | 2810        |
|    policy_gradient_loss | -0.000596   |
|    reward               | 0.35080633  |
|    std                  | 107         |
|    value_loss           | 14.5        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 141         |
|    iterations           | 283   

----------------------------------------
| time/                   |            |
|    fps                  | 142        |
|    iterations           | 292        |
|    time_elapsed         | 262        |
|    total_timesteps      | 37376      |
| train/                  |            |
|    approx_kl            | 0.09055373 |
|    clip_fraction        | 0.379      |
|    clip_range           | 0.2        |
|    entropy_loss         | -43.2      |
|    explained_variance   | 5.96e-08   |
|    learning_rate        | 0.0421     |
|    loss                 | -0.0842    |
|    n_updates            | 2910       |
|    policy_gradient_loss | -0.025     |
|    reward               | 0.9964561  |
|    std                  | 118        |
|    value_loss           | 1.14       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 142        |
|    iterations           | 293        |
|    time_elapse

-----------------------------------------
| time/                   |             |
|    fps                  | 141         |
|    iterations           | 302         |
|    time_elapsed         | 272         |
|    total_timesteps      | 38656       |
| train/                  |             |
|    approx_kl            | 0.117676795 |
|    clip_fraction        | 0.383       |
|    clip_range           | 0.2         |
|    entropy_loss         | -44.9       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 17.1        |
|    n_updates            | 3010        |
|    policy_gradient_loss | -0.0319     |
|    reward               | -6.941301   |
|    std                  | 158         |
|    value_loss           | 34.8        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 141         |
|    iterations           | 303   

-----------------------------------------
| time/                   |             |
|    fps                  | 142         |
|    iterations           | 312         |
|    time_elapsed         | 281         |
|    total_timesteps      | 39936       |
| train/                  |             |
|    approx_kl            | 0.01133213  |
|    clip_fraction        | 0.0305      |
|    clip_range           | 0.2         |
|    entropy_loss         | -45.4       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 189         |
|    n_updates            | 3110        |
|    policy_gradient_loss | -0.0102     |
|    reward               | -0.35023224 |
|    std                  | 155         |
|    value_loss           | 387         |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 141        |
|    iterations           | 313      

-----------------------------------------
| time/                   |             |
|    fps                  | 140         |
|    iterations           | 322         |
|    time_elapsed         | 293         |
|    total_timesteps      | 41216       |
| train/                  |             |
|    approx_kl            | 0.021522144 |
|    clip_fraction        | 0.311       |
|    clip_range           | 0.2         |
|    entropy_loss         | -44.5       |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0421      |
|    loss                 | 8.68        |
|    n_updates            | 3210        |
|    policy_gradient_loss | -0.0177     |
|    reward               | -1.7523512  |
|    std                  | 192         |
|    value_loss           | 18.4        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 140        |
|    iterations           | 323      

----------------------------------------
| time/                   |            |
|    fps                  | 140        |
|    iterations           | 332        |
|    time_elapsed         | 301        |
|    total_timesteps      | 42496      |
| train/                  |            |
|    approx_kl            | 0.04422719 |
|    clip_fraction        | 0.508      |
|    clip_range           | 0.2        |
|    entropy_loss         | -45        |
|    explained_variance   | -1.19e-07  |
|    learning_rate        | 0.0421     |
|    loss                 | 50         |
|    n_updates            | 3310       |
|    policy_gradient_loss | 0.0162     |
|    reward               | 2.6735463  |
|    std                  | 217        |
|    value_loss           | 101        |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 141        |
|    iterations           | 333        |
|    time_elapse

----------------------------------------
| time/                   |            |
|    fps                  | 142        |
|    iterations           | 342        |
|    time_elapsed         | 308        |
|    total_timesteps      | 43776      |
| train/                  |            |
|    approx_kl            | 0.06955702 |
|    clip_fraction        | 0.485      |
|    clip_range           | 0.2        |
|    entropy_loss         | -46        |
|    explained_variance   | 0          |
|    learning_rate        | 0.0421     |
|    loss                 | 3.12       |
|    n_updates            | 3410       |
|    policy_gradient_loss | 0.0254     |
|    reward               | 2.879355   |
|    std                  | 199        |
|    value_loss           | 7.55       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 142        |
|    iterations           | 343        |
|    time_elapse

-----------------------------------------
| time/                   |             |
|    fps                  | 141         |
|    iterations           | 352         |
|    time_elapsed         | 318         |
|    total_timesteps      | 45056       |
| train/                  |             |
|    approx_kl            | 0.020181566 |
|    clip_fraction        | 0.155       |
|    clip_range           | 0.2         |
|    entropy_loss         | -46.5       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 42.3        |
|    n_updates            | 3510        |
|    policy_gradient_loss | -0.00669    |
|    reward               | -0.08373779 |
|    std                  | 201         |
|    value_loss           | 85          |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 141        |
|    iterations           | 353      

-----------------------------------------
| time/                   |             |
|    fps                  | 141         |
|    iterations           | 362         |
|    time_elapsed         | 327         |
|    total_timesteps      | 46336       |
| train/                  |             |
|    approx_kl            | 0.030780725 |
|    clip_fraction        | 0.0977      |
|    clip_range           | 0.2         |
|    entropy_loss         | -46.5       |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0421      |
|    loss                 | 2.47        |
|    n_updates            | 3610        |
|    policy_gradient_loss | -0.0136     |
|    reward               | -0.29638553 |
|    std                  | 226         |
|    value_loss           | 6.41        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 141         |
|    iterations           | 363   

----------------------------------------
| time/                   |            |
|    fps                  | 141        |
|    iterations           | 372        |
|    time_elapsed         | 335        |
|    total_timesteps      | 47616      |
| train/                  |            |
|    approx_kl            | 0.12338145 |
|    clip_fraction        | 0.386      |
|    clip_range           | 0.2        |
|    entropy_loss         | -47.7      |
|    explained_variance   | 1.79e-07   |
|    learning_rate        | 0.0421     |
|    loss                 | 9.04       |
|    n_updates            | 3710       |
|    policy_gradient_loss | 0.0363     |
|    reward               | 2.3730922  |
|    std                  | 284        |
|    value_loss           | 18         |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 142        |
|    iterations           | 373        |
|    time_elapse

---------------------------------------
| time/                   |           |
|    fps                  | 141       |
|    iterations           | 382       |
|    time_elapsed         | 345       |
|    total_timesteps      | 48896     |
| train/                  |           |
|    approx_kl            | 0.0473171 |
|    clip_fraction        | 0.1       |
|    clip_range           | 0.2       |
|    entropy_loss         | -49.2     |
|    explained_variance   | 0         |
|    learning_rate        | 0.0421    |
|    loss                 | 60.2      |
|    n_updates            | 3810      |
|    policy_gradient_loss | -0.0105   |
|    reward               | 2.669528  |
|    std                  | 365       |
|    value_loss           | 120       |
---------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 141        |
|    iterations           | 383        |
|    time_elapsed         | 346     

-----------------------------------------
| time/                   |             |
|    fps                  | 142         |
|    iterations           | 392         |
|    time_elapsed         | 353         |
|    total_timesteps      | 50176       |
| train/                  |             |
|    approx_kl            | 0.016817544 |
|    clip_fraction        | 0.0742      |
|    clip_range           | 0.2         |
|    entropy_loss         | -51         |
|    explained_variance   | 1.19e-07    |
|    learning_rate        | 0.0421      |
|    loss                 | 6.74        |
|    n_updates            | 3910        |
|    policy_gradient_loss | -0.0133     |
|    reward               | 0.68144804  |
|    std                  | 534         |
|    value_loss           | 16.4        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 142         |
|    iterations           | 393   

-----------------------------------------
| time/                   |             |
|    fps                  | 142         |
|    iterations           | 402         |
|    time_elapsed         | 361         |
|    total_timesteps      | 51456       |
| train/                  |             |
|    approx_kl            | 0.021994397 |
|    clip_fraction        | 0.176       |
|    clip_range           | 0.2         |
|    entropy_loss         | -52.2       |
|    explained_variance   | 1.19e-07    |
|    learning_rate        | 0.0421      |
|    loss                 | 8.74        |
|    n_updates            | 4010        |
|    policy_gradient_loss | 0.0165      |
|    reward               | 3.0078306   |
|    std                  | 599         |
|    value_loss           | 19.8        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 142         |
|    iterations           | 403   

-----------------------------------------
| time/                   |             |
|    fps                  | 142         |
|    iterations           | 412         |
|    time_elapsed         | 370         |
|    total_timesteps      | 52736       |
| train/                  |             |
|    approx_kl            | 0.03719943  |
|    clip_fraction        | 0.141       |
|    clip_range           | 0.2         |
|    entropy_loss         | -53.3       |
|    explained_variance   | 5.96e-08    |
|    learning_rate        | 0.0421      |
|    loss                 | -0.205      |
|    n_updates            | 4110        |
|    policy_gradient_loss | -0.0173     |
|    reward               | -0.15413123 |
|    std                  | 685         |
|    value_loss           | 1           |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 142        |
|    iterations           | 413      

-----------------------------------------
| time/                   |             |
|    fps                  | 143         |
|    iterations           | 422         |
|    time_elapsed         | 377         |
|    total_timesteps      | 54016       |
| train/                  |             |
|    approx_kl            | 0.076676056 |
|    clip_fraction        | 0.266       |
|    clip_range           | 0.2         |
|    entropy_loss         | -53.9       |
|    explained_variance   | 0.0037      |
|    learning_rate        | 0.0421      |
|    loss                 | 9.13        |
|    n_updates            | 4210        |
|    policy_gradient_loss | -0.00606    |
|    reward               | 1.1165688   |
|    std                  | 892         |
|    value_loss           | 21.4        |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 143          |
|    iterations           | 423

------------------------------------------
| time/                   |              |
|    fps                  | 143          |
|    iterations           | 432          |
|    time_elapsed         | 386          |
|    total_timesteps      | 55296        |
| train/                  |              |
|    approx_kl            | 0.0144443065 |
|    clip_fraction        | 0.0305       |
|    clip_range           | 0.2          |
|    entropy_loss         | -54.3        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0421       |
|    loss                 | 38.1         |
|    n_updates            | 4310         |
|    policy_gradient_loss | -0.00608     |
|    reward               | -0.324247    |
|    std                  | 1.14e+03     |
|    value_loss           | 82.4         |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 143         |
|    iteration

----------------------------------------
| time/                   |            |
|    fps                  | 142        |
|    iterations           | 442        |
|    time_elapsed         | 397        |
|    total_timesteps      | 56576      |
| train/                  |            |
|    approx_kl            | 0.09004791 |
|    clip_fraction        | 0.245      |
|    clip_range           | 0.2        |
|    entropy_loss         | -54.6      |
|    explained_variance   | 0          |
|    learning_rate        | 0.0421     |
|    loss                 | 9.56       |
|    n_updates            | 4410       |
|    policy_gradient_loss | -0.0113    |
|    reward               | 1.3807346  |
|    std                  | 1.11e+03   |
|    value_loss           | 21.3       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 142        |
|    iterations           | 443        |
|    time_elapse

-----------------------------------------
| time/                   |             |
|    fps                  | 139         |
|    iterations           | 452         |
|    time_elapsed         | 415         |
|    total_timesteps      | 57856       |
| train/                  |             |
|    approx_kl            | 0.043519408 |
|    clip_fraction        | 0.102       |
|    clip_range           | 0.2         |
|    entropy_loss         | -55.4       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 50.6        |
|    n_updates            | 4510        |
|    policy_gradient_loss | -0.00548    |
|    reward               | 0.110408515 |
|    std                  | 1.03e+03    |
|    value_loss           | 103         |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 138         |
|    iterations           | 453   

-----------------------------------------
| time/                   |             |
|    fps                  | 137         |
|    iterations           | 462         |
|    time_elapsed         | 430         |
|    total_timesteps      | 59136       |
| train/                  |             |
|    approx_kl            | 0.035189644 |
|    clip_fraction        | 0.216       |
|    clip_range           | 0.2         |
|    entropy_loss         | -55.8       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 1.41        |
|    n_updates            | 4610        |
|    policy_gradient_loss | -0.00696    |
|    reward               | 0.47639868  |
|    std                  | 1.07e+03    |
|    value_loss           | 4.44        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 137        |
|    iterations           | 463      

-----------------------------------------
| time/                   |             |
|    fps                  | 135         |
|    iterations           | 472         |
|    time_elapsed         | 445         |
|    total_timesteps      | 60416       |
| train/                  |             |
|    approx_kl            | 0.042696953 |
|    clip_fraction        | 0.167       |
|    clip_range           | 0.2         |
|    entropy_loss         | -55.3       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 20.4        |
|    n_updates            | 4710        |
|    policy_gradient_loss | -0.0158     |
|    reward               | -2.079297   |
|    std                  | 1.36e+03    |
|    value_loss           | 42          |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 135         |
|    iterations           | 473   

-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 482         |
|    time_elapsed         | 458         |
|    total_timesteps      | 61696       |
| train/                  |             |
|    approx_kl            | 0.040718798 |
|    clip_fraction        | 0.172       |
|    clip_range           | 0.2         |
|    entropy_loss         | -55.9       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 0.144       |
|    n_updates            | 4810        |
|    policy_gradient_loss | -0.0182     |
|    reward               | -0.4478501  |
|    std                  | 1.32e+03    |
|    value_loss           | 2.07        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 483   

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 492         |
|    time_elapsed         | 473         |
|    total_timesteps      | 62976       |
| train/                  |             |
|    approx_kl            | 0.009606513 |
|    clip_fraction        | 0.0789      |
|    clip_range           | 0.2         |
|    entropy_loss         | -55.9       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 7.11        |
|    n_updates            | 4910        |
|    policy_gradient_loss | 0.0113      |
|    reward               | 2.5195036   |
|    std                  | 1.94e+03    |
|    value_loss           | 14.9        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 132         |
|    iterations           | 493   

-----------------------------------------
| time/                   |             |
|    fps                  | 132         |
|    iterations           | 502         |
|    time_elapsed         | 485         |
|    total_timesteps      | 64256       |
| train/                  |             |
|    approx_kl            | 0.037667222 |
|    clip_fraction        | 0.195       |
|    clip_range           | 0.2         |
|    entropy_loss         | -56.8       |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0421      |
|    loss                 | 64.8        |
|    n_updates            | 5010        |
|    policy_gradient_loss | -0.00933    |
|    reward               | -4.9438524  |
|    std                  | 2.93e+03    |
|    value_loss           | 143         |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 132          |
|    iterations           | 503

------------------------------------------
| time/                   |              |
|    fps                  | 131          |
|    iterations           | 512          |
|    time_elapsed         | 499          |
|    total_timesteps      | 65536        |
| train/                  |              |
|    approx_kl            | 0.027668271  |
|    clip_fraction        | 0.148        |
|    clip_range           | 0.2          |
|    entropy_loss         | -57.2        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0421       |
|    loss                 | 5.1          |
|    n_updates            | 5110         |
|    policy_gradient_loss | -0.000732    |
|    reward               | -0.028875465 |
|    std                  | 4.66e+03     |
|    value_loss           | 12.7         |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iteration

----------------------------------------
| time/                   |            |
|    fps                  | 130        |
|    iterations           | 522        |
|    time_elapsed         | 512        |
|    total_timesteps      | 66816      |
| train/                  |            |
|    approx_kl            | 0.17867365 |
|    clip_fraction        | 0.295      |
|    clip_range           | 0.2        |
|    entropy_loss         | -58.8      |
|    explained_variance   | 0          |
|    learning_rate        | 0.0421     |
|    loss                 | 16.2       |
|    n_updates            | 5210       |
|    policy_gradient_loss | -0.00967   |
|    reward               | 4.690829   |
|    std                  | 6.43e+03   |
|    value_loss           | 35.4       |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 130         |
|    iterations           | 523         |
|    time_el

-----------------------------------------
| time/                   |             |
|    fps                  | 130         |
|    iterations           | 532         |
|    time_elapsed         | 522         |
|    total_timesteps      | 68096       |
| train/                  |             |
|    approx_kl            | 0.055385042 |
|    clip_fraction        | 0.248       |
|    clip_range           | 0.2         |
|    entropy_loss         | -59.6       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 0.543       |
|    n_updates            | 5310        |
|    policy_gradient_loss | -0.0111     |
|    reward               | 0.1901833   |
|    std                  | 6.73e+03    |
|    value_loss           | 2.57        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 130         |
|    iterations           | 533   

----------------------------------------
| time/                   |            |
|    fps                  | 129        |
|    iterations           | 542        |
|    time_elapsed         | 536        |
|    total_timesteps      | 69376      |
| train/                  |            |
|    approx_kl            | 0.04156255 |
|    clip_fraction        | 0.21       |
|    clip_range           | 0.2        |
|    entropy_loss         | -60.6      |
|    explained_variance   | 0          |
|    learning_rate        | 0.0421     |
|    loss                 | 11.7       |
|    n_updates            | 5410       |
|    policy_gradient_loss | 0.0025     |
|    reward               | -1.8353668 |
|    std                  | 7.45e+03   |
|    value_loss           | 25         |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 129         |
|    iterations           | 543         |
|    time_el

-----------------------------------------
| time/                   |             |
|    fps                  | 129         |
|    iterations           | 552         |
|    time_elapsed         | 547         |
|    total_timesteps      | 70656       |
| train/                  |             |
|    approx_kl            | 0.021546734 |
|    clip_fraction        | 0.125       |
|    clip_range           | 0.2         |
|    entropy_loss         | -60.9       |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0421      |
|    loss                 | 1.38        |
|    n_updates            | 5510        |
|    policy_gradient_loss | -0.0133     |
|    reward               | 0.5253485   |
|    std                  | 7.72e+03    |
|    value_loss           | 5.15        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 129         |
|    iterations           | 553   

-----------------------------------------
| time/                   |             |
|    fps                  | 129         |
|    iterations           | 562         |
|    time_elapsed         | 555         |
|    total_timesteps      | 71936       |
| train/                  |             |
|    approx_kl            | 0.023260104 |
|    clip_fraction        | 0.14        |
|    clip_range           | 0.2         |
|    entropy_loss         | -61.4       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 3.23        |
|    n_updates            | 5610        |
|    policy_gradient_loss | -0.0103     |
|    reward               | 1.8479632   |
|    std                  | 1.14e+04    |
|    value_loss           | 7.9         |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 129         |
|    iterations           | 563   

----------------------------------------
| time/                   |            |
|    fps                  | 129        |
|    iterations           | 572        |
|    time_elapsed         | 565        |
|    total_timesteps      | 73216      |
| train/                  |            |
|    approx_kl            | 0.07187133 |
|    clip_fraction        | 0.228      |
|    clip_range           | 0.2        |
|    entropy_loss         | -62.7      |
|    explained_variance   | 0          |
|    learning_rate        | 0.0421     |
|    loss                 | 14.2       |
|    n_updates            | 5710       |
|    policy_gradient_loss | -0.0297    |
|    reward               | -2.6382089 |
|    std                  | 1.17e+04   |
|    value_loss           | 29.9       |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 129         |
|    iterations           | 573         |
|    time_el

-----------------------------------------
| time/                   |             |
|    fps                  | 129         |
|    iterations           | 582         |
|    time_elapsed         | 574         |
|    total_timesteps      | 74496       |
| train/                  |             |
|    approx_kl            | 0.025532769 |
|    clip_fraction        | 0.113       |
|    clip_range           | 0.2         |
|    entropy_loss         | -63.3       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 1.28        |
|    n_updates            | 5810        |
|    policy_gradient_loss | -0.00845    |
|    reward               | 0.50413924  |
|    std                  | 1.9e+04     |
|    value_loss           | 3.83        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 129         |
|    iterations           | 583   

----------------------------------------
| time/                   |            |
|    fps                  | 130        |
|    iterations           | 592        |
|    time_elapsed         | 582        |
|    total_timesteps      | 75776      |
| train/                  |            |
|    approx_kl            | 0.2220312  |
|    clip_fraction        | 0.277      |
|    clip_range           | 0.2        |
|    entropy_loss         | -63.5      |
|    explained_variance   | 0          |
|    learning_rate        | 0.0421     |
|    loss                 | 9.44       |
|    n_updates            | 5910       |
|    policy_gradient_loss | -0.00816   |
|    reward               | 0.42621526 |
|    std                  | 2.46e+04   |
|    value_loss           | 21.1       |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 130         |
|    iterations           | 593         |
|    time_el

-----------------------------------------
| time/                   |             |
|    fps                  | 130         |
|    iterations           | 602         |
|    time_elapsed         | 592         |
|    total_timesteps      | 77056       |
| train/                  |             |
|    approx_kl            | 0.029320452 |
|    clip_fraction        | 0.182       |
|    clip_range           | 0.2         |
|    entropy_loss         | -62.4       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -0.501      |
|    n_updates            | 6010        |
|    policy_gradient_loss | -0.014      |
|    reward               | 0.039141335 |
|    std                  | 2.21e+04    |
|    value_loss           | 1           |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 130         |
|    iterations           | 603   

-----------------------------------------
| time/                   |             |
|    fps                  | 130         |
|    iterations           | 612         |
|    time_elapsed         | 600         |
|    total_timesteps      | 78336       |
| train/                  |             |
|    approx_kl            | 0.048714608 |
|    clip_fraction        | 0.162       |
|    clip_range           | 0.2         |
|    entropy_loss         | -63.6       |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0421      |
|    loss                 | 1.15        |
|    n_updates            | 6110        |
|    policy_gradient_loss | -0.0153     |
|    reward               | -1.4914904  |
|    std                  | 3.68e+04    |
|    value_loss           | 3.95        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 130         |
|    iterations           | 613   

------------------------------------------
| time/                   |              |
|    fps                  | 130          |
|    iterations           | 622          |
|    time_elapsed         | 610          |
|    total_timesteps      | 79616        |
| train/                  |              |
|    approx_kl            | 0.029200483  |
|    clip_fraction        | 0.188        |
|    clip_range           | 0.2          |
|    entropy_loss         | -64          |
|    explained_variance   | 0            |
|    learning_rate        | 0.0421       |
|    loss                 | 42.6         |
|    n_updates            | 6210         |
|    policy_gradient_loss | -0.0123      |
|    reward               | -0.049288474 |
|    std                  | 3.88e+04     |
|    value_loss           | 86.5         |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 130         |
|    iteration

-----------------------------------------
| time/                   |             |
|    fps                  | 130         |
|    iterations           | 632         |
|    time_elapsed         | 619         |
|    total_timesteps      | 80896       |
| train/                  |             |
|    approx_kl            | 0.018010395 |
|    clip_fraction        | 0.068       |
|    clip_range           | 0.2         |
|    entropy_loss         | -64.8       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 0.807       |
|    n_updates            | 6310        |
|    policy_gradient_loss | -0.018      |
|    reward               | -1.0140607  |
|    std                  | 4e+04       |
|    value_loss           | 3.34        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 130        |
|    iterations           | 633      

------------------------------------------
| time/                   |              |
|    fps                  | 131          |
|    iterations           | 642          |
|    time_elapsed         | 625          |
|    total_timesteps      | 82176        |
| train/                  |              |
|    approx_kl            | 0.0047042393 |
|    clip_fraction        | 0.0133       |
|    clip_range           | 0.2          |
|    entropy_loss         | -65.2        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0421       |
|    loss                 | 12.3         |
|    n_updates            | 6410         |
|    policy_gradient_loss | -0.00315     |
|    reward               | 4.330316     |
|    std                  | 4.92e+04     |
|    value_loss           | 29.5         |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iteration

-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 652         |
|    time_elapsed         | 632         |
|    total_timesteps      | 83456       |
| train/                  |             |
|    approx_kl            | 0.053737044 |
|    clip_fraction        | 0.301       |
|    clip_range           | 0.2         |
|    entropy_loss         | -65.2       |
|    explained_variance   | 5.96e-08    |
|    learning_rate        | 0.0421      |
|    loss                 | -0.0184     |
|    n_updates            | 6510        |
|    policy_gradient_loss | -0.0169     |
|    reward               | 0.74537873  |
|    std                  | 5.11e+04    |
|    value_loss           | 1.45        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 653   

-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 662         |
|    time_elapsed         | 645         |
|    total_timesteps      | 84736       |
| train/                  |             |
|    approx_kl            | 0.029856982 |
|    clip_fraction        | 0.105       |
|    clip_range           | 0.2         |
|    entropy_loss         | -65.9       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 56.7        |
|    n_updates            | 6610        |
|    policy_gradient_loss | -0.0131     |
|    reward               | 1.5823272   |
|    std                  | 4.94e+04    |
|    value_loss           | 117         |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 131        |
|    iterations           | 663      

------------------------------------------
| time/                   |              |
|    fps                  | 131          |
|    iterations           | 672          |
|    time_elapsed         | 653          |
|    total_timesteps      | 86016        |
| train/                  |              |
|    approx_kl            | 0.0032082647 |
|    clip_fraction        | 0.00781      |
|    clip_range           | 0.2          |
|    entropy_loss         | -65.1        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0421       |
|    loss                 | 1.51         |
|    n_updates            | 6710         |
|    policy_gradient_loss | -0.00712     |
|    reward               | -0.3622205   |
|    std                  | 5.82e+04     |
|    value_loss           | 8.55         |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iteration

-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 682         |
|    time_elapsed         | 663         |
|    total_timesteps      | 87296       |
| train/                  |             |
|    approx_kl            | 0.049423866 |
|    clip_fraction        | 0.208       |
|    clip_range           | 0.2         |
|    entropy_loss         | -65.5       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 9.16        |
|    n_updates            | 6810        |
|    policy_gradient_loss | -0.0197     |
|    reward               | 0.23389558  |
|    std                  | 8.03e+04    |
|    value_loss           | 19.8        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 683   

-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 692         |
|    time_elapsed         | 671         |
|    total_timesteps      | 88576       |
| train/                  |             |
|    approx_kl            | 0.040646445 |
|    clip_fraction        | 0.113       |
|    clip_range           | 0.2         |
|    entropy_loss         | -68         |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 24.5        |
|    n_updates            | 6910        |
|    policy_gradient_loss | -0.0151     |
|    reward               | 0.8473551   |
|    std                  | 8.61e+04    |
|    value_loss           | 52.6        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 693   

-----------------------------------------
| time/                   |             |
|    fps                  | 132         |
|    iterations           | 702         |
|    time_elapsed         | 680         |
|    total_timesteps      | 89856       |
| train/                  |             |
|    approx_kl            | 0.026926361 |
|    clip_fraction        | 0.159       |
|    clip_range           | 0.2         |
|    entropy_loss         | -67.9       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 9.57        |
|    n_updates            | 7010        |
|    policy_gradient_loss | 0.00657     |
|    reward               | -0.04545697 |
|    std                  | 7.35e+04    |
|    value_loss           | 20.9        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 132         |
|    iterations           | 703   

-----------------------------------------
| time/                   |             |
|    fps                  | 132         |
|    iterations           | 712         |
|    time_elapsed         | 688         |
|    total_timesteps      | 91136       |
| train/                  |             |
|    approx_kl            | 0.018838292 |
|    clip_fraction        | 0.173       |
|    clip_range           | 0.2         |
|    entropy_loss         | -68.3       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 20.1        |
|    n_updates            | 7110        |
|    policy_gradient_loss | 0.00702     |
|    reward               | 1.3986032   |
|    std                  | 7.01e+04    |
|    value_loss           | 41.9        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 132         |
|    iterations           | 713   

-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 722         |
|    time_elapsed         | 701         |
|    total_timesteps      | 92416       |
| train/                  |             |
|    approx_kl            | 0.062137466 |
|    clip_fraction        | 0.247       |
|    clip_range           | 0.2         |
|    entropy_loss         | -68.5       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 0.0941      |
|    n_updates            | 7210        |
|    policy_gradient_loss | -0.0255     |
|    reward               | -1.5609695  |
|    std                  | 5.33e+04    |
|    value_loss           | 1.83        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 723   

----------------------------------------
| time/                   |            |
|    fps                  | 130        |
|    iterations           | 732        |
|    time_elapsed         | 717        |
|    total_timesteps      | 93696      |
| train/                  |            |
|    approx_kl            | 0.04524607 |
|    clip_fraction        | 0.216      |
|    clip_range           | 0.2        |
|    entropy_loss         | -69.3      |
|    explained_variance   | 0          |
|    learning_rate        | 0.0421     |
|    loss                 | 5.72       |
|    n_updates            | 7310       |
|    policy_gradient_loss | -0.00733   |
|    reward               | -0.3381303 |
|    std                  | 7.68e+04   |
|    value_loss           | 13         |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 130         |
|    iterations           | 733         |
|    time_el

------------------------------------------
| time/                   |              |
|    fps                  | 130          |
|    iterations           | 742          |
|    time_elapsed         | 725          |
|    total_timesteps      | 94976        |
| train/                  |              |
|    approx_kl            | 0.0065754117 |
|    clip_fraction        | 0.0219       |
|    clip_range           | 0.2          |
|    entropy_loss         | -68.8        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0421       |
|    loss                 | 83           |
|    n_updates            | 7410         |
|    policy_gradient_loss | -0.00719     |
|    reward               | -0.12512171  |
|    std                  | 7.32e+04     |
|    value_loss           | 166          |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 130         |
|    iteration

---------------------------------------
| time/                   |           |
|    fps                  | 130       |
|    iterations           | 752       |
|    time_elapsed         | 734       |
|    total_timesteps      | 96256     |
| train/                  |           |
|    approx_kl            | 0.0164999 |
|    clip_fraction        | 0.0719    |
|    clip_range           | 0.2       |
|    entropy_loss         | -69       |
|    explained_variance   | 5.96e-08  |
|    learning_rate        | 0.0421    |
|    loss                 | 3.27      |
|    n_updates            | 7510      |
|    policy_gradient_loss | -0.011    |
|    reward               | -3.05778  |
|    std                  | 8.48e+04  |
|    value_loss           | 8.85      |
---------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 130          |
|    iterations           | 753          |
|    time_elapsed         | 

----------------------------------------
| time/                   |            |
|    fps                  | 130        |
|    iterations           | 762        |
|    time_elapsed         | 745        |
|    total_timesteps      | 97536      |
| train/                  |            |
|    approx_kl            | 0.02537684 |
|    clip_fraction        | 0.175      |
|    clip_range           | 0.2        |
|    entropy_loss         | -69.9      |
|    explained_variance   | 0          |
|    learning_rate        | 0.0421     |
|    loss                 | 21.2       |
|    n_updates            | 7610       |
|    policy_gradient_loss | -0.014     |
|    reward               | 3.0389943  |
|    std                  | 7.38e+04   |
|    value_loss           | 44.4       |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 130         |
|    iterations           | 763         |
|    time_el

-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 772         |
|    time_elapsed         | 753         |
|    total_timesteps      | 98816       |
| train/                  |             |
|    approx_kl            | 0.026429165 |
|    clip_fraction        | 0.205       |
|    clip_range           | 0.2         |
|    entropy_loss         | -68.7       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 1.1         |
|    n_updates            | 7710        |
|    policy_gradient_loss | -0.00375    |
|    reward               | 0.07631422  |
|    std                  | 6.19e+04    |
|    value_loss           | 3.66        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 773   

-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 782         |
|    time_elapsed         | 762         |
|    total_timesteps      | 100096      |
| train/                  |             |
|    approx_kl            | 0.009760727 |
|    clip_fraction        | 0.0383      |
|    clip_range           | 0.2         |
|    entropy_loss         | -69.7       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 46.1        |
|    n_updates            | 7810        |
|    policy_gradient_loss | -0.00404    |
|    reward               | 3.89969     |
|    std                  | 9.04e+04    |
|    value_loss           | 96.2        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 131        |
|    iterations           | 783      

------------------------------------------
| time/                   |              |
|    fps                  | 131          |
|    iterations           | 792          |
|    time_elapsed         | 770          |
|    total_timesteps      | 101376       |
| train/                  |              |
|    approx_kl            | 0.0076611065 |
|    clip_fraction        | 0.0273       |
|    clip_range           | 0.2          |
|    entropy_loss         | -69.6        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0421       |
|    loss                 | 10.5         |
|    n_updates            | 7910         |
|    policy_gradient_loss | -0.0103      |
|    reward               | 0.048214503  |
|    std                  | 8.13e+04     |
|    value_loss           | 17.1         |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iteration

----------------------------------------
| time/                   |            |
|    fps                  | 131        |
|    iterations           | 802        |
|    time_elapsed         | 778        |
|    total_timesteps      | 102656     |
| train/                  |            |
|    approx_kl            | 0.03756463 |
|    clip_fraction        | 0.107      |
|    clip_range           | 0.2        |
|    entropy_loss         | -69.5      |
|    explained_variance   | 0          |
|    learning_rate        | 0.0421     |
|    loss                 | 5.24       |
|    n_updates            | 8010       |
|    policy_gradient_loss | -0.0155    |
|    reward               | 0.5354688  |
|    std                  | 6.97e+04   |
|    value_loss           | 13.1       |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 803         |
|    time_el

------------------------------------------
| time/                   |              |
|    fps                  | 131          |
|    iterations           | 812          |
|    time_elapsed         | 787          |
|    total_timesteps      | 103936       |
| train/                  |              |
|    approx_kl            | 0.0090670455 |
|    clip_fraction        | 0.0531       |
|    clip_range           | 0.2          |
|    entropy_loss         | -69.2        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0421       |
|    loss                 | 31.3         |
|    n_updates            | 8110         |
|    policy_gradient_loss | 0.00604      |
|    reward               | 4.3729115    |
|    std                  | 8.12e+04     |
|    value_loss           | 71.1         |
------------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 131        |
|    iterations  

----------------------------------------
| time/                   |            |
|    fps                  | 132        |
|    iterations           | 822        |
|    time_elapsed         | 796        |
|    total_timesteps      | 105216     |
| train/                  |            |
|    approx_kl            | 0.02030658 |
|    clip_fraction        | 0.0781     |
|    clip_range           | 0.2        |
|    entropy_loss         | -69.9      |
|    explained_variance   | 0          |
|    learning_rate        | 0.0421     |
|    loss                 | 4.86       |
|    n_updates            | 8210       |
|    policy_gradient_loss | -0.00752   |
|    reward               | 2.433372   |
|    std                  | 6.37e+04   |
|    value_loss           | 11.1       |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 132         |
|    iterations           | 823         |
|    time_el

-----------------------------------------
| time/                   |             |
|    fps                  | 132         |
|    iterations           | 832         |
|    time_elapsed         | 805         |
|    total_timesteps      | 106496      |
| train/                  |             |
|    approx_kl            | 0.015858332 |
|    clip_fraction        | 0.057       |
|    clip_range           | 0.2         |
|    entropy_loss         | -69.8       |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0421      |
|    loss                 | 19.8        |
|    n_updates            | 8310        |
|    policy_gradient_loss | -0.00499    |
|    reward               | 0.54090285  |
|    std                  | 6.14e+04    |
|    value_loss           | 66          |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 132          |
|    iterations           | 833

-----------------------------------------
| time/                   |             |
|    fps                  | 130         |
|    iterations           | 842         |
|    time_elapsed         | 825         |
|    total_timesteps      | 107776      |
| train/                  |             |
|    approx_kl            | 0.051527787 |
|    clip_fraction        | 0.328       |
|    clip_range           | 0.2         |
|    entropy_loss         | -70.4       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -0.25       |
|    n_updates            | 8410        |
|    policy_gradient_loss | -0.0151     |
|    reward               | 0.67312664  |
|    std                  | 7.44e+04    |
|    value_loss           | 1.09        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 130         |
|    iterations           | 843   

----------------------------------------
| time/                   |            |
|    fps                  | 130        |
|    iterations           | 852        |
|    time_elapsed         | 835        |
|    total_timesteps      | 109056     |
| train/                  |            |
|    approx_kl            | 0.07539653 |
|    clip_fraction        | 0.215      |
|    clip_range           | 0.2        |
|    entropy_loss         | -71.9      |
|    explained_variance   | 1.79e-07   |
|    learning_rate        | 0.0421     |
|    loss                 | 10.1       |
|    n_updates            | 8510       |
|    policy_gradient_loss | -0.0162    |
|    reward               | -1.663593  |
|    std                  | 8.86e+04   |
|    value_loss           | 21.9       |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 130         |
|    iterations           | 853         |
|    time_el

-----------------------------------------
| time/                   |             |
|    fps                  | 130         |
|    iterations           | 862         |
|    time_elapsed         | 843         |
|    total_timesteps      | 110336      |
| train/                  |             |
|    approx_kl            | 0.007583879 |
|    clip_fraction        | 0.0383      |
|    clip_range           | 0.2         |
|    entropy_loss         | -71.1       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 340         |
|    n_updates            | 8610        |
|    policy_gradient_loss | -0.0049     |
|    reward               | 0.16869241  |
|    std                  | 6.52e+04    |
|    value_loss           | 697         |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 130         |
|    iterations           | 863   

-----------------------------------------
| time/                   |             |
|    fps                  | 130         |
|    iterations           | 872         |
|    time_elapsed         | 854         |
|    total_timesteps      | 111616      |
| train/                  |             |
|    approx_kl            | 0.034231417 |
|    clip_fraction        | 0.128       |
|    clip_range           | 0.2         |
|    entropy_loss         | -70.8       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 6.58        |
|    n_updates            | 8710        |
|    policy_gradient_loss | -0.0173     |
|    reward               | 1.5575956   |
|    std                  | 6.06e+04    |
|    value_loss           | 14.3        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 130         |
|    iterations           | 873   

-----------------------------------------
| time/                   |             |
|    fps                  | 130         |
|    iterations           | 882         |
|    time_elapsed         | 862         |
|    total_timesteps      | 112896      |
| train/                  |             |
|    approx_kl            | 0.027519174 |
|    clip_fraction        | 0.157       |
|    clip_range           | 0.2         |
|    entropy_loss         | -72.8       |
|    explained_variance   | 1.19e-07    |
|    learning_rate        | 0.0421      |
|    loss                 | 34.2        |
|    n_updates            | 8810        |
|    policy_gradient_loss | 0.00689     |
|    reward               | -2.5880435  |
|    std                  | 5.32e+04    |
|    value_loss           | 99.6        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 130         |
|    iterations           | 883   

-----------------------------------------
| time/                   |             |
|    fps                  | 130         |
|    iterations           | 892         |
|    time_elapsed         | 874         |
|    total_timesteps      | 114176      |
| train/                  |             |
|    approx_kl            | 0.018350024 |
|    clip_fraction        | 0.0883      |
|    clip_range           | 0.2         |
|    entropy_loss         | -73.8       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 0.684       |
|    n_updates            | 8910        |
|    policy_gradient_loss | -0.00961    |
|    reward               | 1.6070638   |
|    std                  | 6.88e+04    |
|    value_loss           | 3.52        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 130        |
|    iterations           | 893      

----------------------------------------
| time/                   |            |
|    fps                  | 130        |
|    iterations           | 902        |
|    time_elapsed         | 886        |
|    total_timesteps      | 115456     |
| train/                  |            |
|    approx_kl            | 0.02173491 |
|    clip_fraction        | 0.0938     |
|    clip_range           | 0.2        |
|    entropy_loss         | -74.8      |
|    explained_variance   | 0          |
|    learning_rate        | 0.0421     |
|    loss                 | 29.6       |
|    n_updates            | 9010       |
|    policy_gradient_loss | -0.009     |
|    reward               | -4.3444667 |
|    std                  | 7.8e+04    |
|    value_loss           | 61.8       |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 130         |
|    iterations           | 903         |
|    time_el

-----------------------------------------
| time/                   |             |
|    fps                  | 130         |
|    iterations           | 912         |
|    time_elapsed         | 895         |
|    total_timesteps      | 116736      |
| train/                  |             |
|    approx_kl            | 0.013523275 |
|    clip_fraction        | 0.0688      |
|    clip_range           | 0.2         |
|    entropy_loss         | -74.9       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 1.37        |
|    n_updates            | 9110        |
|    policy_gradient_loss | -0.0174     |
|    reward               | -0.337761   |
|    std                  | 9.62e+04    |
|    value_loss           | 5.16        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 130         |
|    iterations           | 913   

------------------------------------------
| time/                   |              |
|    fps                  | 130          |
|    iterations           | 922          |
|    time_elapsed         | 905          |
|    total_timesteps      | 118016       |
| train/                  |              |
|    approx_kl            | 0.0140721975 |
|    clip_fraction        | 0.0586       |
|    clip_range           | 0.2          |
|    entropy_loss         | -76.1        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0421       |
|    loss                 | 5.46         |
|    n_updates            | 9210         |
|    policy_gradient_loss | -0.006       |
|    reward               | -0.15389678  |
|    std                  | 1.02e+05     |
|    value_loss           | 13.6         |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 130         |
|    iteration

-----------------------------------------
| time/                   |             |
|    fps                  | 130         |
|    iterations           | 932         |
|    time_elapsed         | 915         |
|    total_timesteps      | 119296      |
| train/                  |             |
|    approx_kl            | 0.019316437 |
|    clip_fraction        | 0.0633      |
|    clip_range           | 0.2         |
|    entropy_loss         | -77.2       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 107         |
|    n_updates            | 9310        |
|    policy_gradient_loss | -0.00753    |
|    reward               | -2.4496286  |
|    std                  | 8.26e+04    |
|    value_loss           | 216         |
-----------------------------------------
day: 3060, episode: 40
begin_total_asset: 1000000.00
end_total_asset: 4089359.91
total_reward: 3089359.91
total_cost: 13197.27
total_trades: 19020
Sharpe: 0.710

-----------------------------------------
| time/                   |             |
|    fps                  | 130         |
|    iterations           | 942         |
|    time_elapsed         | 924         |
|    total_timesteps      | 120576      |
| train/                  |             |
|    approx_kl            | 0.017842602 |
|    clip_fraction        | 0.0961      |
|    clip_range           | 0.2         |
|    entropy_loss         | -79.3       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 4.39        |
|    n_updates            | 9410        |
|    policy_gradient_loss | -0.00925    |
|    reward               | 0.8961123   |
|    std                  | 7.88e+04    |
|    value_loss           | 12.1        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 130         |
|    iterations           | 943   

-----------------------------------------
| time/                   |             |
|    fps                  | 130         |
|    iterations           | 952         |
|    time_elapsed         | 934         |
|    total_timesteps      | 121856      |
| train/                  |             |
|    approx_kl            | 0.046035264 |
|    clip_fraction        | 0.206       |
|    clip_range           | 0.2         |
|    entropy_loss         | -80.1       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 46.6        |
|    n_updates            | 9510        |
|    policy_gradient_loss | 0.0103      |
|    reward               | 0.92481357  |
|    std                  | 7.96e+04    |
|    value_loss           | 99.2        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 130         |
|    iterations           | 953   

----------------------------------------
| time/                   |            |
|    fps                  | 130        |
|    iterations           | 962        |
|    time_elapsed         | 940        |
|    total_timesteps      | 123136     |
| train/                  |            |
|    approx_kl            | 0.04562018 |
|    clip_fraction        | 0.203      |
|    clip_range           | 0.2        |
|    entropy_loss         | -79.6      |
|    explained_variance   | 5.96e-08   |
|    learning_rate        | 0.0421     |
|    loss                 | 0.856      |
|    n_updates            | 9610       |
|    policy_gradient_loss | -0.0172    |
|    reward               | 0.36947566 |
|    std                  | 7.07e+04   |
|    value_loss           | 3.62       |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 130         |
|    iterations           | 963         |
|    time_el

-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 972         |
|    time_elapsed         | 949         |
|    total_timesteps      | 124416      |
| train/                  |             |
|    approx_kl            | 0.028225264 |
|    clip_fraction        | 0.183       |
|    clip_range           | 0.2         |
|    entropy_loss         | -81.7       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 9.51        |
|    n_updates            | 9710        |
|    policy_gradient_loss | -0.00857    |
|    reward               | 1.6343206   |
|    std                  | 1.13e+05    |
|    value_loss           | 20.9        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 973   

------------------------------------------
| time/                   |              |
|    fps                  | 131          |
|    iterations           | 982          |
|    time_elapsed         | 957          |
|    total_timesteps      | 125696       |
| train/                  |              |
|    approx_kl            | 0.0014378163 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -82.4        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0421       |
|    loss                 | 287          |
|    n_updates            | 9810         |
|    policy_gradient_loss | -0.00145     |
|    reward               | -0.7932071   |
|    std                  | 1.52e+05     |
|    value_loss           | 676          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 131          |
|    iterat

-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 992         |
|    time_elapsed         | 966         |
|    total_timesteps      | 126976      |
| train/                  |             |
|    approx_kl            | 0.032825798 |
|    clip_fraction        | 0.14        |
|    clip_range           | 0.2         |
|    entropy_loss         | -83.2       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 2.13        |
|    n_updates            | 9910        |
|    policy_gradient_loss | -0.00543    |
|    reward               | 4.7717595   |
|    std                  | 1.75e+05    |
|    value_loss           | 6.62        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 993   

-----------------------------------------
| time/                   |             |
|    fps                  | 130         |
|    iterations           | 1002        |
|    time_elapsed         | 979         |
|    total_timesteps      | 128256      |
| train/                  |             |
|    approx_kl            | 0.013881585 |
|    clip_fraction        | 0.0477      |
|    clip_range           | 0.2         |
|    entropy_loss         | -83.3       |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0421      |
|    loss                 | 33.4        |
|    n_updates            | 10010       |
|    policy_gradient_loss | -0.0126     |
|    reward               | -2.933574   |
|    std                  | 1.55e+05    |
|    value_loss           | 69.2        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 130         |
|    iterations           | 1003  

----------------------------------------
| time/                   |            |
|    fps                  | 131        |
|    iterations           | 1012       |
|    time_elapsed         | 988        |
|    total_timesteps      | 129536     |
| train/                  |            |
|    approx_kl            | 0.06542098 |
|    clip_fraction        | 0.205      |
|    clip_range           | 0.2        |
|    entropy_loss         | -83.7      |
|    explained_variance   | 0          |
|    learning_rate        | 0.0421     |
|    loss                 | -0.000124  |
|    n_updates            | 10110      |
|    policy_gradient_loss | -0.0155    |
|    reward               | 0.34625685 |
|    std                  | 1.98e+05   |
|    value_loss           | 1.94       |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 1013        |
|    time_el

----------------------------------------
| time/                   |            |
|    fps                  | 131        |
|    iterations           | 1022       |
|    time_elapsed         | 997        |
|    total_timesteps      | 130816     |
| train/                  |            |
|    approx_kl            | 0.06974025 |
|    clip_fraction        | 0.192      |
|    clip_range           | 0.2        |
|    entropy_loss         | -84.1      |
|    explained_variance   | 0          |
|    learning_rate        | 0.0421     |
|    loss                 | 19.4       |
|    n_updates            | 10210      |
|    policy_gradient_loss | -0.0205    |
|    reward               | 0.08830018 |
|    std                  | 2.53e+05   |
|    value_loss           | 40.2       |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 1023        |
|    time_el

-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 1032        |
|    time_elapsed         | 1006        |
|    total_timesteps      | 132096      |
| train/                  |             |
|    approx_kl            | 0.025685688 |
|    clip_fraction        | 0.103       |
|    clip_range           | 0.2         |
|    entropy_loss         | -83.1       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 0.704       |
|    n_updates            | 10310       |
|    policy_gradient_loss | -0.0191     |
|    reward               | 0.11586492  |
|    std                  | 1.8e+05     |
|    value_loss           | 3.37        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 1033  

----------------------------------------
| time/                   |            |
|    fps                  | 131        |
|    iterations           | 1042       |
|    time_elapsed         | 1014       |
|    total_timesteps      | 133376     |
| train/                  |            |
|    approx_kl            | 0.05047386 |
|    clip_fraction        | 0.298      |
|    clip_range           | 0.2        |
|    entropy_loss         | -84.1      |
|    explained_variance   | -1.19e-07  |
|    learning_rate        | 0.0421     |
|    loss                 | 3.52       |
|    n_updates            | 10410      |
|    policy_gradient_loss | -0.013     |
|    reward               | 2.7824972  |
|    std                  | 2.31e+05   |
|    value_loss           | 8.97       |
----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 131       |
|    iterations           | 1043      |
|    time_elapsed   

------------------------------------------
| time/                   |              |
|    fps                  | 131          |
|    iterations           | 1052         |
|    time_elapsed         | 1023         |
|    total_timesteps      | 134656       |
| train/                  |              |
|    approx_kl            | 0.0065981853 |
|    clip_fraction        | 0.0258       |
|    clip_range           | 0.2          |
|    entropy_loss         | -84.7        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0421       |
|    loss                 | 30.8         |
|    n_updates            | 10510        |
|    policy_gradient_loss | -0.00103     |
|    reward               | -3.808875    |
|    std                  | 2.64e+05     |
|    value_loss           | 69.7         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 131          |
|    iterat

-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 1062        |
|    time_elapsed         | 1035        |
|    total_timesteps      | 135936      |
| train/                  |             |
|    approx_kl            | 0.030136202 |
|    clip_fraction        | 0.202       |
|    clip_range           | 0.2         |
|    entropy_loss         | -86         |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 5.08        |
|    n_updates            | 10610       |
|    policy_gradient_loss | -0.0176     |
|    reward               | 1.2857832   |
|    std                  | 3.46e+05    |
|    value_loss           | 12.4        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 1063  

-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 1072        |
|    time_elapsed         | 1045        |
|    total_timesteps      | 137216      |
| train/                  |             |
|    approx_kl            | 0.056346763 |
|    clip_fraction        | 0.312       |
|    clip_range           | 0.2         |
|    entropy_loss         | -86.9       |
|    explained_variance   | 1.19e-07    |
|    learning_rate        | 0.0421      |
|    loss                 | 16.7        |
|    n_updates            | 10710       |
|    policy_gradient_loss | -0.0156     |
|    reward               | -0.30544388 |
|    std                  | 3.14e+05    |
|    value_loss           | 35.5        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 1073  

-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 1082        |
|    time_elapsed         | 1055        |
|    total_timesteps      | 138496      |
| train/                  |             |
|    approx_kl            | 0.04979903  |
|    clip_fraction        | 0.291       |
|    clip_range           | 0.2         |
|    entropy_loss         | -87.9       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -0.284      |
|    n_updates            | 10810       |
|    policy_gradient_loss | -0.00857    |
|    reward               | -0.07672199 |
|    std                  | 3.91e+05    |
|    value_loss           | 1.33        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 1083  

-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 1092        |
|    time_elapsed         | 1062        |
|    total_timesteps      | 139776      |
| train/                  |             |
|    approx_kl            | 0.060669105 |
|    clip_fraction        | 0.276       |
|    clip_range           | 0.2         |
|    entropy_loss         | -90.1       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -0.0458     |
|    n_updates            | 10910       |
|    policy_gradient_loss | -0.0117     |
|    reward               | -0.3248533  |
|    std                  | 4.95e+05    |
|    value_loss           | 1.89        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 1093  

-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 1102        |
|    time_elapsed         | 1070        |
|    total_timesteps      | 141056      |
| train/                  |             |
|    approx_kl            | 0.028054165 |
|    clip_fraction        | 0.1         |
|    clip_range           | 0.2         |
|    entropy_loss         | -91.4       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 18.1        |
|    n_updates            | 11010       |
|    policy_gradient_loss | -0.0119     |
|    reward               | -0.03670807 |
|    std                  | 5.01e+05    |
|    value_loss           | 37.3        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 1103  

----------------------------------------
| time/                   |            |
|    fps                  | 131        |
|    iterations           | 1112       |
|    time_elapsed         | 1079       |
|    total_timesteps      | 142336     |
| train/                  |            |
|    approx_kl            | 0.01777735 |
|    clip_fraction        | 0.141      |
|    clip_range           | 0.2        |
|    entropy_loss         | -91.7      |
|    explained_variance   | 0          |
|    learning_rate        | 0.0421     |
|    loss                 | -0.656     |
|    n_updates            | 11110      |
|    policy_gradient_loss | -0.00644   |
|    reward               | 0.5468827  |
|    std                  | 4.32e+05   |
|    value_loss           | 0.713      |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 1113        |
|    time_el

-----------------------------------------
| time/                   |             |
|    fps                  | 132         |
|    iterations           | 1122        |
|    time_elapsed         | 1087        |
|    total_timesteps      | 143616      |
| train/                  |             |
|    approx_kl            | 0.024754934 |
|    clip_fraction        | 0.155       |
|    clip_range           | 0.2         |
|    entropy_loss         | -91.9       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 6.98        |
|    n_updates            | 11210       |
|    policy_gradient_loss | -0.0122     |
|    reward               | -0.14319703 |
|    std                  | 4.52e+05    |
|    value_loss           | 16.1        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 132        |
|    iterations           | 1123     

----------------------------------------
| time/                   |            |
|    fps                  | 132        |
|    iterations           | 1132       |
|    time_elapsed         | 1095       |
|    total_timesteps      | 144896     |
| train/                  |            |
|    approx_kl            | 0.06839147 |
|    clip_fraction        | 0.23       |
|    clip_range           | 0.2        |
|    entropy_loss         | -93.6      |
|    explained_variance   | 0          |
|    learning_rate        | 0.0421     |
|    loss                 | -0.812     |
|    n_updates            | 11310      |
|    policy_gradient_loss | -0.0244    |
|    reward               | 0.12506638 |
|    std                  | 6.36e+05   |
|    value_loss           | 0.417      |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 132         |
|    iterations           | 1133        |
|    time_el

-----------------------------------------
| time/                   |             |
|    fps                  | 132         |
|    iterations           | 1142        |
|    time_elapsed         | 1104        |
|    total_timesteps      | 146176      |
| train/                  |             |
|    approx_kl            | 0.034883942 |
|    clip_fraction        | 0.136       |
|    clip_range           | 0.2         |
|    entropy_loss         | -94.2       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -0.108      |
|    n_updates            | 11410       |
|    policy_gradient_loss | -0.00787    |
|    reward               | -0.64816403 |
|    std                  | 6.38e+05    |
|    value_loss           | 1.8         |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 132        |
|    iterations           | 1143     

-----------------------------------------
| time/                   |             |
|    fps                  | 132         |
|    iterations           | 1152        |
|    time_elapsed         | 1113        |
|    total_timesteps      | 147456      |
| train/                  |             |
|    approx_kl            | 0.025681397 |
|    clip_fraction        | 0.159       |
|    clip_range           | 0.2         |
|    entropy_loss         | -94.2       |
|    explained_variance   | 1.19e-07    |
|    learning_rate        | 0.0421      |
|    loss                 | -0.796      |
|    n_updates            | 11510       |
|    policy_gradient_loss | -0.00785    |
|    reward               | 0.19650672  |
|    std                  | 6.52e+05    |
|    value_loss           | 0.38        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 132         |
|    iterations           | 1153  

----------------------------------------
| time/                   |            |
|    fps                  | 132        |
|    iterations           | 1162       |
|    time_elapsed         | 1121       |
|    total_timesteps      | 148736     |
| train/                  |            |
|    approx_kl            | 0.11652033 |
|    clip_fraction        | 0.346      |
|    clip_range           | 0.2        |
|    entropy_loss         | -94.3      |
|    explained_variance   | -1.19e-07  |
|    learning_rate        | 0.0421     |
|    loss                 | -0.63      |
|    n_updates            | 11610      |
|    policy_gradient_loss | -0.0151    |
|    reward               | -1.5891843 |
|    std                  | 7.01e+05   |
|    value_loss           | 0.84       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 132        |
|    iterations           | 1163       |
|    time_elapse

day: 3060, episode: 50
begin_total_asset: 1000000.00
end_total_asset: 1835323.50
total_reward: 835323.50
total_cost: 32935.42
total_trades: 24124
Sharpe: 0.556
-----------------------------------------
| time/                   |             |
|    fps                  | 132         |
|    iterations           | 1172        |
|    time_elapsed         | 1132        |
|    total_timesteps      | 150016      |
| train/                  |             |
|    approx_kl            | 0.026928216 |
|    clip_fraction        | 0.128       |
|    clip_range           | 0.2         |
|    entropy_loss         | -95.4       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 11.2        |
|    n_updates            | 11710       |
|    policy_gradient_loss | -0.00589    |
|    reward               | 0.002065941 |
|    std                  | 8.85e+05    |
|    value_loss           | 24.6        |
-----------------------------------------


-----------------------------------------
| time/                   |             |
|    fps                  | 132         |
|    iterations           | 1182        |
|    time_elapsed         | 1142        |
|    total_timesteps      | 151296      |
| train/                  |             |
|    approx_kl            | 0.11987476  |
|    clip_fraction        | 0.366       |
|    clip_range           | 0.2         |
|    entropy_loss         | -96.9       |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0421      |
|    loss                 | -0.446      |
|    n_updates            | 11810       |
|    policy_gradient_loss | -0.0275     |
|    reward               | -0.02016111 |
|    std                  | 9.97e+05    |
|    value_loss           | 1.47        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 132         |
|    iterations           | 1183  

----------------------------------------
| time/                   |            |
|    fps                  | 132        |
|    iterations           | 1192       |
|    time_elapsed         | 1152       |
|    total_timesteps      | 152576     |
| train/                  |            |
|    approx_kl            | 0.04760541 |
|    clip_fraction        | 0.279      |
|    clip_range           | 0.2        |
|    entropy_loss         | -99.2      |
|    explained_variance   | 0          |
|    learning_rate        | 0.0421     |
|    loss                 | -0.53      |
|    n_updates            | 11910      |
|    policy_gradient_loss | -0.00967   |
|    reward               | 1.4648789  |
|    std                  | 1.37e+06   |
|    value_loss           | 1.24       |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 132         |
|    iterations           | 1193        |
|    time_el

----------------------------------------
| time/                   |            |
|    fps                  | 131        |
|    iterations           | 1202       |
|    time_elapsed         | 1167       |
|    total_timesteps      | 153856     |
| train/                  |            |
|    approx_kl            | 0.06436169 |
|    clip_fraction        | 0.302      |
|    clip_range           | 0.2        |
|    entropy_loss         | -101       |
|    explained_variance   | -1.19e-07  |
|    learning_rate        | 0.0421     |
|    loss                 | -1.07      |
|    n_updates            | 12010      |
|    policy_gradient_loss | -0.0146    |
|    reward               | -0.1958685 |
|    std                  | 1.72e+06   |
|    value_loss           | 0.128      |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 131        |
|    iterations           | 1203       |
|    time_elapse

-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 1212        |
|    time_elapsed         | 1177        |
|    total_timesteps      | 155136      |
| train/                  |             |
|    approx_kl            | 0.034069225 |
|    clip_fraction        | 0.123       |
|    clip_range           | 0.2         |
|    entropy_loss         | -101        |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0421      |
|    loss                 | -0.615      |
|    n_updates            | 12110       |
|    policy_gradient_loss | -0.00245    |
|    reward               | -0.11690305 |
|    std                  | 1.8e+06     |
|    value_loss           | 1.1         |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 1213  

-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 1222        |
|    time_elapsed         | 1185        |
|    total_timesteps      | 156416      |
| train/                  |             |
|    approx_kl            | 0.10211007  |
|    clip_fraction        | 0.367       |
|    clip_range           | 0.2         |
|    entropy_loss         | -104        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -0.804      |
|    n_updates            | 12210       |
|    policy_gradient_loss | -0.0254     |
|    reward               | -0.33891693 |
|    std                  | 2.35e+06    |
|    value_loss           | 0.638       |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 131         |
|    iterations           | 1223  

------------------------------------------
| time/                   |              |
|    fps                  | 132          |
|    iterations           | 1232         |
|    time_elapsed         | 1192         |
|    total_timesteps      | 157696       |
| train/                  |              |
|    approx_kl            | 0.042423226  |
|    clip_fraction        | 0.163        |
|    clip_range           | 0.2          |
|    entropy_loss         | -105         |
|    explained_variance   | 0            |
|    learning_rate        | 0.0421       |
|    loss                 | -1.07        |
|    n_updates            | 12310        |
|    policy_gradient_loss | -0.0162      |
|    reward               | -0.042844765 |
|    std                  | 3.65e+06     |
|    value_loss           | 0.27         |
------------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 132        |
|    iterations  

----------------------------------------
| time/                   |            |
|    fps                  | 132        |
|    iterations           | 1242       |
|    time_elapsed         | 1198       |
|    total_timesteps      | 158976     |
| train/                  |            |
|    approx_kl            | 0.01968617 |
|    clip_fraction        | 0.0781     |
|    clip_range           | 0.2        |
|    entropy_loss         | -105       |
|    explained_variance   | 0          |
|    learning_rate        | 0.0421     |
|    loss                 | 0.902      |
|    n_updates            | 12410      |
|    policy_gradient_loss | -0.00941   |
|    reward               | 0.76486105 |
|    std                  | 2.94e+06   |
|    value_loss           | 4.33       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 132        |
|    iterations           | 1243       |
|    time_elapse

-----------------------------------------
| time/                   |             |
|    fps                  | 132         |
|    iterations           | 1252        |
|    time_elapsed         | 1205        |
|    total_timesteps      | 160256      |
| train/                  |             |
|    approx_kl            | 0.03571627  |
|    clip_fraction        | 0.196       |
|    clip_range           | 0.2         |
|    entropy_loss         | -105        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -0.795      |
|    n_updates            | 12510       |
|    policy_gradient_loss | -0.0135     |
|    reward               | 0.017025627 |
|    std                  | 3.53e+06    |
|    value_loss           | 0.917       |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 132          |
|    iterations           | 125

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 1262        |
|    time_elapsed         | 1214        |
|    total_timesteps      | 161536      |
| train/                  |             |
|    approx_kl            | 0.035101667 |
|    clip_fraction        | 0.196       |
|    clip_range           | 0.2         |
|    entropy_loss         | -106        |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0421      |
|    loss                 | -0.0941     |
|    n_updates            | 12610       |
|    policy_gradient_loss | -0.0141     |
|    reward               | 0.41345248  |
|    std                  | 2.64e+06    |
|    value_loss           | 2.41        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 1263  

------------------------------------------
| time/                   |              |
|    fps                  | 133          |
|    iterations           | 1272         |
|    time_elapsed         | 1223         |
|    total_timesteps      | 162816       |
| train/                  |              |
|    approx_kl            | 0.051505648  |
|    clip_fraction        | 0.224        |
|    clip_range           | 0.2          |
|    entropy_loss         | -108         |
|    explained_variance   | 0            |
|    learning_rate        | 0.0421       |
|    loss                 | -1.16        |
|    n_updates            | 12710        |
|    policy_gradient_loss | -0.0124      |
|    reward               | -0.097166315 |
|    std                  | 3.69e+06     |
|    value_loss           | 0.0552       |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iteration

------------------------------------------
| time/                   |              |
|    fps                  | 133          |
|    iterations           | 1282         |
|    time_elapsed         | 1231         |
|    total_timesteps      | 164096       |
| train/                  |              |
|    approx_kl            | 0.031943344  |
|    clip_fraction        | 0.127        |
|    clip_range           | 0.2          |
|    entropy_loss         | -108         |
|    explained_variance   | 0            |
|    learning_rate        | 0.0421       |
|    loss                 | -1.11        |
|    n_updates            | 12810        |
|    policy_gradient_loss | -0.00993     |
|    reward               | -0.022561004 |
|    std                  | 3.11e+06     |
|    value_loss           | 0.195        |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iteration

------------------------------------------
| time/                   |              |
|    fps                  | 133          |
|    iterations           | 1292         |
|    time_elapsed         | 1241         |
|    total_timesteps      | 165376       |
| train/                  |              |
|    approx_kl            | 0.029541336  |
|    clip_fraction        | 0.164        |
|    clip_range           | 0.2          |
|    entropy_loss         | -109         |
|    explained_variance   | 0            |
|    learning_rate        | 0.0421       |
|    loss                 | -0.0255      |
|    n_updates            | 12910        |
|    policy_gradient_loss | -0.00335     |
|    reward               | -0.014539543 |
|    std                  | 3.15e+06     |
|    value_loss           | 2.51         |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iteration

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 1302        |
|    time_elapsed         | 1249        |
|    total_timesteps      | 166656      |
| train/                  |             |
|    approx_kl            | 0.02911871  |
|    clip_fraction        | 0.245       |
|    clip_range           | 0.2         |
|    entropy_loss         | -110        |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0421      |
|    loss                 | -1.15       |
|    n_updates            | 13010       |
|    policy_gradient_loss | -0.00791    |
|    reward               | -0.23350182 |
|    std                  | 3.6e+06     |
|    value_loss           | 0.141       |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 1303  

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 1312        |
|    time_elapsed         | 1258        |
|    total_timesteps      | 167936      |
| train/                  |             |
|    approx_kl            | 0.032496363 |
|    clip_fraction        | 0.215       |
|    clip_range           | 0.2         |
|    entropy_loss         | -110        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -0.598      |
|    n_updates            | 13110       |
|    policy_gradient_loss | -0.00197    |
|    reward               | -0.10144351 |
|    std                  | 3.66e+06    |
|    value_loss           | 2.21        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 1313  

------------------------------------------
| time/                   |              |
|    fps                  | 133          |
|    iterations           | 1322         |
|    time_elapsed         | 1268         |
|    total_timesteps      | 169216       |
| train/                  |              |
|    approx_kl            | 0.018421197  |
|    clip_fraction        | 0.175        |
|    clip_range           | 0.2          |
|    entropy_loss         | -112         |
|    explained_variance   | -1.19e-07    |
|    learning_rate        | 0.0421       |
|    loss                 | -1.17        |
|    n_updates            | 13210        |
|    policy_gradient_loss | -0.00781     |
|    reward               | -0.020685302 |
|    std                  | 5.06e+06     |
|    value_loss           | 0.0933       |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iteration

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 1332        |
|    time_elapsed         | 1275        |
|    total_timesteps      | 170496      |
| train/                  |             |
|    approx_kl            | 0.018584674 |
|    clip_fraction        | 0.0773      |
|    clip_range           | 0.2         |
|    entropy_loss         | -113        |
|    explained_variance   | 5.96e-08    |
|    learning_rate        | 0.0421      |
|    loss                 | 0.764       |
|    n_updates            | 13310       |
|    policy_gradient_loss | -0.0047     |
|    reward               | -0.49446726 |
|    std                  | 5.38e+06    |
|    value_loss           | 4.19        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 1333  

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 1342        |
|    time_elapsed         | 1283        |
|    total_timesteps      | 171776      |
| train/                  |             |
|    approx_kl            | 0.035753872 |
|    clip_fraction        | 0.156       |
|    clip_range           | 0.2         |
|    entropy_loss         | -114        |
|    explained_variance   | 1.79e-07    |
|    learning_rate        | 0.0421      |
|    loss                 | -1.18       |
|    n_updates            | 13410       |
|    policy_gradient_loss | -0.0125     |
|    reward               | 0.024905019 |
|    std                  | 5.45e+06    |
|    value_loss           | 0.183       |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 1343  

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 1352        |
|    time_elapsed         | 1296        |
|    total_timesteps      | 173056      |
| train/                  |             |
|    approx_kl            | 0.032334704 |
|    clip_fraction        | 0.134       |
|    clip_range           | 0.2         |
|    entropy_loss         | -114        |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0421      |
|    loss                 | -1.14       |
|    n_updates            | 13510       |
|    policy_gradient_loss | -0.00495    |
|    reward               | 0.04926469  |
|    std                  | 4.51e+06    |
|    value_loss           | 0.214       |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 1353  

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 1362        |
|    time_elapsed         | 1302        |
|    total_timesteps      | 174336      |
| train/                  |             |
|    approx_kl            | 0.061261967 |
|    clip_fraction        | 0.258       |
|    clip_range           | 0.2         |
|    entropy_loss         | -115        |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0421      |
|    loss                 | 0.828       |
|    n_updates            | 13610       |
|    policy_gradient_loss | -0.0117     |
|    reward               | 0.7674702   |
|    std                  | 4.35e+06    |
|    value_loss           | 4.24        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 1363  

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 1372        |
|    time_elapsed         | 1311        |
|    total_timesteps      | 175616      |
| train/                  |             |
|    approx_kl            | 0.066023536 |
|    clip_fraction        | 0.295       |
|    clip_range           | 0.2         |
|    entropy_loss         | -117        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -0.503      |
|    n_updates            | 13710       |
|    policy_gradient_loss | -0.0201     |
|    reward               | -0.34724686 |
|    std                  | 6.68e+06    |
|    value_loss           | 1.63        |
-----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 133       |
|    iterations           | 1373      |


-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 1382        |
|    time_elapsed         | 1320        |
|    total_timesteps      | 176896      |
| train/                  |             |
|    approx_kl            | 0.07120371  |
|    clip_fraction        | 0.312       |
|    clip_range           | 0.2         |
|    entropy_loss         | -118        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -1.01       |
|    n_updates            | 13810       |
|    policy_gradient_loss | -0.0125     |
|    reward               | -0.44213808 |
|    std                  | 5.81e+06    |
|    value_loss           | 0.464       |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 1383  

-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 1392        |
|    time_elapsed         | 1328        |
|    total_timesteps      | 178176      |
| train/                  |             |
|    approx_kl            | 0.0658758   |
|    clip_fraction        | 0.193       |
|    clip_range           | 0.2         |
|    entropy_loss         | -118        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -1.23       |
|    n_updates            | 13910       |
|    policy_gradient_loss | -0.013      |
|    reward               | 0.033885404 |
|    std                  | 4.73e+06    |
|    value_loss           | 0.125       |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 1393  

----------------------------------------
| time/                   |            |
|    fps                  | 134        |
|    iterations           | 1402       |
|    time_elapsed         | 1336       |
|    total_timesteps      | 179456     |
| train/                  |            |
|    approx_kl            | 0.04259461 |
|    clip_fraction        | 0.214      |
|    clip_range           | 0.2        |
|    entropy_loss         | -118       |
|    explained_variance   | 0          |
|    learning_rate        | 0.0421     |
|    loss                 | -0.776     |
|    n_updates            | 14010      |
|    policy_gradient_loss | -0.0147    |
|    reward               | 0.20850022 |
|    std                  | 4.66e+06   |
|    value_loss           | 0.994      |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 134        |
|    iterations           | 1403       |
|    time_elapse

------------------------------------------
| time/                   |              |
|    fps                  | 134          |
|    iterations           | 1412         |
|    time_elapsed         | 1345         |
|    total_timesteps      | 180736       |
| train/                  |              |
|    approx_kl            | 0.027057337  |
|    clip_fraction        | 0.138        |
|    clip_range           | 0.2          |
|    entropy_loss         | -120         |
|    explained_variance   | -1.19e-07    |
|    learning_rate        | 0.0421       |
|    loss                 | 12.7         |
|    n_updates            | 14110        |
|    policy_gradient_loss | -0.00891     |
|    reward               | -0.037339527 |
|    std                  | 6.46e+06     |
|    value_loss           | 28.2         |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iteration

-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 1422        |
|    time_elapsed         | 1352        |
|    total_timesteps      | 182016      |
| train/                  |             |
|    approx_kl            | 0.06360317  |
|    clip_fraction        | 0.265       |
|    clip_range           | 0.2         |
|    entropy_loss         | -120        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -1.1        |
|    n_updates            | 14210       |
|    policy_gradient_loss | -0.0108     |
|    reward               | -0.08978062 |
|    std                  | 8.34e+06    |
|    value_loss           | 0.37        |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 134          |
|    iterations           | 142

-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 1432        |
|    time_elapsed         | 1361        |
|    total_timesteps      | 183296      |
| train/                  |             |
|    approx_kl            | 0.026821667 |
|    clip_fraction        | 0.135       |
|    clip_range           | 0.2         |
|    entropy_loss         | -121        |
|    explained_variance   | 5.96e-08    |
|    learning_rate        | 0.0421      |
|    loss                 | -0.46       |
|    n_updates            | 14310       |
|    policy_gradient_loss | -0.00764    |
|    reward               | 0.41984347  |
|    std                  | 1.3e+07     |
|    value_loss           | 1.88        |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 134          |
|    iterations           | 143

----------------------------------------
| time/                   |            |
|    fps                  | 134        |
|    iterations           | 1442       |
|    time_elapsed         | 1369       |
|    total_timesteps      | 184576     |
| train/                  |            |
|    approx_kl            | 0.08449905 |
|    clip_fraction        | 0.292      |
|    clip_range           | 0.2        |
|    entropy_loss         | -122       |
|    explained_variance   | 0          |
|    learning_rate        | 0.0421     |
|    loss                 | -1.26      |
|    n_updates            | 14410      |
|    policy_gradient_loss | -0.0181    |
|    reward               | -0.3819726 |
|    std                  | 1.48e+07   |
|    value_loss           | 0.143      |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 1443        |
|    time_el

----------------------------------------
| time/                   |            |
|    fps                  | 134        |
|    iterations           | 1452       |
|    time_elapsed         | 1376       |
|    total_timesteps      | 185856     |
| train/                  |            |
|    approx_kl            | 0.04957698 |
|    clip_fraction        | 0.226      |
|    clip_range           | 0.2        |
|    entropy_loss         | -124       |
|    explained_variance   | 0          |
|    learning_rate        | 0.0421     |
|    loss                 | 4.01       |
|    n_updates            | 14510      |
|    policy_gradient_loss | -0.018     |
|    reward               | 0.50566536 |
|    std                  | 2.41e+07   |
|    value_loss           | 10.8       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 134        |
|    iterations           | 1453       |
|    time_elapse

-----------------------------------------
| time/                   |             |
|    fps                  | 135         |
|    iterations           | 1462        |
|    time_elapsed         | 1385        |
|    total_timesteps      | 187136      |
| train/                  |             |
|    approx_kl            | 0.028557856 |
|    clip_fraction        | 0.131       |
|    clip_range           | 0.2         |
|    entropy_loss         | -124        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -1.01       |
|    n_updates            | 14610       |
|    policy_gradient_loss | -0.00912    |
|    reward               | 0.080043495 |
|    std                  | 2.43e+07    |
|    value_loss           | 0.686       |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 135         |
|    iterations           | 1463  

-----------------------------------------
| time/                   |             |
|    fps                  | 135         |
|    iterations           | 1472        |
|    time_elapsed         | 1394        |
|    total_timesteps      | 188416      |
| train/                  |             |
|    approx_kl            | 0.039067086 |
|    clip_fraction        | 0.152       |
|    clip_range           | 0.2         |
|    entropy_loss         | -124        |
|    explained_variance   | 1.19e-07    |
|    learning_rate        | 0.0421      |
|    loss                 | -1.24       |
|    n_updates            | 14710       |
|    policy_gradient_loss | -0.00996    |
|    reward               | -0.42673776 |
|    std                  | 2.04e+07    |
|    value_loss           | 0.264       |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 135         |
|    iterations           | 1473  

-----------------------------------------
| time/                   |             |
|    fps                  | 135         |
|    iterations           | 1482        |
|    time_elapsed         | 1401        |
|    total_timesteps      | 189696      |
| train/                  |             |
|    approx_kl            | 0.038481846 |
|    clip_fraction        | 0.132       |
|    clip_range           | 0.2         |
|    entropy_loss         | -125        |
|    explained_variance   | 5.96e-08    |
|    learning_rate        | 0.0421      |
|    loss                 | 0.42        |
|    n_updates            | 14810       |
|    policy_gradient_loss | -0.00555    |
|    reward               | 1.6496948   |
|    std                  | 2.18e+07    |
|    value_loss           | 3.46        |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 135          |
|    iterations           | 148

------------------------------------------
| time/                   |              |
|    fps                  | 135          |
|    iterations           | 1492         |
|    time_elapsed         | 1411         |
|    total_timesteps      | 190976       |
| train/                  |              |
|    approx_kl            | 0.031942602  |
|    clip_fraction        | 0.172        |
|    clip_range           | 0.2          |
|    entropy_loss         | -127         |
|    explained_variance   | 0            |
|    learning_rate        | 0.0421       |
|    loss                 | -0.464       |
|    n_updates            | 14910        |
|    policy_gradient_loss | -0.0126      |
|    reward               | -0.054824524 |
|    std                  | 2.38e+07     |
|    value_loss           | 1.29         |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 135         |
|    iteration

-----------------------------------------
| time/                   |             |
|    fps                  | 135         |
|    iterations           | 1502        |
|    time_elapsed         | 1422        |
|    total_timesteps      | 192256      |
| train/                  |             |
|    approx_kl            | 0.035350844 |
|    clip_fraction        | 0.128       |
|    clip_range           | 0.2         |
|    entropy_loss         | -128        |
|    explained_variance   | 5.96e-08    |
|    learning_rate        | 0.0421      |
|    loss                 | -0.436      |
|    n_updates            | 15010       |
|    policy_gradient_loss | -0.0111     |
|    reward               | 0.4783602   |
|    std                  | 3.09e+07    |
|    value_loss           | 1.33        |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 135          |
|    iterations           | 150

-----------------------------------------
| time/                   |             |
|    fps                  | 135         |
|    iterations           | 1512        |
|    time_elapsed         | 1430        |
|    total_timesteps      | 193536      |
| train/                  |             |
|    approx_kl            | 0.05961164  |
|    clip_fraction        | 0.323       |
|    clip_range           | 0.2         |
|    entropy_loss         | -129        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -1.36       |
|    n_updates            | 15110       |
|    policy_gradient_loss | -0.0241     |
|    reward               | 0.040632457 |
|    std                  | 4.27e+07    |
|    value_loss           | 0.157       |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 135          |
|    iterations           | 151

-----------------------------------------
| time/                   |             |
|    fps                  | 135         |
|    iterations           | 1522        |
|    time_elapsed         | 1440        |
|    total_timesteps      | 194816      |
| train/                  |             |
|    approx_kl            | 0.042702496 |
|    clip_fraction        | 0.316       |
|    clip_range           | 0.2         |
|    entropy_loss         | -128        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -1.38       |
|    n_updates            | 15210       |
|    policy_gradient_loss | -0.0198     |
|    reward               | 0.18907985  |
|    std                  | 3.76e+07    |
|    value_loss           | 0.0756      |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 135         |
|    iterations           | 1523  

-----------------------------------------
| time/                   |             |
|    fps                  | 135         |
|    iterations           | 1532        |
|    time_elapsed         | 1448        |
|    total_timesteps      | 196096      |
| train/                  |             |
|    approx_kl            | 0.048382625 |
|    clip_fraction        | 0.202       |
|    clip_range           | 0.2         |
|    entropy_loss         | -128        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 0.173       |
|    n_updates            | 15310       |
|    policy_gradient_loss | -0.0183     |
|    reward               | -0.22704922 |
|    std                  | 3.66e+07    |
|    value_loss           | 3.13        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 135         |
|    iterations           | 1533  

-----------------------------------------
| time/                   |             |
|    fps                  | 135         |
|    iterations           | 1542        |
|    time_elapsed         | 1458        |
|    total_timesteps      | 197376      |
| train/                  |             |
|    approx_kl            | 0.04055054  |
|    clip_fraction        | 0.291       |
|    clip_range           | 0.2         |
|    entropy_loss         | -130        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -1.35       |
|    n_updates            | 15410       |
|    policy_gradient_loss | -0.0171     |
|    reward               | 0.014463271 |
|    std                  | 4.27e+07    |
|    value_loss           | 0.145       |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 135        |
|    iterations           | 1543     

------------------------------------------
| time/                   |              |
|    fps                  | 135          |
|    iterations           | 1552         |
|    time_elapsed         | 1467         |
|    total_timesteps      | 198656       |
| train/                  |              |
|    approx_kl            | 0.047934227  |
|    clip_fraction        | 0.16         |
|    clip_range           | 0.2          |
|    entropy_loss         | -131         |
|    explained_variance   | 0            |
|    learning_rate        | 0.0421       |
|    loss                 | -0.829       |
|    n_updates            | 15510        |
|    policy_gradient_loss | -0.00892     |
|    reward               | -0.018968256 |
|    std                  | 6.3e+07      |
|    value_loss           | 1.26         |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 135         |
|    iteration

-----------------------------------------
| time/                   |             |
|    fps                  | 135         |
|    iterations           | 1562        |
|    time_elapsed         | 1475        |
|    total_timesteps      | 199936      |
| train/                  |             |
|    approx_kl            | 0.020603433 |
|    clip_fraction        | 0.261       |
|    clip_range           | 0.2         |
|    entropy_loss         | -132        |
|    explained_variance   | 5.96e-08    |
|    learning_rate        | 0.0421      |
|    loss                 | -1.43       |
|    n_updates            | 15610       |
|    policy_gradient_loss | -0.0141     |
|    reward               | 0.04375513  |
|    std                  | 8.21e+07    |
|    value_loss           | 0.0562      |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 135         |
|    iterations           | 1563  

----------------------------------------
| time/                   |            |
|    fps                  | 135        |
|    iterations           | 1572       |
|    time_elapsed         | 1484       |
|    total_timesteps      | 201216     |
| train/                  |            |
|    approx_kl            | 0.03492123 |
|    clip_fraction        | 0.0977     |
|    clip_range           | 0.2        |
|    entropy_loss         | -133       |
|    explained_variance   | 5.96e-08   |
|    learning_rate        | 0.0421     |
|    loss                 | 1.34       |
|    n_updates            | 15710      |
|    policy_gradient_loss | -0.0105    |
|    reward               | 0.9126368  |
|    std                  | 8.01e+07   |
|    value_loss           | 5.66       |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 135         |
|    iterations           | 1573        |
|    time_el

-----------------------------------------
| time/                   |             |
|    fps                  | 135         |
|    iterations           | 1582        |
|    time_elapsed         | 1492        |
|    total_timesteps      | 202496      |
| train/                  |             |
|    approx_kl            | 0.041646875 |
|    clip_fraction        | 0.145       |
|    clip_range           | 0.2         |
|    entropy_loss         | -135        |
|    explained_variance   | 5.96e-08    |
|    learning_rate        | 0.0421      |
|    loss                 | -1.37       |
|    n_updates            | 15810       |
|    policy_gradient_loss | -0.0148     |
|    reward               | 0.051506817 |
|    std                  | 8.74e+07    |
|    value_loss           | 0.213       |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 135         |
|    iterations           | 1583  

-----------------------------------------
| time/                   |             |
|    fps                  | 135         |
|    iterations           | 1592        |
|    time_elapsed         | 1499        |
|    total_timesteps      | 203776      |
| train/                  |             |
|    approx_kl            | 0.034458164 |
|    clip_fraction        | 0.159       |
|    clip_range           | 0.2         |
|    entropy_loss         | -135        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -1.33       |
|    n_updates            | 15910       |
|    policy_gradient_loss | -0.0183     |
|    reward               | -0.59156203 |
|    std                  | 7.71e+07    |
|    value_loss           | 0.385       |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 135        |
|    iterations           | 1593     

-----------------------------------------
| time/                   |             |
|    fps                  | 135         |
|    iterations           | 1602        |
|    time_elapsed         | 1509        |
|    total_timesteps      | 205056      |
| train/                  |             |
|    approx_kl            | 0.08031568  |
|    clip_fraction        | 0.309       |
|    clip_range           | 0.2         |
|    entropy_loss         | -136        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 0.327       |
|    n_updates            | 16010       |
|    policy_gradient_loss | -0.016      |
|    reward               | -0.40455928 |
|    std                  | 1.11e+08    |
|    value_loss           | 3.21        |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 135          |
|    iterations           | 160

-----------------------------------------
| time/                   |             |
|    fps                  | 135         |
|    iterations           | 1612        |
|    time_elapsed         | 1518        |
|    total_timesteps      | 206336      |
| train/                  |             |
|    approx_kl            | 0.014881177 |
|    clip_fraction        | 0.0891      |
|    clip_range           | 0.2         |
|    entropy_loss         | -136        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -0.736      |
|    n_updates            | 16110       |
|    policy_gradient_loss | -0.00739    |
|    reward               | 0.5073384   |
|    std                  | 9.91e+07    |
|    value_loss           | 2.01        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 135         |
|    iterations           | 1613  

----------------------------------------
| time/                   |            |
|    fps                  | 136        |
|    iterations           | 1622       |
|    time_elapsed         | 1526       |
|    total_timesteps      | 207616     |
| train/                  |            |
|    approx_kl            | 0.10022206 |
|    clip_fraction        | 0.284      |
|    clip_range           | 0.2        |
|    entropy_loss         | -136       |
|    explained_variance   | 0          |
|    learning_rate        | 0.0421     |
|    loss                 | -0.683     |
|    n_updates            | 16210      |
|    policy_gradient_loss | -0.0269    |
|    reward               | -0.5205235 |
|    std                  | 1.11e+08   |
|    value_loss           | 1.7        |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 136         |
|    iterations           | 1623        |
|    time_el

----------------------------------------
| time/                   |            |
|    fps                  | 135        |
|    iterations           | 1632       |
|    time_elapsed         | 1538       |
|    total_timesteps      | 208896     |
| train/                  |            |
|    approx_kl            | 0.06591067 |
|    clip_fraction        | 0.203      |
|    clip_range           | 0.2        |
|    entropy_loss         | -135       |
|    explained_variance   | 0          |
|    learning_rate        | 0.0421     |
|    loss                 | -1.34      |
|    n_updates            | 16310      |
|    policy_gradient_loss | -0.00943   |
|    reward               | 0.1226416  |
|    std                  | 9.67e+07   |
|    value_loss           | 0.301      |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 135         |
|    iterations           | 1633        |
|    time_el

-----------------------------------------
| time/                   |             |
|    fps                  | 135         |
|    iterations           | 1642        |
|    time_elapsed         | 1546        |
|    total_timesteps      | 210176      |
| train/                  |             |
|    approx_kl            | 0.035949335 |
|    clip_fraction        | 0.223       |
|    clip_range           | 0.2         |
|    entropy_loss         | -136        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -1.05       |
|    n_updates            | 16410       |
|    policy_gradient_loss | -0.00648    |
|    reward               | 1.7433271   |
|    std                  | 1.02e+08    |
|    value_loss           | 0.87        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 135        |
|    iterations           | 1643     

-----------------------------------------
| time/                   |             |
|    fps                  | 135         |
|    iterations           | 1652        |
|    time_elapsed         | 1555        |
|    total_timesteps      | 211456      |
| train/                  |             |
|    approx_kl            | 0.040250286 |
|    clip_fraction        | 0.132       |
|    clip_range           | 0.2         |
|    entropy_loss         | -136        |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0421      |
|    loss                 | 4.23        |
|    n_updates            | 16510       |
|    policy_gradient_loss | -0.00903    |
|    reward               | -0.08756077 |
|    std                  | 9.54e+07    |
|    value_loss           | 12.3        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 135         |
|    iterations           | 1653  

----------------------------------------
| time/                   |            |
|    fps                  | 135        |
|    iterations           | 1662       |
|    time_elapsed         | 1565       |
|    total_timesteps      | 212736     |
| train/                  |            |
|    approx_kl            | 0.08088596 |
|    clip_fraction        | 0.338      |
|    clip_range           | 0.2        |
|    entropy_loss         | -136       |
|    explained_variance   | 0          |
|    learning_rate        | 0.0421     |
|    loss                 | -1.26      |
|    n_updates            | 16610      |
|    policy_gradient_loss | -0.0218    |
|    reward               | 0.17461075 |
|    std                  | 8.2e+07    |
|    value_loss           | 0.49       |
----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 135          |
|    iterations           | 1663         |
|    tim

-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 1672        |
|    time_elapsed         | 1588        |
|    total_timesteps      | 214016      |
| train/                  |             |
|    approx_kl            | 0.030562777 |
|    clip_fraction        | 0.158       |
|    clip_range           | 0.2         |
|    entropy_loss         | -137        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -0.0342     |
|    n_updates            | 16710       |
|    policy_gradient_loss | -0.0117     |
|    reward               | 0.15700302  |
|    std                  | 1.06e+08    |
|    value_loss           | 2.96        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 1673  

----------------------------------------
| time/                   |            |
|    fps                  | 134        |
|    iterations           | 1682       |
|    time_elapsed         | 1597       |
|    total_timesteps      | 215296     |
| train/                  |            |
|    approx_kl            | 0.06705527 |
|    clip_fraction        | 0.192      |
|    clip_range           | 0.2        |
|    entropy_loss         | -137       |
|    explained_variance   | 0          |
|    learning_rate        | 0.0421     |
|    loss                 | -1.48      |
|    n_updates            | 16810      |
|    policy_gradient_loss | -0.0197    |
|    reward               | 0.12423276 |
|    std                  | 1.07e+08   |
|    value_loss           | 0.133      |
----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 134          |
|    iterations           | 1683         |
|    tim

----------------------------------------
| time/                   |            |
|    fps                  | 134        |
|    iterations           | 1692       |
|    time_elapsed         | 1605       |
|    total_timesteps      | 216576     |
| train/                  |            |
|    approx_kl            | 0.06382438 |
|    clip_fraction        | 0.227      |
|    clip_range           | 0.2        |
|    entropy_loss         | -138       |
|    explained_variance   | 5.96e-08   |
|    learning_rate        | 0.0421     |
|    loss                 | -1.07      |
|    n_updates            | 16910      |
|    policy_gradient_loss | -0.0149    |
|    reward               | 1.2466465  |
|    std                  | 1.19e+08   |
|    value_loss           | 1.09       |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 1693        |
|    time_el

-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 1702        |
|    time_elapsed         | 1615        |
|    total_timesteps      | 217856      |
| train/                  |             |
|    approx_kl            | 0.063044354 |
|    clip_fraction        | 0.24        |
|    clip_range           | 0.2         |
|    entropy_loss         | -139        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -1.21       |
|    n_updates            | 17010       |
|    policy_gradient_loss | -0.0238     |
|    reward               | 0.1477433   |
|    std                  | 1.16e+08    |
|    value_loss           | 0.615       |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 134          |
|    iterations           | 170

------------------------------------------
| time/                   |              |
|    fps                  | 134          |
|    iterations           | 1712         |
|    time_elapsed         | 1627         |
|    total_timesteps      | 219136       |
| train/                  |              |
|    approx_kl            | 0.049480237  |
|    clip_fraction        | 0.173        |
|    clip_range           | 0.2          |
|    entropy_loss         | -139         |
|    explained_variance   | 0            |
|    learning_rate        | 0.0421       |
|    loss                 | -1.38        |
|    n_updates            | 17110        |
|    policy_gradient_loss | -0.0177      |
|    reward               | 0.0085952645 |
|    std                  | 1.37e+08     |
|    value_loss           | 0.304        |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iteration

-------------------------------------------
| time/                   |               |
|    fps                  | 134           |
|    iterations           | 1722          |
|    time_elapsed         | 1638          |
|    total_timesteps      | 220416        |
| train/                  |               |
|    approx_kl            | 0.056771666   |
|    clip_fraction        | 0.259         |
|    clip_range           | 0.2           |
|    entropy_loss         | -141          |
|    explained_variance   | 0             |
|    learning_rate        | 0.0421        |
|    loss                 | -0.211        |
|    n_updates            | 17210         |
|    policy_gradient_loss | -0.0113       |
|    reward               | -0.0024634001 |
|    std                  | 2.24e+08      |
|    value_loss           | 2.85          |
-------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 134     

-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 1732        |
|    time_elapsed         | 1646        |
|    total_timesteps      | 221696      |
| train/                  |             |
|    approx_kl            | 0.050016575 |
|    clip_fraction        | 0.174       |
|    clip_range           | 0.2         |
|    entropy_loss         | -143        |
|    explained_variance   | 1.19e-07    |
|    learning_rate        | 0.0421      |
|    loss                 | -1.35       |
|    n_updates            | 17310       |
|    policy_gradient_loss | -0.0134     |
|    reward               | 0.06610496  |
|    std                  | 2.74e+08    |
|    value_loss           | 0.403       |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 1733  

-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 1742        |
|    time_elapsed         | 1656        |
|    total_timesteps      | 222976      |
| train/                  |             |
|    approx_kl            | 0.027047265 |
|    clip_fraction        | 0.144       |
|    clip_range           | 0.2         |
|    entropy_loss         | -143        |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0421      |
|    loss                 | -1.23       |
|    n_updates            | 17410       |
|    policy_gradient_loss | -0.0122     |
|    reward               | 0.37848577  |
|    std                  | 2.87e+08    |
|    value_loss           | 0.677       |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 1743  

-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 1752        |
|    time_elapsed         | 1670        |
|    total_timesteps      | 224256      |
| train/                  |             |
|    approx_kl            | 0.08219044  |
|    clip_fraction        | 0.398       |
|    clip_range           | 0.2         |
|    entropy_loss         | -144        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -1.5        |
|    n_updates            | 17510       |
|    policy_gradient_loss | -0.0181     |
|    reward               | -0.18117848 |
|    std                  | 3.85e+08    |
|    value_loss           | 0.135       |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 1753  

----------------------------------------
| time/                   |            |
|    fps                  | 133        |
|    iterations           | 1762       |
|    time_elapsed         | 1683       |
|    total_timesteps      | 225536     |
| train/                  |            |
|    approx_kl            | 0.04180587 |
|    clip_fraction        | 0.233      |
|    clip_range           | 0.2        |
|    entropy_loss         | -144       |
|    explained_variance   | 0          |
|    learning_rate        | 0.0421     |
|    loss                 | -0.132     |
|    n_updates            | 17610      |
|    policy_gradient_loss | -0.0133    |
|    reward               | 0.53099346 |
|    std                  | 3.6e+08    |
|    value_loss           | 2.24       |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 1763        |
|    time_el

-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 1772        |
|    time_elapsed         | 1692        |
|    total_timesteps      | 226816      |
| train/                  |             |
|    approx_kl            | 0.08171045  |
|    clip_fraction        | 0.261       |
|    clip_range           | 0.2         |
|    entropy_loss         | -145        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -1.56       |
|    n_updates            | 17710       |
|    policy_gradient_loss | -0.0214     |
|    reward               | -0.11283528 |
|    std                  | 4.13e+08    |
|    value_loss           | 0.274       |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 1773  

-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 1782        |
|    time_elapsed         | 1699        |
|    total_timesteps      | 228096      |
| train/                  |             |
|    approx_kl            | 0.07164796  |
|    clip_fraction        | 0.358       |
|    clip_range           | 0.2         |
|    entropy_loss         | -144        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -1.43       |
|    n_updates            | 17810       |
|    policy_gradient_loss | -0.0134     |
|    reward               | 0.051021036 |
|    std                  | 3.83e+08    |
|    value_loss           | 0.307       |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 1783  

-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 1792        |
|    time_elapsed         | 1710        |
|    total_timesteps      | 229376      |
| train/                  |             |
|    approx_kl            | 0.027013918 |
|    clip_fraction        | 0.168       |
|    clip_range           | 0.2         |
|    entropy_loss         | -143        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -0.981      |
|    n_updates            | 17910       |
|    policy_gradient_loss | -0.00617    |
|    reward               | 0.3025829   |
|    std                  | 2.62e+08    |
|    value_loss           | 1.01        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 1793  

-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 1802        |
|    time_elapsed         | 1718        |
|    total_timesteps      | 230656      |
| train/                  |             |
|    approx_kl            | 0.008028827 |
|    clip_fraction        | 0.0508      |
|    clip_range           | 0.2         |
|    entropy_loss         | -145        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -1.26       |
|    n_updates            | 18010       |
|    policy_gradient_loss | -0.00199    |
|    reward               | 0.36048532  |
|    std                  | 3.62e+08    |
|    value_loss           | 0.64        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 1803  

-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 1812        |
|    time_elapsed         | 1728        |
|    total_timesteps      | 231936      |
| train/                  |             |
|    approx_kl            | 0.048399515 |
|    clip_fraction        | 0.153       |
|    clip_range           | 0.2         |
|    entropy_loss         | -146        |
|    explained_variance   | 5.96e-08    |
|    learning_rate        | 0.0421      |
|    loss                 | -1.1        |
|    n_updates            | 18110       |
|    policy_gradient_loss | -0.0198     |
|    reward               | 0.2918966   |
|    std                  | 4.85e+08    |
|    value_loss           | 1.6         |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 1813  

-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 1822        |
|    time_elapsed         | 1736        |
|    total_timesteps      | 233216      |
| train/                  |             |
|    approx_kl            | 0.045874774 |
|    clip_fraction        | 0.142       |
|    clip_range           | 0.2         |
|    entropy_loss         | -147        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -1.58       |
|    n_updates            | 18210       |
|    policy_gradient_loss | -0.012      |
|    reward               | 0.03442038  |
|    std                  | 5.09e+08    |
|    value_loss           | 0.132       |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 134          |
|    iterations           | 182

-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 1832        |
|    time_elapsed         | 1744        |
|    total_timesteps      | 234496      |
| train/                  |             |
|    approx_kl            | 0.16890714  |
|    clip_fraction        | 0.347       |
|    clip_range           | 0.2         |
|    entropy_loss         | -149        |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0421      |
|    loss                 | -1.36       |
|    n_updates            | 18310       |
|    policy_gradient_loss | -0.023      |
|    reward               | 0.010900654 |
|    std                  | 6.39e+08    |
|    value_loss           | 0.574       |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 1833  

-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 1842        |
|    time_elapsed         | 1754        |
|    total_timesteps      | 235776      |
| train/                  |             |
|    approx_kl            | 0.014375048 |
|    clip_fraction        | 0.0734      |
|    clip_range           | 0.2         |
|    entropy_loss         | -149        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 4.57        |
|    n_updates            | 18410       |
|    policy_gradient_loss | -0.000708   |
|    reward               | 0.0465636   |
|    std                  | 8.38e+08    |
|    value_loss           | 13.2        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 1843  

----------------------------------------
| time/                   |            |
|    fps                  | 134        |
|    iterations           | 1852       |
|    time_elapsed         | 1763       |
|    total_timesteps      | 237056     |
| train/                  |            |
|    approx_kl            | 0.08538254 |
|    clip_fraction        | 0.42       |
|    clip_range           | 0.2        |
|    entropy_loss         | -150       |
|    explained_variance   | 0          |
|    learning_rate        | 0.0421     |
|    loss                 | -1.61      |
|    n_updates            | 18510      |
|    policy_gradient_loss | -0.0263    |
|    reward               | 0.1726939  |
|    std                  | 1.09e+09   |
|    value_loss           | 0.0818     |
----------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 134           |
|    iterations           | 1853          |
|   

-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 1862        |
|    time_elapsed         | 1773        |
|    total_timesteps      | 238336      |
| train/                  |             |
|    approx_kl            | 0.012149058 |
|    clip_fraction        | 0.0609      |
|    clip_range           | 0.2         |
|    entropy_loss         | -151        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -1.09       |
|    n_updates            | 18610       |
|    policy_gradient_loss | -0.0096     |
|    reward               | 0.07610658  |
|    std                  | 1.29e+09    |
|    value_loss           | 1.77        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 1863  

-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 1872        |
|    time_elapsed         | 1781        |
|    total_timesteps      | 239616      |
| train/                  |             |
|    approx_kl            | 0.007892037 |
|    clip_fraction        | 0.0281      |
|    clip_range           | 0.2         |
|    entropy_loss         | -152        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -1.48       |
|    n_updates            | 18710       |
|    policy_gradient_loss | -0.00442    |
|    reward               | 0.28967774  |
|    std                  | 1.62e+09    |
|    value_loss           | 0.477       |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 1873  

-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 1882        |
|    time_elapsed         | 1795        |
|    total_timesteps      | 240896      |
| train/                  |             |
|    approx_kl            | 0.038158663 |
|    clip_fraction        | 0.236       |
|    clip_range           | 0.2         |
|    entropy_loss         | -153        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -0.43       |
|    n_updates            | 18810       |
|    policy_gradient_loss | -0.0186     |
|    reward               | 0.90350384  |
|    std                  | 1.8e+09     |
|    value_loss           | 2.74        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 1883  

------------------------------------------
| time/                   |              |
|    fps                  | 133          |
|    iterations           | 1892         |
|    time_elapsed         | 1807         |
|    total_timesteps      | 242176       |
| train/                  |              |
|    approx_kl            | 0.01972408   |
|    clip_fraction        | 0.0914       |
|    clip_range           | 0.2          |
|    entropy_loss         | -154         |
|    explained_variance   | -1.19e-07    |
|    learning_rate        | 0.0421       |
|    loss                 | -1.51        |
|    n_updates            | 18910        |
|    policy_gradient_loss | -0.0128      |
|    reward               | -0.034116406 |
|    std                  | 2.11e+09     |
|    value_loss           | 0.405        |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iteration

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 1902        |
|    time_elapsed         | 1819        |
|    total_timesteps      | 243456      |
| train/                  |             |
|    approx_kl            | 0.022170277 |
|    clip_fraction        | 0.125       |
|    clip_range           | 0.2         |
|    entropy_loss         | -155        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -1.46       |
|    n_updates            | 19010       |
|    policy_gradient_loss | -0.0029     |
|    reward               | 0.12931333  |
|    std                  | 2.73e+09    |
|    value_loss           | 0.168       |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 1903  

----------------------------------------
| time/                   |            |
|    fps                  | 133        |
|    iterations           | 1912       |
|    time_elapsed         | 1828       |
|    total_timesteps      | 244736     |
| train/                  |            |
|    approx_kl            | 0.06920603 |
|    clip_fraction        | 0.235      |
|    clip_range           | 0.2        |
|    entropy_loss         | -156       |
|    explained_variance   | 0          |
|    learning_rate        | 0.0421     |
|    loss                 | -0.873     |
|    n_updates            | 19110      |
|    policy_gradient_loss | -0.0214    |
|    reward               | -1.8535466 |
|    std                  | 2.78e+09   |
|    value_loss           | 1.74       |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 1913        |
|    time_el

----------------------------------------
| time/                   |            |
|    fps                  | 133        |
|    iterations           | 1922       |
|    time_elapsed         | 1838       |
|    total_timesteps      | 246016     |
| train/                  |            |
|    approx_kl            | 0.12275859 |
|    clip_fraction        | 0.327      |
|    clip_range           | 0.2        |
|    entropy_loss         | -157       |
|    explained_variance   | 0          |
|    learning_rate        | 0.0421     |
|    loss                 | -1.42      |
|    n_updates            | 19210      |
|    policy_gradient_loss | -0.0196    |
|    reward               | -0.2501251 |
|    std                  | 3.51e+09   |
|    value_loss           | 0.662      |
----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 133          |
|    iterations           | 1923         |
|    tim

------------------------------------------
| time/                   |              |
|    fps                  | 133          |
|    iterations           | 1932         |
|    time_elapsed         | 1847         |
|    total_timesteps      | 247296       |
| train/                  |              |
|    approx_kl            | 0.024794018  |
|    clip_fraction        | 0.12         |
|    clip_range           | 0.2          |
|    entropy_loss         | -157         |
|    explained_variance   | 0            |
|    learning_rate        | 0.0421       |
|    loss                 | -1.2         |
|    n_updates            | 19310        |
|    policy_gradient_loss | -0.0109      |
|    reward               | -0.042062666 |
|    std                  | 3e+09        |
|    value_loss           | 1.26         |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iteration

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 1942        |
|    time_elapsed         | 1855        |
|    total_timesteps      | 248576      |
| train/                  |             |
|    approx_kl            | 0.08085145  |
|    clip_fraction        | 0.352       |
|    clip_range           | 0.2         |
|    entropy_loss         | -158        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -1.73       |
|    n_updates            | 19410       |
|    policy_gradient_loss | -0.0233     |
|    reward               | 0.009505894 |
|    std                  | 2.59e+09    |
|    value_loss           | 0.0659      |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 1943  

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 1952        |
|    time_elapsed         | 1866        |
|    total_timesteps      | 249856      |
| train/                  |             |
|    approx_kl            | 0.052077718 |
|    clip_fraction        | 0.241       |
|    clip_range           | 0.2         |
|    entropy_loss         | -159        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -1.42       |
|    n_updates            | 19510       |
|    policy_gradient_loss | -0.014      |
|    reward               | -0.8237791  |
|    std                  | 6.13e+09    |
|    value_loss           | 0.59        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 1953  

------------------------------------------
| time/                   |              |
|    fps                  | 133          |
|    iterations           | 1962         |
|    time_elapsed         | 1877         |
|    total_timesteps      | 251136       |
| train/                  |              |
|    approx_kl            | 0.057379995  |
|    clip_fraction        | 0.237        |
|    clip_range           | 0.2          |
|    entropy_loss         | -160         |
|    explained_variance   | 0            |
|    learning_rate        | 0.0421       |
|    loss                 | 6.85         |
|    n_updates            | 19610        |
|    policy_gradient_loss | -0.0201      |
|    reward               | -0.062932625 |
|    std                  | 8.31e+09     |
|    value_loss           | 16.7         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 133          |
|    iterat

------------------------------------------
| time/                   |              |
|    fps                  | 133          |
|    iterations           | 1972         |
|    time_elapsed         | 1888         |
|    total_timesteps      | 252416       |
| train/                  |              |
|    approx_kl            | 0.078190625  |
|    clip_fraction        | 0.378        |
|    clip_range           | 0.2          |
|    entropy_loss         | -161         |
|    explained_variance   | 0            |
|    learning_rate        | 0.0421       |
|    loss                 | -1.74        |
|    n_updates            | 19710        |
|    policy_gradient_loss | -0.0307      |
|    reward               | -0.010669843 |
|    std                  | 8.74e+09     |
|    value_loss           | 0.11         |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iteration

----------------------------------------
| time/                   |            |
|    fps                  | 133        |
|    iterations           | 1982       |
|    time_elapsed         | 1899       |
|    total_timesteps      | 253696     |
| train/                  |            |
|    approx_kl            | 0.03819165 |
|    clip_fraction        | 0.138      |
|    clip_range           | 0.2        |
|    entropy_loss         | -161       |
|    explained_variance   | 0          |
|    learning_rate        | 0.0421     |
|    loss                 | -1.4       |
|    n_updates            | 19810      |
|    policy_gradient_loss | -0.0125    |
|    reward               | 0.34510374 |
|    std                  | 7.8e+09    |
|    value_loss           | 1.25       |
----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 133          |
|    iterations           | 1983         |
|    tim

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 1992        |
|    time_elapsed         | 1908        |
|    total_timesteps      | 254976      |
| train/                  |             |
|    approx_kl            | 0.052856486 |
|    clip_fraction        | 0.188       |
|    clip_range           | 0.2         |
|    entropy_loss         | -161        |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0421      |
|    loss                 | -1.73       |
|    n_updates            | 19910       |
|    policy_gradient_loss | -0.0109     |
|    reward               | -0.2123525  |
|    std                  | 9.52e+09    |
|    value_loss           | 0.0775      |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 1993  

----------------------------------------
| time/                   |            |
|    fps                  | 133        |
|    iterations           | 2002       |
|    time_elapsed         | 1917       |
|    total_timesteps      | 256256     |
| train/                  |            |
|    approx_kl            | 0.0686448  |
|    clip_fraction        | 0.27       |
|    clip_range           | 0.2        |
|    entropy_loss         | -161       |
|    explained_variance   | 0          |
|    learning_rate        | 0.0421     |
|    loss                 | -0.319     |
|    n_updates            | 20010      |
|    policy_gradient_loss | -0.0153    |
|    reward               | -0.2371111 |
|    std                  | 1.5e+10    |
|    value_loss           | 2.75       |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2003        |
|    time_el

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2012        |
|    time_elapsed         | 1927        |
|    total_timesteps      | 257536      |
| train/                  |             |
|    approx_kl            | 0.048532397 |
|    clip_fraction        | 0.227       |
|    clip_range           | 0.2         |
|    entropy_loss         | -162        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -1.57       |
|    n_updates            | 20110       |
|    policy_gradient_loss | -0.019      |
|    reward               | 0.15904024  |
|    std                  | 2.27e+10    |
|    value_loss           | 0.448       |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2013  

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2022        |
|    time_elapsed         | 1936        |
|    total_timesteps      | 258816      |
| train/                  |             |
|    approx_kl            | 0.06480865  |
|    clip_fraction        | 0.32        |
|    clip_range           | 0.2         |
|    entropy_loss         | -164        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -1.73       |
|    n_updates            | 20210       |
|    policy_gradient_loss | -0.0106     |
|    reward               | -0.17830202 |
|    std                  | 2.16e+10    |
|    value_loss           | 0.157       |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2023  

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2032        |
|    time_elapsed         | 1946        |
|    total_timesteps      | 260096      |
| train/                  |             |
|    approx_kl            | 0.021535814 |
|    clip_fraction        | 0.11        |
|    clip_range           | 0.2         |
|    entropy_loss         | -166        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -1          |
|    n_updates            | 20310       |
|    policy_gradient_loss | -0.0098     |
|    reward               | 0.29385197  |
|    std                  | 2.75e+10    |
|    value_loss           | 1.66        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2033  

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2042        |
|    time_elapsed         | 1955        |
|    total_timesteps      | 261376      |
| train/                  |             |
|    approx_kl            | 0.056979015 |
|    clip_fraction        | 0.177       |
|    clip_range           | 0.2         |
|    entropy_loss         | -168        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -1.68       |
|    n_updates            | 20410       |
|    policy_gradient_loss | -0.00992    |
|    reward               | -0.07226068 |
|    std                  | 2.65e+10    |
|    value_loss           | 0.395       |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 133        |
|    iterations           | 2043     

------------------------------------------
| time/                   |              |
|    fps                  | 133          |
|    iterations           | 2052         |
|    time_elapsed         | 1965         |
|    total_timesteps      | 262656       |
| train/                  |              |
|    approx_kl            | 0.0132466825 |
|    clip_fraction        | 0.0539       |
|    clip_range           | 0.2          |
|    entropy_loss         | -170         |
|    explained_variance   | 0            |
|    learning_rate        | 0.0421       |
|    loss                 | 1.16         |
|    n_updates            | 20510        |
|    policy_gradient_loss | -0.000531    |
|    reward               | 2.3370686    |
|    std                  | 2.11e+10     |
|    value_loss           | 6.43         |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iteration

------------------------------------------
| time/                   |              |
|    fps                  | 133          |
|    iterations           | 2062         |
|    time_elapsed         | 1974         |
|    total_timesteps      | 263936       |
| train/                  |              |
|    approx_kl            | 0.02355377   |
|    clip_fraction        | 0.141        |
|    clip_range           | 0.2          |
|    entropy_loss         | -171         |
|    explained_variance   | 0            |
|    learning_rate        | 0.0421       |
|    loss                 | -1.78        |
|    n_updates            | 20610        |
|    policy_gradient_loss | -0.00668     |
|    reward               | -0.016248614 |
|    std                  | 3.05e+10     |
|    value_loss           | 0.147        |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iteration

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2072        |
|    time_elapsed         | 1988        |
|    total_timesteps      | 265216      |
| train/                  |             |
|    approx_kl            | 0.028939659 |
|    clip_fraction        | 0.202       |
|    clip_range           | 0.2         |
|    entropy_loss         | -171        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -1.51       |
|    n_updates            | 20710       |
|    policy_gradient_loss | -0.00627    |
|    reward               | 0.5495424   |
|    std                  | 3.61e+10    |
|    value_loss           | 0.63        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 133        |
|    iterations           | 2073     

------------------------------------------
| time/                   |              |
|    fps                  | 133          |
|    iterations           | 2082         |
|    time_elapsed         | 2001         |
|    total_timesteps      | 266496       |
| train/                  |              |
|    approx_kl            | 0.018920787  |
|    clip_fraction        | 0.0953       |
|    clip_range           | 0.2          |
|    entropy_loss         | -170         |
|    explained_variance   | 0            |
|    learning_rate        | 0.0421       |
|    loss                 | 3.56         |
|    n_updates            | 20810        |
|    policy_gradient_loss | -0.00537     |
|    reward               | -0.002960077 |
|    std                  | 3.53e+10     |
|    value_loss           | 12.4         |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iteration

------------------------------------------
| time/                   |              |
|    fps                  | 133          |
|    iterations           | 2092         |
|    time_elapsed         | 2010         |
|    total_timesteps      | 267776       |
| train/                  |              |
|    approx_kl            | 0.059363     |
|    clip_fraction        | 0.252        |
|    clip_range           | 0.2          |
|    entropy_loss         | -170         |
|    explained_variance   | 0            |
|    learning_rate        | 0.0421       |
|    loss                 | -1.7         |
|    n_updates            | 20910        |
|    policy_gradient_loss | -0.00581     |
|    reward               | -0.076793745 |
|    std                  | 2.33e+10     |
|    value_loss           | 0.244        |
------------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 133        |
|    iterations  

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2102        |
|    time_elapsed         | 2020        |
|    total_timesteps      | 269056      |
| train/                  |             |
|    approx_kl            | 0.044206336 |
|    clip_fraction        | 0.214       |
|    clip_range           | 0.2         |
|    entropy_loss         | -171        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -1.23       |
|    n_updates            | 21010       |
|    policy_gradient_loss | -0.0182     |
|    reward               | 0.2301277   |
|    std                  | 2.94e+10    |
|    value_loss           | 1.33        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2103  

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2112        |
|    time_elapsed         | 2031        |
|    total_timesteps      | 270336      |
| train/                  |             |
|    approx_kl            | 0.065983005 |
|    clip_fraction        | 0.224       |
|    clip_range           | 0.2         |
|    entropy_loss         | -174        |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0421      |
|    loss                 | -1.83       |
|    n_updates            | 21110       |
|    policy_gradient_loss | -0.0175     |
|    reward               | 0.42178237  |
|    std                  | 2.95e+10    |
|    value_loss           | 0.21        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2113  

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2122        |
|    time_elapsed         | 2042        |
|    total_timesteps      | 271616      |
| train/                  |             |
|    approx_kl            | 0.020950008 |
|    clip_fraction        | 0.205       |
|    clip_range           | 0.2         |
|    entropy_loss         | -174        |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0421      |
|    loss                 | -0.648      |
|    n_updates            | 21210       |
|    policy_gradient_loss | -0.00589    |
|    reward               | 0.51488554  |
|    std                  | 2.77e+10    |
|    value_loss           | 2.49        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 133        |
|    iterations           | 2123     

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2132        |
|    time_elapsed         | 2050        |
|    total_timesteps      | 272896      |
| train/                  |             |
|    approx_kl            | 0.037920166 |
|    clip_fraction        | 0.177       |
|    clip_range           | 0.2         |
|    entropy_loss         | -176        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -1.83       |
|    n_updates            | 21310       |
|    policy_gradient_loss | -0.00829    |
|    reward               | -0.04112985 |
|    std                  | 3.69e+10    |
|    value_loss           | 0.193       |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2133  

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2142        |
|    time_elapsed         | 2056        |
|    total_timesteps      | 274176      |
| train/                  |             |
|    approx_kl            | 0.022739002 |
|    clip_fraction        | 0.138       |
|    clip_range           | 0.2         |
|    entropy_loss         | -176        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -1.58       |
|    n_updates            | 21410       |
|    policy_gradient_loss | -0.0109     |
|    reward               | 0.033961084 |
|    std                  | 2.71e+10    |
|    value_loss           | 0.715       |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 133        |
|    iterations           | 2143     

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2152        |
|    time_elapsed         | 2063        |
|    total_timesteps      | 275456      |
| train/                  |             |
|    approx_kl            | 0.02163313  |
|    clip_fraction        | 0.159       |
|    clip_range           | 0.2         |
|    entropy_loss         | -177        |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0421      |
|    loss                 | 0.481       |
|    n_updates            | 21510       |
|    policy_gradient_loss | 0.00675     |
|    reward               | -0.15331037 |
|    std                  | 2.83e+10    |
|    value_loss           | 4.6         |
-----------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 133           |
|    iterations           | 

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2162        |
|    time_elapsed         | 2074        |
|    total_timesteps      | 276736      |
| train/                  |             |
|    approx_kl            | 0.041785367 |
|    clip_fraction        | 0.204       |
|    clip_range           | 0.2         |
|    entropy_loss         | -178        |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0421      |
|    loss                 | -1.31       |
|    n_updates            | 21610       |
|    policy_gradient_loss | -0.0139     |
|    reward               | -0.07377975 |
|    std                  | 2.29e+10    |
|    value_loss           | 0.808       |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 133        |
|    iterations           | 2163     

----------------------------------------
| time/                   |            |
|    fps                  | 133        |
|    iterations           | 2172       |
|    time_elapsed         | 2082       |
|    total_timesteps      | 278016     |
| train/                  |            |
|    approx_kl            | 0.07535258 |
|    clip_fraction        | 0.201      |
|    clip_range           | 0.2        |
|    entropy_loss         | -178       |
|    explained_variance   | 0          |
|    learning_rate        | 0.0421     |
|    loss                 | -1.08      |
|    n_updates            | 21710      |
|    policy_gradient_loss | -0.0158    |
|    reward               | 1.010347   |
|    std                  | 1.67e+10   |
|    value_loss           | 1.69       |
----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 133          |
|    iterations           | 2173         |
|    tim

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2182        |
|    time_elapsed         | 2092        |
|    total_timesteps      | 279296      |
| train/                  |             |
|    approx_kl            | 0.039197538 |
|    clip_fraction        | 0.211       |
|    clip_range           | 0.2         |
|    entropy_loss         | -179        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -1.75       |
|    n_updates            | 21810       |
|    policy_gradient_loss | -0.0109     |
|    reward               | -0.09793375 |
|    std                  | 1.69e+10    |
|    value_loss           | 0.292       |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2183  

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2192        |
|    time_elapsed         | 2106        |
|    total_timesteps      | 280576      |
| train/                  |             |
|    approx_kl            | 0.022388492 |
|    clip_fraction        | 0.1         |
|    clip_range           | 0.2         |
|    entropy_loss         | -181        |
|    explained_variance   | 1.19e-07    |
|    learning_rate        | 0.0421      |
|    loss                 | -1.59       |
|    n_updates            | 21910       |
|    policy_gradient_loss | -0.0194     |
|    reward               | -0.0375754  |
|    std                  | 2.19e+10    |
|    value_loss           | 0.855       |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2193  

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2202        |
|    time_elapsed         | 2116        |
|    total_timesteps      | 281856      |
| train/                  |             |
|    approx_kl            | 0.045132257 |
|    clip_fraction        | 0.151       |
|    clip_range           | 0.2         |
|    entropy_loss         | -182        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 2.5         |
|    n_updates            | 22010       |
|    policy_gradient_loss | -0.0163     |
|    reward               | -0.11097402 |
|    std                  | 2.22e+10    |
|    value_loss           | 9.72        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2203  

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2212        |
|    time_elapsed         | 2125        |
|    total_timesteps      | 283136      |
| train/                  |             |
|    approx_kl            | 0.10999675  |
|    clip_fraction        | 0.301       |
|    clip_range           | 0.2         |
|    entropy_loss         | -183        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -1.76       |
|    n_updates            | 22110       |
|    policy_gradient_loss | -0.0163     |
|    reward               | 0.026235629 |
|    std                  | 2.08e+10    |
|    value_loss           | 0.408       |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2213  

----------------------------------------
| time/                   |            |
|    fps                  | 133        |
|    iterations           | 2222       |
|    time_elapsed         | 2134       |
|    total_timesteps      | 284416     |
| train/                  |            |
|    approx_kl            | 0.06819006 |
|    clip_fraction        | 0.24       |
|    clip_range           | 0.2        |
|    entropy_loss         | -183       |
|    explained_variance   | 1.19e-07   |
|    learning_rate        | 0.0421     |
|    loss                 | -1         |
|    n_updates            | 22210      |
|    policy_gradient_loss | -0.0189    |
|    reward               | 0.32161582 |
|    std                  | 2.22e+10   |
|    value_loss           | 1.91       |
----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 133       |
|    iterations           | 2223      |
|    time_elapsed   

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2232        |
|    time_elapsed         | 2146        |
|    total_timesteps      | 285696      |
| train/                  |             |
|    approx_kl            | 0.032568205 |
|    clip_fraction        | 0.272       |
|    clip_range           | 0.2         |
|    entropy_loss         | -186        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -2          |
|    n_updates            | 22310       |
|    policy_gradient_loss | -0.0185     |
|    reward               | -0.02296817 |
|    std                  | 3.18e+10    |
|    value_loss           | 0.14        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2233  

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2242        |
|    time_elapsed         | 2155        |
|    total_timesteps      | 286976      |
| train/                  |             |
|    approx_kl            | 0.021734754 |
|    clip_fraction        | 0.12        |
|    clip_range           | 0.2         |
|    entropy_loss         | -186        |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0421      |
|    loss                 | -1.77       |
|    n_updates            | 22410       |
|    policy_gradient_loss | -0.00636    |
|    reward               | -0.10911515 |
|    std                  | 4.31e+10    |
|    value_loss           | 0.478       |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2243  

-----------------------------------------
| time/                   |             |
|    fps                  | 132         |
|    iterations           | 2252        |
|    time_elapsed         | 2167        |
|    total_timesteps      | 288256      |
| train/                  |             |
|    approx_kl            | 0.015653843 |
|    clip_fraction        | 0.0781      |
|    clip_range           | 0.2         |
|    entropy_loss         | -185        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -1.7        |
|    n_updates            | 22510       |
|    policy_gradient_loss | -0.00462    |
|    reward               | 0.049568854 |
|    std                  | 4.7e+10     |
|    value_loss           | 0.879       |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 133        |
|    iterations           | 2253     

----------------------------------------
| time/                   |            |
|    fps                  | 133        |
|    iterations           | 2262       |
|    time_elapsed         | 2175       |
|    total_timesteps      | 289536     |
| train/                  |            |
|    approx_kl            | 0.04162695 |
|    clip_fraction        | 0.255      |
|    clip_range           | 0.2        |
|    entropy_loss         | -186       |
|    explained_variance   | -1.19e-07  |
|    learning_rate        | 0.0421     |
|    loss                 | -1.77      |
|    n_updates            | 22610      |
|    policy_gradient_loss | -0.0106    |
|    reward               | 0.51580137 |
|    std                  | 4.34e+10   |
|    value_loss           | 0.583      |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2263        |
|    time_el

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2272        |
|    time_elapsed         | 2183        |
|    total_timesteps      | 290816      |
| train/                  |             |
|    approx_kl            | 0.059366267 |
|    clip_fraction        | 0.184       |
|    clip_range           | 0.2         |
|    entropy_loss         | -188        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | 2.49        |
|    n_updates            | 22710       |
|    policy_gradient_loss | -0.00996    |
|    reward               | 0.110924035 |
|    std                  | 4.81e+10    |
|    value_loss           | 9.09        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2273  

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2282        |
|    time_elapsed         | 2194        |
|    total_timesteps      | 292096      |
| train/                  |             |
|    approx_kl            | 0.047052402 |
|    clip_fraction        | 0.263       |
|    clip_range           | 0.2         |
|    entropy_loss         | -189        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -1.91       |
|    n_updates            | 22810       |
|    policy_gradient_loss | -0.00848    |
|    reward               | 0.11517123  |
|    std                  | 6.18e+10    |
|    value_loss           | 0.293       |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 133        |
|    iterations           | 2283     

----------------------------------------
| time/                   |            |
|    fps                  | 133        |
|    iterations           | 2292       |
|    time_elapsed         | 2202       |
|    total_timesteps      | 293376     |
| train/                  |            |
|    approx_kl            | 0.04634144 |
|    clip_fraction        | 0.189      |
|    clip_range           | 0.2        |
|    entropy_loss         | -190       |
|    explained_variance   | 1.19e-07   |
|    learning_rate        | 0.0421     |
|    loss                 | -1.77      |
|    n_updates            | 22910      |
|    policy_gradient_loss | -0.0174    |
|    reward               | 0.8316935  |
|    std                  | 6.45e+10   |
|    value_loss           | 0.602      |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2293        |
|    time_el

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2302        |
|    time_elapsed         | 2215        |
|    total_timesteps      | 294656      |
| train/                  |             |
|    approx_kl            | 0.08546441  |
|    clip_fraction        | 0.352       |
|    clip_range           | 0.2         |
|    entropy_loss         | -190        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -2.03       |
|    n_updates            | 23010       |
|    policy_gradient_loss | -0.0222     |
|    reward               | -0.14137077 |
|    std                  | 9.45e+10    |
|    value_loss           | 0.158       |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 133        |
|    iterations           | 2303     

------------------------------------------
| time/                   |              |
|    fps                  | 133          |
|    iterations           | 2312         |
|    time_elapsed         | 2224         |
|    total_timesteps      | 295936       |
| train/                  |              |
|    approx_kl            | 0.0075497217 |
|    clip_fraction        | 0.0312       |
|    clip_range           | 0.2          |
|    entropy_loss         | -190         |
|    explained_variance   | 0            |
|    learning_rate        | 0.0421       |
|    loss                 | -0.648       |
|    n_updates            | 23110        |
|    policy_gradient_loss | -0.00651     |
|    reward               | -0.9389227   |
|    std                  | 8.99e+10     |
|    value_loss           | 2.84         |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iteration

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2322        |
|    time_elapsed         | 2233        |
|    total_timesteps      | 297216      |
| train/                  |             |
|    approx_kl            | 0.033044998 |
|    clip_fraction        | 0.14        |
|    clip_range           | 0.2         |
|    entropy_loss         | -192        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -2.1        |
|    n_updates            | 23210       |
|    policy_gradient_loss | -0.0148     |
|    reward               | 0.1464799   |
|    std                  | 1e+11       |
|    value_loss           | 0.169       |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 133          |
|    iterations           | 232

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2332        |
|    time_elapsed         | 2242        |
|    total_timesteps      | 298496      |
| train/                  |             |
|    approx_kl            | 0.015630743 |
|    clip_fraction        | 0.191       |
|    clip_range           | 0.2         |
|    entropy_loss         | -191        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0421      |
|    loss                 | -2.04       |
|    n_updates            | 23310       |
|    policy_gradient_loss | -0.00811    |
|    reward               | -0.51166147 |
|    std                  | 1.17e+11    |
|    value_loss           | 0.115       |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2333  

-----------------------------------------
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2342        |
|    time_elapsed         | 2252        |
|    total_timesteps      | 299776      |
| train/                  |             |
|    approx_kl            | 0.040756315 |
|    clip_fraction        | 0.255       |
|    clip_range           | 0.2         |
|    entropy_loss         | -194        |
|    explained_variance   | 1.19e-07    |
|    learning_rate        | 0.0421      |
|    loss                 | -0.749      |
|    n_updates            | 23410       |
|    policy_gradient_loss | -0.00589    |
|    reward               | -1.9736437  |
|    std                  | 1.87e+11    |
|    value_loss           | 2.77        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 133        |
|    iterations           | 2343     

0,1
entropy_loss,████▇▇▇▇▇▆▆▆▆▆▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁
loss,▁▃▂█▂▁▁▁▁▂▁▁▂▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
value_loss,▁▃▁█▁▁▁▁▁▂▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
entropy_loss,-194.55592
loss,3.77078
value_loss,11.82341


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 27bm6zep with config:
[34m[1mwandb[0m: 	batch_size: 283
[34m[1mwandb[0m: 	ent_coef: 0.09465194189906524
[34m[1mwandb[0m: 	learning_rate: 0.08132518883096407
[34m[1mwandb[0m: 	n_steps: 4098
[34m[1mwandb[0m: 	total_timesteps: 200000


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01678674236666969, max=1.0)…

{'ent_coef': 0.09465194189906524, 'n_steps': 4098, 'learning_rate': 0.08132518883096407, 'batch_size': 283}
Using cpu device


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=4098 and n_envs=1)
  f"You have specified a mini-batch size of {batch_size},"


-----------------------------------
| time/              |            |
|    fps             | 216        |
|    iterations      | 1          |
|    time_elapsed    | 18         |
|    total_timesteps | 4098       |
| train/             |            |
|    reward          | 0.07370392 |
-----------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 194        |
|    iterations           | 2          |
|    time_elapsed         | 42         |
|    total_timesteps      | 8196       |
| train/                  |            |
|    approx_kl            | 74.09546   |
|    clip_fraction        | 0.993      |
|    clip_range           | 0.2        |
|    entropy_loss         | -50.6      |
|    explained_variance   | 1.19e-07   |
|    learning_rate        | 0.0813     |
|    loss                 | -8.12      |
|    n_updates            | 10         |
|    policy_gradient_loss | 0.242      |
|    reward         

0,1
entropy_loss,█▆▅▂▁
loss,█▆▅▂▁
value_loss,▁▂█▃▅

0,1
entropy_loss,-330.18262
loss,-29.64627
value_loss,2.67896


Run 27bm6zep errored: ValueError('Expected parameter loc (Tensor of shape (283, 8)) of distribution Normal(loc: torch.Size([283, 8]), scale: torch.Size([283, 8])) to satisfy the constraint Real(), but found invalid values:\ntensor([[nan, nan, nan,  ..., nan, nan, nan],\n        [nan, nan, nan,  ..., nan, nan, nan],\n        [nan, nan, nan,  ..., nan, nan, nan],\n        ...,\n        [nan, nan, nan,  ..., nan, nan, nan],\n        [nan, nan, nan,  ..., nan, nan, nan],\n        [nan, nan, nan,  ..., nan, nan, nan]], grad_fn=<AddmmBackward0>)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run 27bm6zep errored: ValueError('Expected parameter loc (Tensor of shape (283, 8)) of distribution Normal(loc: torch.Size([283, 8]), scale: torch.Size([283, 8])) to satisfy the constraint Real(), but found invalid values:\ntensor([[nan, nan, nan,  ..., nan, nan, nan],\n        [nan, nan, nan,  ..., nan, nan, nan],\n        [nan, nan, nan,  ..., nan, nan, nan],\n        ...,\n        [nan, nan, nan,  ..., nan

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016835304866670717, max=1.0…

{'ent_coef': 0.03660023748735838, 'n_steps': 256, 'learning_rate': 0.0916384297718498, 'batch_size': 51}
Using cpu device


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=256 and n_envs=1)
  f"You have specified a mini-batch size of {batch_size},"


------------------------------------
| time/              |             |
|    fps             | 140         |
|    iterations      | 1           |
|    time_elapsed    | 1           |
|    total_timesteps | 256         |
| train/             |             |
|    reward          | -0.23859808 |
------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 122        |
|    iterations           | 2          |
|    time_elapsed         | 4          |
|    total_timesteps      | 512        |
| train/                  |            |
|    approx_kl            | 19.393034  |
|    clip_fraction        | 0.978      |
|    clip_range           | 0.2        |
|    entropy_loss         | -19.2      |
|    explained_variance   | 0          |
|    learning_rate        | 0.0916     |
|    loss                 | -0.934     |
|    n_updates            | 10         |
|    policy_gradient_loss | 0.245      |
|    reward

day: 3060, episode: 110
begin_total_asset: 1000000.00
end_total_asset: 3921573.36
total_reward: 2921573.36
total_cost: 21636.29
total_trades: 20856
Sharpe: 0.774
------------------------------------------
| time/                   |              |
|    fps                  | 126          |
|    iterations           | 12           |
|    time_elapsed         | 24           |
|    total_timesteps      | 3072         |
| train/                  |              |
|    approx_kl            | 0.3469371    |
|    clip_fraction        | 0.585        |
|    clip_range           | 0.2          |
|    entropy_loss         | -58.6        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0916       |
|    loss                 | -5.58        |
|    n_updates            | 110          |
|    policy_gradient_loss | -0.557       |
|    reward               | -0.044893343 |
|    std                  | 766          |
|    value_loss           | 78.7         |
---------------------

----------------------------------------
| time/                   |            |
|    fps                  | 126        |
|    iterations           | 22         |
|    time_elapsed         | 44         |
|    total_timesteps      | 5632       |
| train/                  |            |
|    approx_kl            | 0.29496172 |
|    clip_fraction        | 0.573      |
|    clip_range           | 0.2        |
|    entropy_loss         | -68.5      |
|    explained_variance   | -2.38e-07  |
|    learning_rate        | 0.0916     |
|    loss                 | -2.43      |
|    n_updates            | 210        |
|    policy_gradient_loss | -0.022     |
|    reward               | 0.56443995 |
|    std                  | 4.68e+03   |
|    value_loss           | 18.7       |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 125         |
|    iterations           | 23          |
|    time_el

-----------------------------------------
| time/                   |             |
|    fps                  | 109         |
|    iterations           | 32          |
|    time_elapsed         | 74          |
|    total_timesteps      | 8192        |
| train/                  |             |
|    approx_kl            | 1.0415784   |
|    clip_fraction        | 0.765       |
|    clip_range           | 0.2         |
|    entropy_loss         | -83.8       |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0916      |
|    loss                 | -2.61       |
|    n_updates            | 310         |
|    policy_gradient_loss | 0.158       |
|    reward               | -0.36180374 |
|    std                  | 9.4e+04     |
|    value_loss           | 2.85        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 109         |
|    iterations           | 33    

------------------------------------------
| time/                   |              |
|    fps                  | 66           |
|    iterations           | 42           |
|    time_elapsed         | 160          |
|    total_timesteps      | 10752        |
| train/                  |              |
|    approx_kl            | 0.14423291   |
|    clip_fraction        | 0.505        |
|    clip_range           | 0.2          |
|    entropy_loss         | -97.5        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0916       |
|    loss                 | 0.284        |
|    n_updates            | 410          |
|    policy_gradient_loss | 0.117        |
|    reward               | -0.056614995 |
|    std                  | 6.81e+05     |
|    value_loss           | 1.17         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 67           |
|    iterat

-----------------------------------------
| time/                   |             |
|    fps                  | 72          |
|    iterations           | 52          |
|    time_elapsed         | 184         |
|    total_timesteps      | 13312       |
| train/                  |             |
|    approx_kl            | 0.13038914  |
|    clip_fraction        | 0.62        |
|    clip_range           | 0.2         |
|    entropy_loss         | -102        |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0916      |
|    loss                 | -3.89       |
|    n_updates            | 510         |
|    policy_gradient_loss | 0.0312      |
|    reward               | -0.25321642 |
|    std                  | 9.88e+05    |
|    value_loss           | 1.11        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 72          |
|    iterations           | 53    

----------------------------------------
| time/                   |            |
|    fps                  | 77         |
|    iterations           | 62         |
|    time_elapsed         | 204        |
|    total_timesteps      | 15872      |
| train/                  |            |
|    approx_kl            | 0.15218209 |
|    clip_fraction        | 0.594      |
|    clip_range           | 0.2        |
|    entropy_loss         | -106       |
|    explained_variance   | 5.96e-08   |
|    learning_rate        | 0.0916     |
|    loss                 | -1.65      |
|    n_updates            | 610        |
|    policy_gradient_loss | 0.00185    |
|    reward               | 0.29031023 |
|    std                  | 1.58e+06   |
|    value_loss           | 2.02       |
----------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 78            |
|    iterations           | 63            |
|   

------------------------------------------
| time/                   |              |
|    fps                  | 80           |
|    iterations           | 72           |
|    time_elapsed         | 229          |
|    total_timesteps      | 18432        |
| train/                  |              |
|    approx_kl            | 0.11927366   |
|    clip_fraction        | 0.568        |
|    clip_range           | 0.2          |
|    entropy_loss         | -110         |
|    explained_variance   | 0            |
|    learning_rate        | 0.0916       |
|    loss                 | -3.57        |
|    n_updates            | 710          |
|    policy_gradient_loss | 0.0891       |
|    reward               | -0.011180052 |
|    std                  | 2.76e+06     |
|    value_loss           | 6.66         |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 80          |
|    iteration

-----------------------------------------
| time/                   |             |
|    fps                  | 81          |
|    iterations           | 82          |
|    time_elapsed         | 257         |
|    total_timesteps      | 20992       |
| train/                  |             |
|    approx_kl            | 0.5586543   |
|    clip_fraction        | 0.806       |
|    clip_range           | 0.2         |
|    entropy_loss         | -118        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0916      |
|    loss                 | 2.11        |
|    n_updates            | 810         |
|    policy_gradient_loss | 0.04        |
|    reward               | -0.30864188 |
|    std                  | 8.89e+06    |
|    value_loss           | 6.07        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 81          |
|    iterations           | 83    

----------------------------------------
| time/                   |            |
|    fps                  | 82         |
|    iterations           | 92         |
|    time_elapsed         | 287        |
|    total_timesteps      | 23552      |
| train/                  |            |
|    approx_kl            | 0.16349734 |
|    clip_fraction        | 0.529      |
|    clip_range           | 0.2        |
|    entropy_loss         | -120       |
|    explained_variance   | 0          |
|    learning_rate        | 0.0916     |
|    loss                 | -5.56      |
|    n_updates            | 910        |
|    policy_gradient_loss | -0.0903    |
|    reward               | -0.3933492 |
|    std                  | 9.91e+06   |
|    value_loss           | 0.967      |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 82          |
|    iterations           | 93          |
|    time_el

-----------------------------------------
| time/                   |             |
|    fps                  | 80          |
|    iterations           | 102         |
|    time_elapsed         | 324         |
|    total_timesteps      | 26112       |
| train/                  |             |
|    approx_kl            | 0.047659617 |
|    clip_fraction        | 0.353       |
|    clip_range           | 0.2         |
|    entropy_loss         | -127        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0916      |
|    loss                 | -4.33       |
|    n_updates            | 1010        |
|    policy_gradient_loss | -0.031      |
|    reward               | 0.049288154 |
|    std                  | 2.33e+07    |
|    value_loss           | 0.832       |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 80         |
|    iterations           | 103      

----------------------------------------
| time/                   |            |
|    fps                  | 79         |
|    iterations           | 112        |
|    time_elapsed         | 361        |
|    total_timesteps      | 28672      |
| train/                  |            |
|    approx_kl            | 0.13069844 |
|    clip_fraction        | 0.626      |
|    clip_range           | 0.2        |
|    entropy_loss         | -130       |
|    explained_variance   | 0          |
|    learning_rate        | 0.0916     |
|    loss                 | -4.76      |
|    n_updates            | 1110       |
|    policy_gradient_loss | 0.0592     |
|    reward               | 0.16315155 |
|    std                  | 1.63e+07   |
|    value_loss           | 0.619      |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 79         |
|    iterations           | 113        |
|    time_elapse

----------------------------------------
| time/                   |            |
|    fps                  | 79         |
|    iterations           | 122        |
|    time_elapsed         | 394        |
|    total_timesteps      | 31232      |
| train/                  |            |
|    approx_kl            | 0.21601568 |
|    clip_fraction        | 0.58       |
|    clip_range           | 0.2        |
|    entropy_loss         | -134       |
|    explained_variance   | 0          |
|    learning_rate        | 0.0916     |
|    loss                 | -3.18      |
|    n_updates            | 1210       |
|    policy_gradient_loss | 0.231      |
|    reward               | -0.4365762 |
|    std                  | 2.22e+07   |
|    value_loss           | 1.33       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 79         |
|    iterations           | 123        |
|    time_elapse

day: 3060, episode: 120
begin_total_asset: 1000000.00
end_total_asset: 1355566.96
total_reward: 355566.96
total_cost: 33136.99
total_trades: 24231
Sharpe: 0.392
----------------------------------------
| time/                   |            |
|    fps                  | 80         |
|    iterations           | 132        |
|    time_elapsed         | 420        |
|    total_timesteps      | 33792      |
| train/                  |            |
|    approx_kl            | 0.21769054 |
|    clip_fraction        | 0.467      |
|    clip_range           | 0.2        |
|    entropy_loss         | -140       |
|    explained_variance   | 0          |
|    learning_rate        | 0.0916     |
|    loss                 | 11.9       |
|    n_updates            | 1310       |
|    policy_gradient_loss | 0.175      |
|    reward               | 0.05848848 |
|    std                  | 1.08e+08   |
|    value_loss           | 8.14       |
----------------------------------------
-------------------

----------------------------------------
| time/                   |            |
|    fps                  | 80         |
|    iterations           | 142        |
|    time_elapsed         | 453        |
|    total_timesteps      | 36352      |
| train/                  |            |
|    approx_kl            | 0.16393106 |
|    clip_fraction        | 0.564      |
|    clip_range           | 0.2        |
|    entropy_loss         | -150       |
|    explained_variance   | 0          |
|    learning_rate        | 0.0916     |
|    loss                 | -7.24      |
|    n_updates            | 1410       |
|    policy_gradient_loss | -0.118     |
|    reward               | 0.4727399  |
|    std                  | 4.34e+08   |
|    value_loss           | 7.33       |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 80          |
|    iterations           | 143         |
|    time_el

----------------------------------------
| time/                   |            |
|    fps                  | 81         |
|    iterations           | 152        |
|    time_elapsed         | 478        |
|    total_timesteps      | 38912      |
| train/                  |            |
|    approx_kl            | 0.05216834 |
|    clip_fraction        | 0.382      |
|    clip_range           | 0.2        |
|    entropy_loss         | -154       |
|    explained_variance   | -1.19e-07  |
|    learning_rate        | 0.0916     |
|    loss                 | -5.21      |
|    n_updates            | 1510       |
|    policy_gradient_loss | 0.0205     |
|    reward               | 0.17578149 |
|    std                  | 4.88e+08   |
|    value_loss           | 1.93       |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 81          |
|    iterations           | 153         |
|    time_el

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 162          |
|    time_elapsed         | 496          |
|    total_timesteps      | 41472        |
| train/                  |              |
|    approx_kl            | 0.15457574   |
|    clip_fraction        | 0.55         |
|    clip_range           | 0.2          |
|    entropy_loss         | -158         |
|    explained_variance   | 0            |
|    learning_rate        | 0.0916       |
|    loss                 | -5.54        |
|    n_updates            | 1610         |
|    policy_gradient_loss | 0.0451       |
|    reward               | -0.048558097 |
|    std                  | 1.14e+09     |
|    value_loss           | 0.332        |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 83          |
|    iteration

-----------------------------------------
| time/                   |             |
|    fps                  | 85          |
|    iterations           | 172         |
|    time_elapsed         | 513         |
|    total_timesteps      | 44032       |
| train/                  |             |
|    approx_kl            | 0.11863029  |
|    clip_fraction        | 0.693       |
|    clip_range           | 0.2         |
|    entropy_loss         | -164        |
|    explained_variance   | 5.96e-08    |
|    learning_rate        | 0.0916      |
|    loss                 | -5.56       |
|    n_updates            | 1710        |
|    policy_gradient_loss | 0.0836      |
|    reward               | -0.23888943 |
|    std                  | 2.37e+09    |
|    value_loss           | 4.01        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 173   

-----------------------------------------
| time/                   |             |
|    fps                  | 87          |
|    iterations           | 182         |
|    time_elapsed         | 529         |
|    total_timesteps      | 46592       |
| train/                  |             |
|    approx_kl            | 0.4373671   |
|    clip_fraction        | 0.656       |
|    clip_range           | 0.2         |
|    entropy_loss         | -172        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0916      |
|    loss                 | -5.82       |
|    n_updates            | 1810        |
|    policy_gradient_loss | 0.132       |
|    reward               | 0.112809844 |
|    std                  | 8.01e+09    |
|    value_loss           | 1.55        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 88         |
|    iterations           | 183      

-----------------------------------------
| time/                   |             |
|    fps                  | 89          |
|    iterations           | 192         |
|    time_elapsed         | 546         |
|    total_timesteps      | 49152       |
| train/                  |             |
|    approx_kl            | 0.20476806  |
|    clip_fraction        | 0.534       |
|    clip_range           | 0.2         |
|    entropy_loss         | -179        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0916      |
|    loss                 | 9.9         |
|    n_updates            | 1910        |
|    policy_gradient_loss | 0.29        |
|    reward               | -0.01186762 |
|    std                  | 2.01e+10    |
|    value_loss           | 17.8        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 90          |
|    iterations           | 193   

----------------------------------------
| time/                   |            |
|    fps                  | 90         |
|    iterations           | 202        |
|    time_elapsed         | 569        |
|    total_timesteps      | 51712      |
| train/                  |            |
|    approx_kl            | 0.10339857 |
|    clip_fraction        | 0.523      |
|    clip_range           | 0.2        |
|    entropy_loss         | -183       |
|    explained_variance   | 0          |
|    learning_rate        | 0.0916     |
|    loss                 | -5.51      |
|    n_updates            | 2010       |
|    policy_gradient_loss | -0.213     |
|    reward               | -4.5598288 |
|    std                  | 2.55e+10   |
|    value_loss           | 6.67       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 91         |
|    iterations           | 203        |
|    time_elapse

----------------------------------------
| time/                   |            |
|    fps                  | 92         |
|    iterations           | 212        |
|    time_elapsed         | 585        |
|    total_timesteps      | 54272      |
| train/                  |            |
|    approx_kl            | 0.1628343  |
|    clip_fraction        | 0.606      |
|    clip_range           | 0.2        |
|    entropy_loss         | -187       |
|    explained_variance   | 0          |
|    learning_rate        | 0.0916     |
|    loss                 | -6.8       |
|    n_updates            | 2110       |
|    policy_gradient_loss | -0.104     |
|    reward               | 0.78962535 |
|    std                  | 5.99e+10   |
|    value_loss           | 9.57       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 92         |
|    iterations           | 213        |
|    time_elapse

-----------------------------------------
| time/                   |             |
|    fps                  | 93          |
|    iterations           | 222         |
|    time_elapsed         | 605         |
|    total_timesteps      | 56832       |
| train/                  |             |
|    approx_kl            | 0.02349779  |
|    clip_fraction        | 0.566       |
|    clip_range           | 0.2         |
|    entropy_loss         | -195        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0916      |
|    loss                 | -5.86       |
|    n_updates            | 2210        |
|    policy_gradient_loss | -0.0377     |
|    reward               | -0.13757701 |
|    std                  | 1.44e+11    |
|    value_loss           | 0.954       |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 93          |
|    iterations           | 223   

-----------------------------------------
| time/                   |             |
|    fps                  | 95          |
|    iterations           | 232         |
|    time_elapsed         | 623         |
|    total_timesteps      | 59392       |
| train/                  |             |
|    approx_kl            | 0.040376097 |
|    clip_fraction        | 0.483       |
|    clip_range           | 0.2         |
|    entropy_loss         | -201        |
|    explained_variance   | 1.19e-07    |
|    learning_rate        | 0.0916      |
|    loss                 | -7.31       |
|    n_updates            | 2310        |
|    policy_gradient_loss | -0.0332     |
|    reward               | 0.3087995   |
|    std                  | 3e+11       |
|    value_loss           | 0.509       |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 95          |
|    iterations           | 233   

-----------------------------------------
| time/                   |             |
|    fps                  | 96          |
|    iterations           | 242         |
|    time_elapsed         | 641         |
|    total_timesteps      | 61952       |
| train/                  |             |
|    approx_kl            | 0.032446686 |
|    clip_fraction        | 0.509       |
|    clip_range           | 0.2         |
|    entropy_loss         | -206        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0916      |
|    loss                 | -7.52       |
|    n_updates            | 2410        |
|    policy_gradient_loss | -0.0568     |
|    reward               | -0.14677235 |
|    std                  | 6.07e+11    |
|    value_loss           | 0.505       |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 96         |
|    iterations           | 243      

day: 3060, episode: 130
begin_total_asset: 1000000.00
end_total_asset: 1347760.02
total_reward: 347760.02
total_cost: 32962.31
total_trades: 24168
Sharpe: 0.430
----------------------------------------
| time/                   |            |
|    fps                  | 96         |
|    iterations           | 252        |
|    time_elapsed         | 667        |
|    total_timesteps      | 64512      |
| train/                  |            |
|    approx_kl            | 0.13740365 |
|    clip_fraction        | 0.667      |
|    clip_range           | 0.2        |
|    entropy_loss         | -208       |
|    explained_variance   | -1.19e-07  |
|    learning_rate        | 0.0916     |
|    loss                 | -8.93      |
|    n_updates            | 2510       |
|    policy_gradient_loss | 0.129      |
|    reward               | -0.4474704 |
|    std                  | 1.09e+12   |
|    value_loss           | 8.4        |
----------------------------------------
-------------------

-----------------------------------------
| time/                   |             |
|    fps                  | 96          |
|    iterations           | 262         |
|    time_elapsed         | 691         |
|    total_timesteps      | 67072       |
| train/                  |             |
|    approx_kl            | 0.067582525 |
|    clip_fraction        | 0.505       |
|    clip_range           | 0.2         |
|    entropy_loss         | -214        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0916      |
|    loss                 | -7.51       |
|    n_updates            | 2610        |
|    policy_gradient_loss | -0.0683     |
|    reward               | 0.5282364   |
|    std                  | 1.74e+12    |
|    value_loss           | 1.53        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 96         |
|    iterations           | 263      

-----------------------------------------
| time/                   |             |
|    fps                  | 96          |
|    iterations           | 272         |
|    time_elapsed         | 718         |
|    total_timesteps      | 69632       |
| train/                  |             |
|    approx_kl            | 0.20618208  |
|    clip_fraction        | 0.725       |
|    clip_range           | 0.2         |
|    entropy_loss         | -222        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0916      |
|    loss                 | -4.98       |
|    n_updates            | 2710        |
|    policy_gradient_loss | 0.348       |
|    reward               | -0.06726576 |
|    std                  | 6.36e+12    |
|    value_loss           | 4.41        |
-----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 96        |
|    iterations           | 273       |


----------------------------------------
| time/                   |            |
|    fps                  | 97         |
|    iterations           | 282        |
|    time_elapsed         | 743        |
|    total_timesteps      | 72192      |
| train/                  |            |
|    approx_kl            | 0.67539406 |
|    clip_fraction        | 0.786      |
|    clip_range           | 0.2        |
|    entropy_loss         | -232       |
|    explained_variance   | 5.96e-08   |
|    learning_rate        | 0.0916     |
|    loss                 | -8.07      |
|    n_updates            | 2810       |
|    policy_gradient_loss | 0.151      |
|    reward               | 0.27165523 |
|    std                  | 2.93e+13   |
|    value_loss           | 1.02       |
----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 97        |
|    iterations           | 283       |
|    time_elapsed   

------------------------------------------
| time/                   |              |
|    fps                  | 97           |
|    iterations           | 292          |
|    time_elapsed         | 769          |
|    total_timesteps      | 74752        |
| train/                  |              |
|    approx_kl            | 1.321052     |
|    clip_fraction        | 0.667        |
|    clip_range           | 0.2          |
|    entropy_loss         | -241         |
|    explained_variance   | 0            |
|    learning_rate        | 0.0916       |
|    loss                 | -4.18        |
|    n_updates            | 2910         |
|    policy_gradient_loss | 0.187        |
|    reward               | -0.058521505 |
|    std                  | 8.96e+13     |
|    value_loss           | 1.52         |
------------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 97         |
|    iterations  

------------------------------------------
| time/                   |              |
|    fps                  | 96           |
|    iterations           | 302          |
|    time_elapsed         | 799          |
|    total_timesteps      | 77312        |
| train/                  |              |
|    approx_kl            | 0.102998056  |
|    clip_fraction        | 0.546        |
|    clip_range           | 0.2          |
|    entropy_loss         | -246         |
|    explained_variance   | 0            |
|    learning_rate        | 0.0916       |
|    loss                 | -8.55        |
|    n_updates            | 3010         |
|    policy_gradient_loss | 0.0871       |
|    reward               | -0.004445005 |
|    std                  | 1.31e+14     |
|    value_loss           | 0.733        |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 96          |
|    iteration

-----------------------------------------
| time/                   |             |
|    fps                  | 95          |
|    iterations           | 312         |
|    time_elapsed         | 832         |
|    total_timesteps      | 79872       |
| train/                  |             |
|    approx_kl            | 0.15602432  |
|    clip_fraction        | 0.534       |
|    clip_range           | 0.2         |
|    entropy_loss         | -250        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0916      |
|    loss                 | -2.09       |
|    n_updates            | 3110        |
|    policy_gradient_loss | 0.482       |
|    reward               | -0.03352193 |
|    std                  | 2.21e+14    |
|    value_loss           | 42.3        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 95         |
|    iterations           | 313      

----------------------------------------
| time/                   |            |
|    fps                  | 95         |
|    iterations           | 322        |
|    time_elapsed         | 860        |
|    total_timesteps      | 82432      |
| train/                  |            |
|    approx_kl            | 0.15871155 |
|    clip_fraction        | 0.584      |
|    clip_range           | 0.2        |
|    entropy_loss         | -253       |
|    explained_variance   | 0          |
|    learning_rate        | 0.0916     |
|    loss                 | -6.25      |
|    n_updates            | 3210       |
|    policy_gradient_loss | -0.0314    |
|    reward               | -0.2136644 |
|    std                  | 2.95e+14   |
|    value_loss           | 3.66       |
----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 323          |
|    tim

------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 332          |
|    time_elapsed         | 889          |
|    total_timesteps      | 84992        |
| train/                  |              |
|    approx_kl            | 0.074073754  |
|    clip_fraction        | 0.526        |
|    clip_range           | 0.2          |
|    entropy_loss         | -260         |
|    explained_variance   | 0            |
|    learning_rate        | 0.0916       |
|    loss                 | -6.62        |
|    n_updates            | 3310         |
|    policy_gradient_loss | -0.00722     |
|    reward               | -0.011175106 |
|    std                  | 6.45e+14     |
|    value_loss           | 1.43         |
------------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 95         |
|    iterations  

-----------------------------------------
| time/                   |             |
|    fps                  | 94          |
|    iterations           | 342         |
|    time_elapsed         | 925         |
|    total_timesteps      | 87552       |
| train/                  |             |
|    approx_kl            | 0.06547597  |
|    clip_fraction        | 0.531       |
|    clip_range           | 0.2         |
|    entropy_loss         | -268        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0916      |
|    loss                 | -10.9       |
|    n_updates            | 3410        |
|    policy_gradient_loss | -0.0367     |
|    reward               | -0.16482416 |
|    std                  | 2.24e+15    |
|    value_loss           | 0.389       |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 94         |
|    iterations           | 343      

----------------------------------------
| time/                   |            |
|    fps                  | 94         |
|    iterations           | 352        |
|    time_elapsed         | 957        |
|    total_timesteps      | 90112      |
| train/                  |            |
|    approx_kl            | 0.13027509 |
|    clip_fraction        | 0.524      |
|    clip_range           | 0.2        |
|    entropy_loss         | -270       |
|    explained_variance   | -1.19e-07  |
|    learning_rate        | 0.0916     |
|    loss                 | -9.8       |
|    n_updates            | 3510       |
|    policy_gradient_loss | -0.00105   |
|    reward               | 0.29191443 |
|    std                  | 3.43e+15   |
|    value_loss           | 1.96       |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 94          |
|    iterations           | 353         |
|    time_el

-----------------------------------------
| time/                   |             |
|    fps                  | 93          |
|    iterations           | 362         |
|    time_elapsed         | 988         |
|    total_timesteps      | 92672       |
| train/                  |             |
|    approx_kl            | 0.24333525  |
|    clip_fraction        | 0.63        |
|    clip_range           | 0.2         |
|    entropy_loss         | -275        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0916      |
|    loss                 | -9.89       |
|    n_updates            | 3610        |
|    policy_gradient_loss | 0.116       |
|    reward               | -0.18977083 |
|    std                  | 7.27e+15    |
|    value_loss           | 0.665       |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 93           |
|    iterations           | 363

-----------------------------------------
| time/                   |             |
|    fps                  | 94          |
|    iterations           | 372         |
|    time_elapsed         | 1009        |
|    total_timesteps      | 95232       |
| train/                  |             |
|    approx_kl            | 0.45696446  |
|    clip_fraction        | 0.667       |
|    clip_range           | 0.2         |
|    entropy_loss         | -280        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0916      |
|    loss                 | -2.22       |
|    n_updates            | 3710        |
|    policy_gradient_loss | 0.0203      |
|    reward               | -0.17106502 |
|    std                  | 2.24e+16    |
|    value_loss           | 13.8        |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 373

----------------------------------------
| time/                   |            |
|    fps                  | 94         |
|    iterations           | 382        |
|    time_elapsed         | 1034       |
|    total_timesteps      | 97792      |
| train/                  |            |
|    approx_kl            | 0.29386702 |
|    clip_fraction        | 0.765      |
|    clip_range           | 0.2        |
|    entropy_loss         | -288       |
|    explained_variance   | 0          |
|    learning_rate        | 0.0916     |
|    loss                 | -10.9      |
|    n_updates            | 3810       |
|    policy_gradient_loss | 0.127      |
|    reward               | 1.3276339  |
|    std                  | 5.96e+16   |
|    value_loss           | 5.93       |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 94          |
|    iterations           | 383         |
|    time_el

-----------------------------------------
| time/                   |             |
|    fps                  | 94          |
|    iterations           | 392         |
|    time_elapsed         | 1058        |
|    total_timesteps      | 100352      |
| train/                  |             |
|    approx_kl            | 0.109885246 |
|    clip_fraction        | 0.635       |
|    clip_range           | 0.2         |
|    entropy_loss         | -292        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0916      |
|    loss                 | -10.7       |
|    n_updates            | 3910        |
|    policy_gradient_loss | 0.197       |
|    reward               | 0.17196171  |
|    std                  | 7.5e+16     |
|    value_loss           | 2.84        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 94          |
|    iterations           | 393   

-----------------------------------------
| time/                   |             |
|    fps                  | 94          |
|    iterations           | 402         |
|    time_elapsed         | 1091        |
|    total_timesteps      | 102912      |
| train/                  |             |
|    approx_kl            | 0.044762526 |
|    clip_fraction        | 0.225       |
|    clip_range           | 0.2         |
|    entropy_loss         | -297        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0916      |
|    loss                 | -10.8       |
|    n_updates            | 4010        |
|    policy_gradient_loss | 0.0332      |
|    reward               | 0.09246244  |
|    std                  | 1.95e+17    |
|    value_loss           | 0.559       |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 94          |
|    iterations           | 403   

------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 412          |
|    time_elapsed         | 1115         |
|    total_timesteps      | 105472       |
| train/                  |              |
|    approx_kl            | 0.028674832  |
|    clip_fraction        | 0.561        |
|    clip_range           | 0.2          |
|    entropy_loss         | -300         |
|    explained_variance   | 0            |
|    learning_rate        | 0.0916       |
|    loss                 | -9.82        |
|    n_updates            | 4110         |
|    policy_gradient_loss | 0.0951       |
|    reward               | -0.018978138 |
|    std                  | 2.9e+17      |
|    value_loss           | 0.725        |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 94          |
|    iteration

-----------------------------------------
| time/                   |             |
|    fps                  | 94          |
|    iterations           | 422         |
|    time_elapsed         | 1144        |
|    total_timesteps      | 108032      |
| train/                  |             |
|    approx_kl            | 0.21969457  |
|    clip_fraction        | 0.562       |
|    clip_range           | 0.2         |
|    entropy_loss         | -305        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0916      |
|    loss                 | -9.68       |
|    n_updates            | 4210        |
|    policy_gradient_loss | 0.0343      |
|    reward               | 0.019418914 |
|    std                  | 4.74e+17    |
|    value_loss           | 1.21        |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 423

------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 432          |
|    time_elapsed         | 1170         |
|    total_timesteps      | 110592       |
| train/                  |              |
|    approx_kl            | 0.17259842   |
|    clip_fraction        | 0.67         |
|    clip_range           | 0.2          |
|    entropy_loss         | -309         |
|    explained_variance   | 0            |
|    learning_rate        | 0.0916       |
|    loss                 | -5.74        |
|    n_updates            | 4310         |
|    policy_gradient_loss | 0.0653       |
|    reward               | -0.020154623 |
|    std                  | 9.84e+17     |
|    value_loss           | 3.04         |
------------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 94         |
|    iterations  

----------------------------------------
| time/                   |            |
|    fps                  | 94         |
|    iterations           | 442        |
|    time_elapsed         | 1194       |
|    total_timesteps      | 113152     |
| train/                  |            |
|    approx_kl            | 0.13283633 |
|    clip_fraction        | 0.602      |
|    clip_range           | 0.2        |
|    entropy_loss         | -313       |
|    explained_variance   | 0          |
|    learning_rate        | 0.0916     |
|    loss                 | -8.16      |
|    n_updates            | 4410       |
|    policy_gradient_loss | 0.0634     |
|    reward               | 0.35667813 |
|    std                  | 1.35e+18   |
|    value_loss           | 3.76       |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 94          |
|    iterations           | 443         |
|    time_el

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
entropy_loss,██▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁
loss,▂▂▃▂▂▂▂▁▁▂▁▁▂▁▁▁█▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁
value_loss,▁▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
entropy_loss,-313.19748
loss,-9.45943
value_loss,2.89565


Run 6t59nh3x errored: ValueError('Expected parameter loc (Tensor of shape (51, 8)) of distribution Normal(loc: torch.Size([51, 8]), scale: torch.Size([51, 8])) to satisfy the constraint Real(), but found invalid values:\ntensor([[nan, nan, nan, nan, nan, nan, nan, nan],\n        [nan, nan, nan, nan, nan, nan, nan, nan],\n        [nan, nan, nan, nan, nan, nan, nan, nan],\n        [nan, nan, nan, nan, nan, nan, nan, nan],\n        [nan, nan, nan, nan, nan, nan, nan, nan],\n        [nan, nan, nan, nan, nan, nan, nan, nan],\n        [nan, nan, nan, nan, nan, nan, nan, nan],\n        [nan, nan, nan, nan, nan, nan, nan, nan],\n        [nan, nan, nan, nan, nan, nan, nan, nan],\n        [nan, nan, nan, nan, nan, nan, nan, nan],\n        [nan, nan, nan, nan, nan, nan, nan, nan],\n        [nan, nan, nan, nan, nan, nan, nan, nan],\n        [nan, nan, nan, nan, nan, nan, nan, nan],\n        [nan, nan, nan, nan, nan, nan, nan, nan],\n        [nan, nan, nan, nan, nan, nan, nan, nan],\n        [nan, 

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016839323616666964, max=1.0…

{'ent_coef': 0.053618160611286506, 'n_steps': 2048, 'learning_rate': 0.04621210110865151, 'batch_size': 162}
Using cpu device


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=2048 and n_envs=1)
  f"You have specified a mini-batch size of {batch_size},"


----------------------------------
| time/              |           |
|    fps             | 106       |
|    iterations      | 1         |
|    time_elapsed    | 19        |
|    total_timesteps | 2048      |
| train/             |           |
|    reward          | 0.4094982 |
----------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 111         |
|    iterations           | 2           |
|    time_elapsed         | 36          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 32.37876    |
|    clip_fraction        | 0.992       |
|    clip_range           | 0.2         |
|    entropy_loss         | -28.2       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0462      |
|    loss                 | -2.21       |
|    n_updates            | 10          |
|    policy_gradient_loss | 0.281       |
|    reward  

-------------------------------------------
| time/                   |               |
|    fps                  | 115           |
|    iterations           | 12            |
|    time_elapsed         | 212           |
|    total_timesteps      | 24576         |
| train/                  |               |
|    approx_kl            | 0.05144752    |
|    clip_fraction        | 0.475         |
|    clip_range           | 0.2           |
|    entropy_loss         | -88.1         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0462        |
|    loss                 | -3.07         |
|    n_updates            | 110           |
|    policy_gradient_loss | 0.0235        |
|    reward               | -0.0052566044 |
|    std                  | 1.72e+04      |
|    value_loss           | 3.94          |
-------------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 110       

-----------------------------------------
| time/                   |             |
|    fps                  | 104         |
|    iterations           | 22          |
|    time_elapsed         | 430         |
|    total_timesteps      | 45056       |
| train/                  |             |
|    approx_kl            | 0.021657271 |
|    clip_fraction        | 0.311       |
|    clip_range           | 0.2         |
|    entropy_loss         | -91.1       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0462      |
|    loss                 | -3.56       |
|    n_updates            | 210         |
|    policy_gradient_loss | 0.0166      |
|    reward               | -0.36413372 |
|    std                  | 2.59e+04    |
|    value_loss           | 3.76        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 106         |
|    iterations           | 23    

-----------------------------------------
| time/                   |             |
|    fps                  | 90          |
|    iterations           | 32          |
|    time_elapsed         | 723         |
|    total_timesteps      | 65536       |
| train/                  |             |
|    approx_kl            | 0.019889113 |
|    clip_fraction        | 0.198       |
|    clip_range           | 0.2         |
|    entropy_loss         | -94.4       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0462      |
|    loss                 | -4.36       |
|    n_updates            | 310         |
|    policy_gradient_loss | 0.0172      |
|    reward               | -0.0682954  |
|    std                  | 3.95e+04    |
|    value_loss           | 1.54        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 91          |
|    iterations           | 33    

-----------------------------------------
| time/                   |             |
|    fps                  | 96          |
|    iterations           | 42          |
|    time_elapsed         | 894         |
|    total_timesteps      | 86016       |
| train/                  |             |
|    approx_kl            | 0.019514011 |
|    clip_fraction        | 0.313       |
|    clip_range           | 0.2         |
|    entropy_loss         | -97.8       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0462      |
|    loss                 | -3.34       |
|    n_updates            | 410         |
|    policy_gradient_loss | 0.016       |
|    reward               | -0.13231343 |
|    std                  | 6.27e+04    |
|    value_loss           | 2.45        |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 96           |
|    iterations           | 43 

-----------------------------------------
| time/                   |             |
|    fps                  | 99          |
|    iterations           | 52          |
|    time_elapsed         | 1069        |
|    total_timesteps      | 106496      |
| train/                  |             |
|    approx_kl            | 0.028563768 |
|    clip_fraction        | 0.318       |
|    clip_range           | 0.2         |
|    entropy_loss         | -101        |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0462      |
|    loss                 | -4.64       |
|    n_updates            | 510         |
|    policy_gradient_loss | 0.024       |
|    reward               | 0.043099727 |
|    std                  | 8.94e+04    |
|    value_loss           | 1.56        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 99          |
|    iterations           | 53    

-----------------------------------------
| time/                   |             |
|    fps                  | 99          |
|    iterations           | 62          |
|    time_elapsed         | 1272        |
|    total_timesteps      | 126976      |
| train/                  |             |
|    approx_kl            | 0.044360824 |
|    clip_fraction        | 0.329       |
|    clip_range           | 0.2         |
|    entropy_loss         | -104        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0462      |
|    loss                 | -4.94       |
|    n_updates            | 610         |
|    policy_gradient_loss | 0.0214      |
|    reward               | 0.40774348  |
|    std                  | 1.52e+05    |
|    value_loss           | 1.04        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 100         |
|    iterations           | 63    

---------------------------------------
| time/                   |           |
|    fps                  | 102       |
|    iterations           | 72        |
|    time_elapsed         | 1436      |
|    total_timesteps      | 147456    |
| train/                  |           |
|    approx_kl            | 0.0929949 |
|    clip_fraction        | 0.565     |
|    clip_range           | 0.2       |
|    entropy_loss         | -108      |
|    explained_variance   | 0         |
|    learning_rate        | 0.0462    |
|    loss                 | -5.23     |
|    n_updates            | 710       |
|    policy_gradient_loss | 0.031     |
|    reward               | 0.302255  |
|    std                  | 2.48e+05  |
|    value_loss           | 2.12      |
---------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 102        |
|    iterations           | 73         |
|    time_elapsed         | 1456    

------------------------------------------
| time/                   |              |
|    fps                  | 103          |
|    iterations           | 82           |
|    time_elapsed         | 1620         |
|    total_timesteps      | 167936       |
| train/                  |              |
|    approx_kl            | 0.045392852  |
|    clip_fraction        | 0.35         |
|    clip_range           | 0.2          |
|    entropy_loss         | -112         |
|    explained_variance   | 0            |
|    learning_rate        | 0.0462       |
|    loss                 | -5.27        |
|    n_updates            | 810          |
|    policy_gradient_loss | 0.0132       |
|    reward               | -0.119351804 |
|    std                  | 4.27e+05     |
|    value_loss           | 2.38         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 103          |
|    iterat

-----------------------------------------
| time/                   |             |
|    fps                  | 104         |
|    iterations           | 92          |
|    time_elapsed         | 1808        |
|    total_timesteps      | 188416      |
| train/                  |             |
|    approx_kl            | 0.032414928 |
|    clip_fraction        | 0.306       |
|    clip_range           | 0.2         |
|    entropy_loss         | -115        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0462      |
|    loss                 | -5.55       |
|    n_updates            | 910         |
|    policy_gradient_loss | 0.0199      |
|    reward               | -0.7396853  |
|    std                  | 5.81e+05    |
|    value_loss           | 1.42        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 104         |
|    iterations           | 93    

-----------------------------------------
| time/                   |             |
|    fps                  | 105         |
|    iterations           | 102         |
|    time_elapsed         | 1983        |
|    total_timesteps      | 208896      |
| train/                  |             |
|    approx_kl            | 0.036514528 |
|    clip_fraction        | 0.432       |
|    clip_range           | 0.2         |
|    entropy_loss         | -119        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0462      |
|    loss                 | -4.49       |
|    n_updates            | 1010        |
|    policy_gradient_loss | 0.0134      |
|    reward               | 0.080246836 |
|    std                  | 9.77e+05    |
|    value_loss           | 3.19        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 105         |
|    iterations           | 103   

-----------------------------------------
| time/                   |             |
|    fps                  | 107         |
|    iterations           | 112         |
|    time_elapsed         | 2132        |
|    total_timesteps      | 229376      |
| train/                  |             |
|    approx_kl            | 0.032097127 |
|    clip_fraction        | 0.371       |
|    clip_range           | 0.2         |
|    entropy_loss         | -124        |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0462      |
|    loss                 | -4.5        |
|    n_updates            | 1110        |
|    policy_gradient_loss | 0.0164      |
|    reward               | 0.57861507  |
|    std                  | 1.89e+06    |
|    value_loss           | 1.71        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 107         |
|    iterations           | 113   

-----------------------------------------
| time/                   |             |
|    fps                  | 107         |
|    iterations           | 122         |
|    time_elapsed         | 2315        |
|    total_timesteps      | 249856      |
| train/                  |             |
|    approx_kl            | 0.028969847 |
|    clip_fraction        | 0.367       |
|    clip_range           | 0.2         |
|    entropy_loss         | -128        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0462      |
|    loss                 | -4.81       |
|    n_updates            | 1210        |
|    policy_gradient_loss | 0.0144      |
|    reward               | -0.8721697  |
|    std                  | 3.01e+06    |
|    value_loss           | 5.9         |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 101          |
|    iterations           | 123

----------------------------------------
| time/                   |            |
|    fps                  | 100        |
|    iterations           | 132        |
|    time_elapsed         | 2687       |
|    total_timesteps      | 270336     |
| train/                  |            |
|    approx_kl            | 0.08801199 |
|    clip_fraction        | 0.449      |
|    clip_range           | 0.2        |
|    entropy_loss         | -132       |
|    explained_variance   | -1.19e-07  |
|    learning_rate        | 0.0462     |
|    loss                 | -5.93      |
|    n_updates            | 1310       |
|    policy_gradient_loss | 0.0212     |
|    reward               | 0.16096774 |
|    std                  | 5.31e+06   |
|    value_loss           | 1.97       |
----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 100          |
|    iterations           | 133          |
|    tim

-----------------------------------------
| time/                   |             |
|    fps                  | 101         |
|    iterations           | 142         |
|    time_elapsed         | 2851        |
|    total_timesteps      | 290816      |
| train/                  |             |
|    approx_kl            | 0.018714301 |
|    clip_fraction        | 0.316       |
|    clip_range           | 0.2         |
|    entropy_loss         | -136        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0462      |
|    loss                 | -5.93       |
|    n_updates            | 1410        |
|    policy_gradient_loss | 0.0175      |
|    reward               | 0.0515605   |
|    std                  | 9.41e+06    |
|    value_loss           | 4.2         |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 102         |
|    iterations           | 143   

0,1
entropy_loss,███▇▇▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▁▁▁
loss,▃▃█▄▃▃▃▃▃▂▃▃▃▃▃▃▄▂▂▄▂▂▂▂▃▂▂▂▂▂▂▂▂▂▂▂▁▂▁▁
value_loss,▁▁█▂▁▂▂▂▂▁▂▂▁▂▂▃▃▁▁▃▂▂▁▂▂▁▂▂▂▂▂▂▂▂▂▂▁▂▁▁

0,1
entropy_loss,-138.35916
loss,-6.14077
value_loss,2.53704


[34m[1mwandb[0m: Agent Starting Run: 3shcnosf with config:
[34m[1mwandb[0m: 	batch_size: 206
[34m[1mwandb[0m: 	ent_coef: 0.09397004594450954
[34m[1mwandb[0m: 	learning_rate: 0.06457664953113439
[34m[1mwandb[0m: 	n_steps: 128
[34m[1mwandb[0m: 	total_timesteps: 500000


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016753438883324635, max=1.0…

{'ent_coef': 0.09397004594450954, 'n_steps': 128, 'learning_rate': 0.06457664953113439, 'batch_size': 206}
Using cpu device


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=128 and n_envs=1)
  f"You have specified a mini-batch size of {batch_size},"


-------------------------------------
| time/              |              |
|    fps             | 86           |
|    iterations      | 1            |
|    time_elapsed    | 1            |
|    total_timesteps | 128          |
| train/             |              |
|    reward          | -0.047441825 |
-------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 75         |
|    iterations           | 2          |
|    time_elapsed         | 3          |
|    total_timesteps      | 256        |
| train/                  |            |
|    approx_kl            | 156.597    |
|    clip_fraction        | 0.881      |
|    clip_range           | 0.2        |
|    entropy_loss         | -12.3      |
|    explained_variance   | 0          |
|    learning_rate        | 0.0646     |
|    loss                 | -0.893     |
|    n_updates            | 10         |
|    policy_gradient_loss | 0.259      |
| 

----------------------------------------
| time/                   |            |
|    fps                  | 127        |
|    iterations           | 12         |
|    time_elapsed         | 12         |
|    total_timesteps      | 1536       |
| train/                  |            |
|    approx_kl            | 0.26181108 |
|    clip_fraction        | 0.374      |
|    clip_range           | 0.2        |
|    entropy_loss         | -32.5      |
|    explained_variance   | -6.08e-06  |
|    learning_rate        | 0.0646     |
|    loss                 | 6.15       |
|    n_updates            | 110        |
|    policy_gradient_loss | 0.00187    |
|    reward               | 1.0088363  |
|    std                  | 16.9       |
|    value_loss           | 18.4       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 126        |
|    iterations           | 13         |
|    time_elapse

-----------------------------------------
| time/                   |             |
|    fps                  | 123         |
|    iterations           | 22          |
|    time_elapsed         | 22          |
|    total_timesteps      | 2816        |
| train/                  |             |
|    approx_kl            | 0.028313836 |
|    clip_fraction        | 0.209       |
|    clip_range           | 0.2         |
|    entropy_loss         | -41.6       |
|    explained_variance   | 1.19e-07    |
|    learning_rate        | 0.0646      |
|    loss                 | 53.9        |
|    n_updates            | 210         |
|    policy_gradient_loss | 0.00203     |
|    reward               | 0.47220007  |
|    std                  | 58.8        |
|    value_loss           | 120         |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 123         |
|    iterations           | 23    

-----------------------------------------
| time/                   |             |
|    fps                  | 128         |
|    iterations           | 32          |
|    time_elapsed         | 31          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.30146948  |
|    clip_fraction        | 0.477       |
|    clip_range           | 0.2         |
|    entropy_loss         | -51.9       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0646      |
|    loss                 | -3.34       |
|    n_updates            | 310         |
|    policy_gradient_loss | 0.0354      |
|    reward               | -0.13178718 |
|    std                  | 258         |
|    value_loss           | 3.7         |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 128        |
|    iterations           | 33       

-----------------------------------------
| time/                   |             |
|    fps                  | 124         |
|    iterations           | 42          |
|    time_elapsed         | 43          |
|    total_timesteps      | 5376        |
| train/                  |             |
|    approx_kl            | 0.09737699  |
|    clip_fraction        | 0.295       |
|    clip_range           | 0.2         |
|    entropy_loss         | -60.5       |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0646      |
|    loss                 | -2.73       |
|    n_updates            | 410         |
|    policy_gradient_loss | 0.0023      |
|    reward               | -0.16803177 |
|    std                  | 679         |
|    value_loss           | 6.2         |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 125         |
|    iterations           | 43    

----------------------------------------
| time/                   |            |
|    fps                  | 127        |
|    iterations           | 52         |
|    time_elapsed         | 52         |
|    total_timesteps      | 6656       |
| train/                  |            |
|    approx_kl            | 0.12826288 |
|    clip_fraction        | 0.416      |
|    clip_range           | 0.2        |
|    entropy_loss         | -70.5      |
|    explained_variance   | 0          |
|    learning_rate        | 0.0646     |
|    loss                 | -6.41      |
|    n_updates            | 510        |
|    policy_gradient_loss | 0.0149     |
|    reward               | 0.26487294 |
|    std                  | 2.29e+03   |
|    value_loss           | 0.333      |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 127         |
|    iterations           | 53          |
|    time_el

----------------------------------------
| time/                   |            |
|    fps                  | 128        |
|    iterations           | 62         |
|    time_elapsed         | 61         |
|    total_timesteps      | 7936       |
| train/                  |            |
|    approx_kl            | 0.15147471 |
|    clip_fraction        | 0.48       |
|    clip_range           | 0.2        |
|    entropy_loss         | -83.5      |
|    explained_variance   | 0          |
|    learning_rate        | 0.0646     |
|    loss                 | -7.24      |
|    n_updates            | 610        |
|    policy_gradient_loss | 0.0125     |
|    reward               | 0.6494723  |
|    std                  | 1.4e+04    |
|    value_loss           | 1.37       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 128        |
|    iterations           | 63         |
|    time_elapse

-----------------------------------------
| time/                   |             |
|    fps                  | 126         |
|    iterations           | 72          |
|    time_elapsed         | 72          |
|    total_timesteps      | 9216        |
| train/                  |             |
|    approx_kl            | 0.38117546  |
|    clip_fraction        | 0.573       |
|    clip_range           | 0.2         |
|    entropy_loss         | -94.1       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0646      |
|    loss                 | -5.68       |
|    n_updates            | 710         |
|    policy_gradient_loss | 0.0466      |
|    reward               | 0.052737672 |
|    std                  | 9.46e+04    |
|    value_loss           | 6.33        |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 127          |
|    iterations           | 73 

-----------------------------------------
| time/                   |             |
|    fps                  | 125         |
|    iterations           | 82          |
|    time_elapsed         | 83          |
|    total_timesteps      | 10496       |
| train/                  |             |
|    approx_kl            | 0.3312496   |
|    clip_fraction        | 0.579       |
|    clip_range           | 0.2         |
|    entropy_loss         | -109        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0646      |
|    loss                 | -9.9        |
|    n_updates            | 810         |
|    policy_gradient_loss | 0.0588      |
|    reward               | -0.15149957 |
|    std                  | 4.04e+05    |
|    value_loss           | 0.393       |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 125         |
|    iterations           | 83    

-----------------------------------------
| time/                   |             |
|    fps                  | 126         |
|    iterations           | 92          |
|    time_elapsed         | 93          |
|    total_timesteps      | 11776       |
| train/                  |             |
|    approx_kl            | 0.13850494  |
|    clip_fraction        | 0.409       |
|    clip_range           | 0.2         |
|    entropy_loss         | -120        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0646      |
|    loss                 | -10.8       |
|    n_updates            | 910         |
|    policy_gradient_loss | 0.0338      |
|    reward               | -0.77520454 |
|    std                  | 2.27e+06    |
|    value_loss           | 1.46        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 126        |
|    iterations           | 93       

-----------------------------------------
| time/                   |             |
|    fps                  | 127         |
|    iterations           | 102         |
|    time_elapsed         | 102         |
|    total_timesteps      | 13056       |
| train/                  |             |
|    approx_kl            | 0.39198756  |
|    clip_fraction        | 0.51        |
|    clip_range           | 0.2         |
|    entropy_loss         | -131        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0646      |
|    loss                 | -12.3       |
|    n_updates            | 1010        |
|    policy_gradient_loss | 0.00116     |
|    reward               | 0.036208726 |
|    std                  | 9.87e+06    |
|    value_loss           | 0.157       |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 128         |
|    iterations           | 103   

-----------------------------------------
| time/                   |             |
|    fps                  | 124         |
|    iterations           | 112         |
|    time_elapsed         | 115         |
|    total_timesteps      | 14336       |
| train/                  |             |
|    approx_kl            | 0.08521639  |
|    clip_fraction        | 0.438       |
|    clip_range           | 0.2         |
|    entropy_loss         | -150        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0646      |
|    loss                 | -13.7       |
|    n_updates            | 1110        |
|    policy_gradient_loss | 0.0193      |
|    reward               | -0.23108916 |
|    std                  | 1.1e+08     |
|    value_loss           | 1.04        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 121         |
|    iterations           | 113   

-----------------------------------------
| time/                   |             |
|    fps                  | 118         |
|    iterations           | 122         |
|    time_elapsed         | 131         |
|    total_timesteps      | 15616       |
| train/                  |             |
|    approx_kl            | 0.34587836  |
|    clip_fraction        | 0.523       |
|    clip_range           | 0.2         |
|    entropy_loss         | -163        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0646      |
|    loss                 | -15.2       |
|    n_updates            | 1210        |
|    policy_gradient_loss | 0.0227      |
|    reward               | -0.12504156 |
|    std                  | 6.67e+08    |
|    value_loss           | 0.125       |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 118          |
|    iterations           | 123

----------------------------------------
| time/                   |            |
|    fps                  | 116        |
|    iterations           | 132        |
|    time_elapsed         | 144        |
|    total_timesteps      | 16896      |
| train/                  |            |
|    approx_kl            | 0.23896825 |
|    clip_fraction        | 0.526      |
|    clip_range           | 0.2        |
|    entropy_loss         | -176       |
|    explained_variance   | 5.96e-08   |
|    learning_rate        | 0.0646     |
|    loss                 | -16.4      |
|    n_updates            | 1310       |
|    policy_gradient_loss | 0.0415     |
|    reward               | 0.18480964 |
|    std                  | 2.89e+09   |
|    value_loss           | 0.408      |
----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 117          |
|    iterations           | 133          |
|    tim

-----------------------------------------
| time/                   |             |
|    fps                  | 115         |
|    iterations           | 142         |
|    time_elapsed         | 157         |
|    total_timesteps      | 18176       |
| train/                  |             |
|    approx_kl            | 0.046091594 |
|    clip_fraction        | 0.25        |
|    clip_range           | 0.2         |
|    entropy_loss         | -185        |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0646      |
|    loss                 | -15.2       |
|    n_updates            | 1410        |
|    policy_gradient_loss | 0.0133      |
|    reward               | 0.015127885 |
|    std                  | 6.64e+09    |
|    value_loss           | 4.28        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 115        |
|    iterations           | 143      

-----------------------------------------
| time/                   |             |
|    fps                  | 117         |
|    iterations           | 152         |
|    time_elapsed         | 165         |
|    total_timesteps      | 19456       |
| train/                  |             |
|    approx_kl            | 0.15286905  |
|    clip_fraction        | 0.493       |
|    clip_range           | 0.2         |
|    entropy_loss         | -193        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0646      |
|    loss                 | -17.5       |
|    n_updates            | 1510        |
|    policy_gradient_loss | 0.0219      |
|    reward               | -0.09250541 |
|    std                  | 2.61e+10    |
|    value_loss           | 0.985       |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 117        |
|    iterations           | 153      

----------------------------------------
| time/                   |            |
|    fps                  | 118        |
|    iterations           | 162        |
|    time_elapsed         | 175        |
|    total_timesteps      | 20736      |
| train/                  |            |
|    approx_kl            | 0.1804941  |
|    clip_fraction        | 0.341      |
|    clip_range           | 0.2        |
|    entropy_loss         | -205       |
|    explained_variance   | 0          |
|    learning_rate        | 0.0646     |
|    loss                 | -18.7      |
|    n_updates            | 1610       |
|    policy_gradient_loss | 0.00543    |
|    reward               | 0.27567267 |
|    std                  | 1.83e+11   |
|    value_loss           | 1.12       |
----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 117          |
|    iterations           | 163          |
|    tim

-----------------------------------------
| time/                   |             |
|    fps                  | 118         |
|    iterations           | 172         |
|    time_elapsed         | 185         |
|    total_timesteps      | 22016       |
| train/                  |             |
|    approx_kl            | 0.058191743 |
|    clip_fraction        | 0.301       |
|    clip_range           | 0.2         |
|    entropy_loss         | -213        |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0646      |
|    loss                 | -19.8       |
|    n_updates            | 1710        |
|    policy_gradient_loss | 0.0131      |
|    reward               | -0.26522234 |
|    std                  | 5.87e+11    |
|    value_loss           | 0.934       |
-----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 118       |
|    iterations           | 173       |


------------------------------------------
| time/                   |              |
|    fps                  | 119          |
|    iterations           | 182          |
|    time_elapsed         | 194          |
|    total_timesteps      | 23296        |
| train/                  |              |
|    approx_kl            | 0.24972843   |
|    clip_fraction        | 0.496        |
|    clip_range           | 0.2          |
|    entropy_loss         | -226         |
|    explained_variance   | 0            |
|    learning_rate        | 0.0646       |
|    loss                 | -20.8        |
|    n_updates            | 1810         |
|    policy_gradient_loss | 0.0233       |
|    reward               | -0.051579777 |
|    std                  | 3.38e+12     |
|    value_loss           | 0.61         |
------------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 119        |
|    iterations  

-----------------------------------------
| time/                   |             |
|    fps                  | 119         |
|    iterations           | 192         |
|    time_elapsed         | 205         |
|    total_timesteps      | 24576       |
| train/                  |             |
|    approx_kl            | 0.20513062  |
|    clip_fraction        | 0.475       |
|    clip_range           | 0.2         |
|    entropy_loss         | -235        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0646      |
|    loss                 | -18.5       |
|    n_updates            | 1910        |
|    policy_gradient_loss | 0.0263      |
|    reward               | 0.009674006 |
|    std                  | 9.33e+12    |
|    value_loss           | 7.47        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 119         |
|    iterations           | 193   

----------------------------------------
| time/                   |            |
|    fps                  | 119        |
|    iterations           | 202        |
|    time_elapsed         | 215        |
|    total_timesteps      | 25856      |
| train/                  |            |
|    approx_kl            | 0.27277318 |
|    clip_fraction        | 0.594      |
|    clip_range           | 0.2        |
|    entropy_loss         | -248       |
|    explained_variance   | 0          |
|    learning_rate        | 0.0646     |
|    loss                 | -22.9      |
|    n_updates            | 2010       |
|    policy_gradient_loss | 0.0575     |
|    reward               | 0.13293844 |
|    std                  | 7.62e+13   |
|    value_loss           | 0.727      |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 120        |
|    iterations           | 203        |
|    time_elapse

-----------------------------------------
| time/                   |             |
|    fps                  | 121         |
|    iterations           | 212         |
|    time_elapsed         | 224         |
|    total_timesteps      | 27136       |
| train/                  |             |
|    approx_kl            | 0.05081427  |
|    clip_fraction        | 0.151       |
|    clip_range           | 0.2         |
|    entropy_loss         | -260        |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0646      |
|    loss                 | -22.9       |
|    n_updates            | 2110        |
|    policy_gradient_loss | 0.00638     |
|    reward               | -0.19631757 |
|    std                  | 2.93e+14    |
|    value_loss           | 2.67        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 121         |
|    iterations           | 213   

-----------------------------------------
| time/                   |             |
|    fps                  | 121         |
|    iterations           | 222         |
|    time_elapsed         | 234         |
|    total_timesteps      | 28416       |
| train/                  |             |
|    approx_kl            | 0.8095094   |
|    clip_fraction        | 0.657       |
|    clip_range           | 0.2         |
|    entropy_loss         | -271        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0646      |
|    loss                 | -25.2       |
|    n_updates            | 2210        |
|    policy_gradient_loss | 0.0213      |
|    reward               | -0.20646684 |
|    std                  | 1.16e+15    |
|    value_loss           | 0.262       |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 121         |
|    iterations           | 223   

-----------------------------------------
| time/                   |             |
|    fps                  | 122         |
|    iterations           | 232         |
|    time_elapsed         | 242         |
|    total_timesteps      | 29696       |
| train/                  |             |
|    approx_kl            | 0.268533    |
|    clip_fraction        | 0.467       |
|    clip_range           | 0.2         |
|    entropy_loss         | -289        |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0646      |
|    loss                 | -26.2       |
|    n_updates            | 2310        |
|    policy_gradient_loss | 0.0161      |
|    reward               | 0.054165367 |
|    std                  | 5.42e+15    |
|    value_loss           | 2.07        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 122         |
|    iterations           | 233   

------------------------------------------
| time/                   |              |
|    fps                  | 121          |
|    iterations           | 242          |
|    time_elapsed         | 254          |
|    total_timesteps      | 30976        |
| train/                  |              |
|    approx_kl            | 0.0524618    |
|    clip_fraction        | 0.369        |
|    clip_range           | 0.2          |
|    entropy_loss         | -300         |
|    explained_variance   | 0            |
|    learning_rate        | 0.0646       |
|    loss                 | -28          |
|    n_updates            | 2410         |
|    policy_gradient_loss | 0.00636      |
|    reward               | -0.066058345 |
|    std                  | 1.81e+16     |
|    value_loss           | 0.386        |
------------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 121        |
|    iterations  

----------------------------------------
| time/                   |            |
|    fps                  | 121        |
|    iterations           | 252        |
|    time_elapsed         | 264        |
|    total_timesteps      | 32256      |
| train/                  |            |
|    approx_kl            | 0.1264719  |
|    clip_fraction        | 0.395      |
|    clip_range           | 0.2        |
|    entropy_loss         | -311       |
|    explained_variance   | 0          |
|    learning_rate        | 0.0646     |
|    loss                 | -29.1      |
|    n_updates            | 2510       |
|    policy_gradient_loss | 0.0255     |
|    reward               | -0.3804767 |
|    std                  | 6.93e+16   |
|    value_loss           | 0.704      |
----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 121          |
|    iterations           | 253          |
|    tim

----------------------------------------
| time/                   |            |
|    fps                  | 121        |
|    iterations           | 262        |
|    time_elapsed         | 275        |
|    total_timesteps      | 33536      |
| train/                  |            |
|    approx_kl            | 0.05534294 |
|    clip_fraction        | 0.267      |
|    clip_range           | 0.2        |
|    entropy_loss         | -322       |
|    explained_variance   | 0          |
|    learning_rate        | 0.0646     |
|    loss                 | -29.2      |
|    n_updates            | 2610       |
|    policy_gradient_loss | 0.00289    |
|    reward               | 1.1652516  |
|    std                  | 2.44e+17   |
|    value_loss           | 2.39       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 121        |
|    iterations           | 263        |
|    time_elapse

----------------------------------------
| time/                   |            |
|    fps                  | 122        |
|    iterations           | 272        |
|    time_elapsed         | 284        |
|    total_timesteps      | 34816      |
| train/                  |            |
|    approx_kl            | 0.28652757 |
|    clip_fraction        | 0.533      |
|    clip_range           | 0.2        |
|    entropy_loss         | -335       |
|    explained_variance   | -1.19e-07  |
|    learning_rate        | 0.0646     |
|    loss                 | -30.9      |
|    n_updates            | 2710       |
|    policy_gradient_loss | 0.0251     |
|    reward               | 0.10180222 |
|    std                  | 1.22e+18   |
|    value_loss           | 0.366      |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 122         |
|    iterations           | 273         |
|    time_el

0,1
entropy_loss,████▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁
loss,▂▃▃█▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
value_loss,▁▂▃█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
entropy_loss,-337.49808
loss,-31.61792
value_loss,0.14876


Run 3shcnosf errored: ValueError('Expected parameter loc (Tensor of shape (128, 8)) of distribution Normal(loc: torch.Size([128, 8]), scale: torch.Size([128, 8])) to satisfy the constraint Real(), but found invalid values:\ntensor([[nan, nan, nan,  ..., nan, nan, nan],\n        [nan, nan, nan,  ..., nan, nan, nan],\n        [nan, nan, nan,  ..., nan, nan, nan],\n        ...,\n        [nan, nan, nan,  ..., nan, nan, nan],\n        [nan, nan, nan,  ..., nan, nan, nan],\n        [nan, nan, nan,  ..., nan, nan, nan]], grad_fn=<AddmmBackward0>)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run 3shcnosf errored: ValueError('Expected parameter loc (Tensor of shape (128, 8)) of distribution Normal(loc: torch.Size([128, 8]), scale: torch.Size([128, 8])) to satisfy the constraint Real(), but found invalid values:\ntensor([[nan, nan, nan,  ..., nan, nan, nan],\n        [nan, nan, nan,  ..., nan, nan, nan],\n        [nan, nan, nan,  ..., nan, nan, nan],\n        ...,\n        [nan, nan, nan,  ..., nan

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016759645833326432, max=1.0…

{'ent_coef': 0.02484215075396824, 'n_steps': 1024, 'learning_rate': 0.08856754659991965, 'batch_size': 568}
Using cpu device


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1024 and n_envs=1)
  f"You have specified a mini-batch size of {batch_size},"


------------------------------------
| time/              |             |
|    fps             | 120         |
|    iterations      | 1           |
|    time_elapsed    | 8           |
|    total_timesteps | 1024        |
| train/             |             |
|    reward          | 0.112849265 |
------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 111       |
|    iterations           | 2         |
|    time_elapsed         | 18        |
|    total_timesteps      | 2048      |
| train/                  |           |
|    approx_kl            | 299.25885 |
|    clip_fraction        | 0.946     |
|    clip_range           | 0.2       |
|    entropy_loss         | -13.2     |
|    explained_variance   | 1.19e-07  |
|    learning_rate        | 0.0886    |
|    loss                 | 0.963     |
|    n_updates            | 10        |
|    policy_gradient_loss | 0.267     |
|    reward               |

-----------------------------------------
| time/                   |             |
|    fps                  | 119         |
|    iterations           | 12          |
|    time_elapsed         | 102         |
|    total_timesteps      | 12288       |
| train/                  |             |
|    approx_kl            | 0.04240436  |
|    clip_fraction        | 0.591       |
|    clip_range           | 0.2         |
|    entropy_loss         | -39         |
|    explained_variance   | 0           |
|    learning_rate        | 0.0886      |
|    loss                 | 22.5        |
|    n_updates            | 110         |
|    policy_gradient_loss | 0.0495      |
|    reward               | -0.09619104 |
|    std                  | 49.3        |
|    value_loss           | 50.5        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 119        |
|    iterations           | 13       

-----------------------------------------
| time/                   |             |
|    fps                  | 106         |
|    iterations           | 22          |
|    time_elapsed         | 210         |
|    total_timesteps      | 22528       |
| train/                  |             |
|    approx_kl            | 0.019929033 |
|    clip_fraction        | 0.15        |
|    clip_range           | 0.2         |
|    entropy_loss         | -40.3       |
|    explained_variance   | 0.0261      |
|    learning_rate        | 0.0886      |
|    loss                 | 88.1        |
|    n_updates            | 210         |
|    policy_gradient_loss | -0.00488    |
|    reward               | -0.31297576 |
|    std                  | 60.7        |
|    value_loss           | 204         |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 107         |
|    iterations           | 23    

-----------------------------------------
| time/                   |             |
|    fps                  | 109         |
|    iterations           | 32          |
|    time_elapsed         | 298         |
|    total_timesteps      | 32768       |
| train/                  |             |
|    approx_kl            | 0.018589709 |
|    clip_fraction        | 0.273       |
|    clip_range           | 0.2         |
|    entropy_loss         | -42.4       |
|    explained_variance   | 0.642       |
|    learning_rate        | 0.0886      |
|    loss                 | 6.66        |
|    n_updates            | 310         |
|    policy_gradient_loss | 0.00234     |
|    reward               | -0.6487618  |
|    std                  | 82.4        |
|    value_loss           | 13.6        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 107        |
|    iterations           | 33       

-----------------------------------------
| time/                   |             |
|    fps                  | 104         |
|    iterations           | 42          |
|    time_elapsed         | 409         |
|    total_timesteps      | 43008       |
| train/                  |             |
|    approx_kl            | 0.020598033 |
|    clip_fraction        | 0.158       |
|    clip_range           | 0.2         |
|    entropy_loss         | -45.6       |
|    explained_variance   | -8.34e-07   |
|    learning_rate        | 0.0886      |
|    loss                 | 20.6        |
|    n_updates            | 410         |
|    policy_gradient_loss | 0.000798    |
|    reward               | -0.78676593 |
|    std                  | 129         |
|    value_loss           | 44          |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 103         |
|    iterations           | 43    

0,1
entropy_loss,█▆▅▄▃▃▃▃▃▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
loss,▁▂▃▁▂▁▃▆▁▇▁▂▇▂▆▁▂▆▂▇▁▂▁▂▆▁▆▁▂▇▁▆▁▂▆▂█▁▂▁
value_loss,▁▂▃▁▂▁▃▆▁▇▁▂▇▂▆▁▂▆▂▇▁▂▁▂▆▁▆▁▂▇▁▆▁▂▆▂█▂▂▁

0,1
entropy_loss,-47.29173
loss,7.73088
value_loss,17.80465


[34m[1mwandb[0m: Agent Starting Run: ealt04l1 with config:
[34m[1mwandb[0m: 	batch_size: 228
[34m[1mwandb[0m: 	ent_coef: 0.06400806967973292
[34m[1mwandb[0m: 	learning_rate: 0.0957611758186866
[34m[1mwandb[0m: 	n_steps: 4098
[34m[1mwandb[0m: 	total_timesteps: 300000


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016837843050006997, max=1.0…

{'ent_coef': 0.06400806967973292, 'n_steps': 4098, 'learning_rate': 0.0957611758186866, 'batch_size': 228}
Using cpu device


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=4098 and n_envs=1)
  f"You have specified a mini-batch size of {batch_size},"


-----------------------------------
| time/              |            |
|    fps             | 89         |
|    iterations      | 1          |
|    time_elapsed    | 45         |
|    total_timesteps | 4098       |
| train/             |            |
|    reward          | 0.14023721 |
-----------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 99        |
|    iterations           | 2         |
|    time_elapsed         | 82        |
|    total_timesteps      | 8196      |
| train/                  |           |
|    approx_kl            | 90.10683  |
|    clip_fraction        | 0.994     |
|    clip_range           | 0.2       |
|    entropy_loss         | -60.4     |
|    explained_variance   | 0         |
|    learning_rate        | 0.0958    |
|    loss                 | -6.34     |
|    n_updates            | 10        |
|    policy_gradient_loss | 0.27      |
|    reward               | 0.670228

day: 3060, episode: 290
begin_total_asset: 1000000.00
end_total_asset: 1452199.55
total_reward: 452199.55
total_cost: 33217.04
total_trades: 24278
Sharpe: 0.393
----------------------------------------
| time/                   |            |
|    fps                  | 129        |
|    iterations           | 12         |
|    time_elapsed         | 379        |
|    total_timesteps      | 49176      |
| train/                  |            |
|    approx_kl            | 0.10418167 |
|    clip_fraction        | 0.476      |
|    clip_range           | 0.2        |
|    entropy_loss         | -311       |
|    explained_variance   | 0          |
|    learning_rate        | 0.0958     |
|    loss                 | -18.9      |
|    n_updates            | 110        |
|    policy_gradient_loss | 0.0377     |
|    reward               | 0.0995701  |
|    std                  | 3.49e+16   |
|    value_loss           | 2.23       |
----------------------------------------
-------------------

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
entropy_loss,█▆▅▅▅▄▄▃▃▂▂▂▂▂▂▂▁
loss,█▆▅▄▅▄▃▃▃▂▂▃▂▂▂▁▁
value_loss,▂▄▇▂▄▂▃▃▇▃▃█▃▄▆▁▄

0,1
entropy_loss,-346.74887
loss,-20.4593
value_loss,2.90796


Run ealt04l1 errored: ValueError('Expected parameter loc (Tensor of shape (228, 8)) of distribution Normal(loc: torch.Size([228, 8]), scale: torch.Size([228, 8])) to satisfy the constraint Real(), but found invalid values:\ntensor([[nan, nan, nan,  ..., nan, nan, nan],\n        [nan, nan, nan,  ..., nan, nan, nan],\n        [nan, nan, nan,  ..., nan, nan, nan],\n        ...,\n        [nan, nan, nan,  ..., nan, nan, nan],\n        [nan, nan, nan,  ..., nan, nan, nan],\n        [nan, nan, nan,  ..., nan, nan, nan]], grad_fn=<AddmmBackward0>)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run ealt04l1 errored: ValueError('Expected parameter loc (Tensor of shape (228, 8)) of distribution Normal(loc: torch.Size([228, 8]), scale: torch.Size([228, 8])) to satisfy the constraint Real(), but found invalid values:\ntensor([[nan, nan, nan,  ..., nan, nan, nan],\n        [nan, nan, nan,  ..., nan, nan, nan],\n        [nan, nan, nan,  ..., nan, nan, nan],\n        ...,\n        [nan, nan, nan,  ..., nan

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016821409016665713, max=1.0…

{'ent_coef': 0.014870663587835365, 'n_steps': 2048, 'learning_rate': 0.03719362997945023, 'batch_size': 408}
Using cpu device


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=2048 and n_envs=1)
  f"You have specified a mini-batch size of {batch_size},"


-----------------------------------
| time/              |            |
|    fps             | 131        |
|    iterations      | 1          |
|    time_elapsed    | 15         |
|    total_timesteps | 2048       |
| train/             |            |
|    reward          | 0.14328596 |
-----------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 135       |
|    iterations           | 2         |
|    time_elapsed         | 30        |
|    total_timesteps      | 4096      |
| train/                  |           |
|    approx_kl            | 15.356715 |
|    clip_fraction        | 0.982     |
|    clip_range           | 0.2       |
|    entropy_loss         | -14.9     |
|    explained_variance   | 0         |
|    learning_rate        | 0.0372    |
|    loss                 | 0.1       |
|    n_updates            | 10        |
|    policy_gradient_loss | 0.295     |
|    reward               | -1.09828

------------------------------------------
| time/                   |              |
|    fps                  | 55           |
|    iterations           | 12           |
|    time_elapsed         | 440          |
|    total_timesteps      | 24576        |
| train/                  |              |
|    approx_kl            | 0.08541      |
|    clip_fraction        | 0.631        |
|    clip_range           | 0.2          |
|    entropy_loss         | -31.9        |
|    explained_variance   | 0.594        |
|    learning_rate        | 0.0372       |
|    loss                 | 13.8         |
|    n_updates            | 110          |
|    policy_gradient_loss | 0.0799       |
|    reward               | -0.024545055 |
|    std                  | 16           |
|    value_loss           | 82.3         |
------------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 59         |
|    iterations  

-----------------------------------------
| time/                   |             |
|    fps                  | 79          |
|    iterations           | 22          |
|    time_elapsed         | 566         |
|    total_timesteps      | 45056       |
| train/                  |             |
|    approx_kl            | 0.094988294 |
|    clip_fraction        | 0.67        |
|    clip_range           | 0.2         |
|    entropy_loss         | -34.2       |
|    explained_variance   | 0.292       |
|    learning_rate        | 0.0372      |
|    loss                 | 13          |
|    n_updates            | 210         |
|    policy_gradient_loss | 0.0677      |
|    reward               | -2.6540387  |
|    std                  | 21.7        |
|    value_loss           | 113         |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 80          |
|    iterations           | 23    

-----------------------------------------
| time/                   |             |
|    fps                  | 85          |
|    iterations           | 32          |
|    time_elapsed         | 763         |
|    total_timesteps      | 65536       |
| train/                  |             |
|    approx_kl            | 0.09317931  |
|    clip_fraction        | 0.404       |
|    clip_range           | 0.2         |
|    entropy_loss         | -35.8       |
|    explained_variance   | 0.84        |
|    learning_rate        | 0.0372      |
|    loss                 | 5.94        |
|    n_updates            | 310         |
|    policy_gradient_loss | 0.033       |
|    reward               | -0.31424832 |
|    std                  | 26.5        |
|    value_loss           | 20.5        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 33    

-----------------------------------------
| time/                   |             |
|    fps                  | 93          |
|    iterations           | 42          |
|    time_elapsed         | 924         |
|    total_timesteps      | 86016       |
| train/                  |             |
|    approx_kl            | 0.09294591  |
|    clip_fraction        | 0.605       |
|    clip_range           | 0.2         |
|    entropy_loss         | -37.6       |
|    explained_variance   | 0.647       |
|    learning_rate        | 0.0372      |
|    loss                 | 11.3        |
|    n_updates            | 410         |
|    policy_gradient_loss | 0.0661      |
|    reward               | -0.57562315 |
|    std                  | 33.6        |
|    value_loss           | 99.1        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 93          |
|    iterations           | 43    

----------------------------------------
| time/                   |            |
|    fps                  | 100        |
|    iterations           | 52         |
|    time_elapsed         | 1057       |
|    total_timesteps      | 106496     |
| train/                  |            |
|    approx_kl            | 0.03743129 |
|    clip_fraction        | 0.365      |
|    clip_range           | 0.2        |
|    entropy_loss         | -39.4      |
|    explained_variance   | 0.552      |
|    learning_rate        | 0.0372     |
|    loss                 | 21.9       |
|    n_updates            | 510        |
|    policy_gradient_loss | 0.0255     |
|    reward               | 0.18828982 |
|    std                  | 43         |
|    value_loss           | 98.1       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 101        |
|    iterations           | 53         |
|    time_elapse

-----------------------------------------
| time/                   |             |
|    fps                  | 109         |
|    iterations           | 62          |
|    time_elapsed         | 1157        |
|    total_timesteps      | 126976      |
| train/                  |             |
|    approx_kl            | 0.010724747 |
|    clip_fraction        | 0.303       |
|    clip_range           | 0.2         |
|    entropy_loss         | -41.6       |
|    explained_variance   | 0.881       |
|    learning_rate        | 0.0372      |
|    loss                 | 5.03        |
|    n_updates            | 610         |
|    policy_gradient_loss | 0.0142      |
|    reward               | 1.3609204   |
|    std                  | 52.4        |
|    value_loss           | 20.2        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 110        |
|    iterations           | 63       

In [2]:
print(perf_stats_all)

                            0
Annual return        0.021349
Cumulative returns   0.039661
Annual volatility    0.066063
Sharpe ratio         0.353407
Calmar ratio         0.201586
Stability            0.425234
Max drawdown        -0.105903
Omega ratio          1.070212
Sortino ratio        0.517351
Skew                      NaN
Kurtosis                  NaN
Tail ratio           1.169491
Daily value at risk -0.008231


In [11]:
#trading



hit end!
           date  account_value
459  2021-12-23   1.140401e+06
460  2021-12-27   1.151409e+06
461  2021-12-28   1.153059e+06
462  2021-12-29   1.153755e+06
463  2021-12-30   1.151230e+06


Unnamed: 0_level_0,AAPL,CAT,DIS,GS,INTC,JPM,MMM,MSFT
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-03-17,4,0,3,0,0,0,2,1
2021-03-18,4,0,3,0,0,0,2,1
2021-03-19,4,0,3,0,0,0,2,1
2021-03-22,4,0,3,0,0,0,2,1
2021-03-23,4,0,3,0,0,0,2,1
...,...,...,...,...,...,...,...,...
2021-12-22,4,0,3,0,0,0,2,1
2021-12-23,4,0,3,0,0,0,2,1
2021-12-27,4,0,3,0,0,0,2,1
2021-12-28,4,0,3,0,0,0,2,1


In [12]:
#Backtesting Results
print("==============Get Backtest Results===========")
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')

perf_stats_all = backtest_stats(account_value=df_account_value)
perf_stats_all = pd.DataFrame(perf_stats_all)
perf_stats_all.to_csv("./"+RESULTS_DIR+"/perf_stats_all_"+now+'.csv')

Annual return          0.079487
Cumulative returns     0.151230
Annual volatility      0.063709
Sharpe ratio           1.235131
Calmar ratio           1.649271
Stability              0.929493
Max drawdown          -0.048195
Omega ratio            1.263219
Sortino ratio          1.801034
Skew                        NaN
Kurtosis                    NaN
Tail ratio             1.026593
Daily value at risk   -0.007714
dtype: float64


In [13]:
#df_actions.to_csv('action_ppo_s(h20)3.csv')


In [14]:
#a = perf_stats_all.to_dict()
#wandb.save(perf_stats_all)
#wandb.save(df_actions)
