## Import Dependencies

In [1]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# trl: Transformer Reinforcement Learning library
from trl import PPOTrainer, PPOConfig, AutoModelForSeq2SeqLMWithValueHead, set_seed
from trl import create_reference_model

import torch
import evaluate

import numpy as np
import pandas as pd

# tqdm library makes the loops show a smart progress meter.
from tqdm import tqdm
tqdm.pandas()

## Load BioBart Model, Prepare Reward Model

In [2]:
# Local fine-tuned checkpoint directory; both the model weights and the
# tokenizer are loaded from this same path.
model_name="./finetuned-checkpoints/biobart-base--mimic-cxr/checkpoint-19600"

model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Wrap the already-loaded seq2seq model with trl's value-head wrapper; this is
# the policy that PPOTrainer receives as `model`.
ppo_model = AutoModelForSeq2SeqLMWithValueHead.from_pretrained(model, is_trainable=True)
# Reference model derived from the policy; it is passed to PPOTrainer as
# `ref_model`. NOTE(review): presumably a frozen copy used for the KL penalty —
# confirm against the trl version in use.
ref_model = create_reference_model(ppo_model)

# Alternative learned reward model, currently disabled.
#from models.reward import get_reward_model
#reward_model = get_reward_model()

In [3]:
from radgraph import F1RadGraph

# Reward scorer for the PPO loop: it is called with `hyps` (generated summaries)
# and `refs` (reference impressions) and the result is indexed for rewards.
f1radgraph_reward = F1RadGraph(reward_level="partial")

## Build Dataset

In [4]:
from pathlib import Path
import datasets
from datasets import Image
from torchvision import transforms
import os.path

def build_dataset(dataset_config, tokenizer, split,
                  data_path='/nfs/turbo/umms-vgvinodv/data/bioNLP23-Task-1B/data/'):
    """Build a tokenized findings -> impression dataset for one split.

    Reads ``<data_path>/<dataset_config>/<split>.findings.tok`` and
    ``<data_path>/<dataset_config>/<split>.impression.tok`` line by line,
    pairs the two files by line position, and tokenizes every findings line
    after prefixing it with ``"summarize: "``.

    Args:
        dataset_config: Name of the corpus sub-directory under ``data_path``
            (the original notes list 'mimic-cxr' and 'mimic-iii').
        tokenizer: Callable tokenizer; ``tokenizer(list_of_str)["input_ids"]``
            is what gets stored.
        split: File-name prefix of the split (the original notes list
            'train', 'validate' and 'test').
        data_path: Root directory of the task data. Defaults to the location
            that was previously hard-coded, so existing calls are unaffected.

    Returns:
        A ``datasets.Dataset`` set to torch format with the columns
        ``"query"`` (the stripped impression line) and ``"input_ids"``
        (token ids of the prefixed findings line). The raw ``"text"`` column
        is removed during mapping.

    Raises:
        ValueError: If the findings and impression files do not contain the
            same number of lines, since rows are paired by position.
    """
    corpus_dir = Path(data_path).joinpath(dataset_config)
    findings_file_path = corpus_dir.joinpath(split + '.findings.tok')
    impression_file_path = corpus_dir.joinpath(split + '.impression.tok')

    def read_stripped_lines(path):
        # Context manager so the handle is closed deterministically instead of
        # being left for the garbage collector.
        with open(path) as handle:
            return [line.strip() for line in handle]

    findings = read_stripped_lines(findings_file_path)
    impression = read_stripped_lines(impression_file_path)

    if len(findings) != len(impression):
        raise ValueError(
            f"Line count mismatch: {findings_file_path} has {len(findings)} lines "
            f"but {impression_file_path} has {len(impression)}"
        )

    # "query" holds the reference impression; the training loop reads it back
    # as the reference text for the reward computation.
    dataset = datasets.Dataset.from_dict({"text": findings, "query": impression})

    def tokenize(samples):
        input_text = [f"summarize: {text}" for text in samples["text"]]
        # NOTE(review): no truncation is requested here, so an unusually long
        # findings line would be passed to the model at full length — confirm
        # the inputs always fit the model's maximum input size.
        samples["input_ids"] = tokenizer(input_text)["input_ids"]
        return samples

    dataset = dataset.map(tokenize, batched=True, num_proc=4, remove_columns=["text"])

    dataset.set_format(type="torch")
    return dataset

# Build the tokenized "train" split of the mimic-cxr corpus; the result is the
# dataset handed to PPOTrainer.
dataset_config = "mimic-cxr"
tokenized_train_data = build_dataset(dataset_config,tokenizer,"train")

Map (num_proc=4):   0%|          | 0/125417 [00:00<?, ? examples/s]

In [5]:
# Sanity check: show the dataset's column names and row count.
print(tokenized_train_data)

Dataset({
    features: ['query', 'input_ids'],
    num_rows: 125417
})


In [6]:
# Sanity check: show the first example (reference impression text under
# "query" and the prompt token ids under "input_ids").
print(tokenized_train_data[0])

{'query': 'No acute cardiopulmonary process.', 'input_ids': tensor([    0, 18581,  3916,  2072,    35,   345,    16,   117, 22628, 13581,
            6, 16415,  9799, 22089, 15727,    50, 45001,  6157,   368,  3631,
            4,   163, 16908, 12029,  8244,  5963,  1043,  2192,    14,   144,
          533,  3594, 42490, 21841,     4,    20,  1886,   118, 12781,   118,
         1988,  6204, 33585,    16,  2340,     4,  2893,  7418,   695,    81,
            5,   314, 10665,     6,  2905,   624,     5,  6181,     4,    20,
         4356,  4628,  2853, 28398,    16, 40618, 36774,     4, 38207, 42011,
         1571,     9,     5, 41834,   314,  2958,     8,  3821, 21443,    32,
         1581,     4,     2])}


In [7]:
def collator(data):
    """Regroup a list of per-sample dicts into a single dict of lists.

    The keys (and their order) are taken from the first sample; each key maps
    to the list of that key's values across all samples, in sample order.
    Values are left as-is, i.e. nothing is padded or stacked.
    """
    first_sample = data[0]
    batch = {}
    for key in first_sample:
        batch[key] = [sample[key] for sample in data]
    return batch

In [8]:
# PPO hyperparameters, collected here so they can be tuned in one place; each
# one is forwarded unchanged to PPOConfig below.
learning_rate=1.41e-5
max_ppo_epochs=1
mini_batch_size=8
batch_size=32

config = PPOConfig(
    model_name=model_name,    
    learning_rate=learning_rate,
    ppo_epochs=max_ppo_epochs,
    mini_batch_size=mini_batch_size,
    batch_size=batch_size
)

# The trainer owns the dataloader iterated by the training loop; `collator`
# keeps each batch as a dict of per-sample lists.
ppo_trainer = PPOTrainer(config=config, 
                         model=ppo_model, 
                         ref_model=ref_model, 
                         tokenizer=tokenizer, 
                         dataset=tokenized_train_data, 
                         data_collator=collator)

In [9]:
# Sampling configuration forwarded to ppo_trainer.generate for every rollout.
generation_kwargs = {
    "min_length": -1,
    "top_k": 0.0,
    "top_p": 1.0,
    "do_sample": True,
    "max_new_tokens": 512,
}

# NOTE(review): the recorded run of this cell shows objective/kl drifting well
# below zero. TRL's documentation treats persistently negative KL as a sign of
# a mismatch between how responses are sampled and how they are scored —
# investigate before trusting the reward trend.

# Wrap the dataloader itself (not the enumerate generator) so tqdm can read its
# length and report a total and ETA instead of a bare iteration counter.
for step, batch in enumerate(tqdm(ppo_trainer.dataloader)):
    prompt_tensors = batch["input_ids"]

    # Sample one summary per prompt from the policy (the fine-tuned BioBART
    # checkpoint wrapped with a value head).
    summary_tensors = []
    for prompt_tensor in prompt_tensors:
        summary = ppo_trainer.generate(prompt_tensor, **generation_kwargs)
        # Drop only the leading dimension: a bare squeeze() would also collapse
        # a length-1 generation into a 0-d tensor.
        # Assumes generate returns a (1, seq_len) tensor for a single prompt —
        # TODO confirm for the trl version in use.
        summary_tensors.append(summary.squeeze(0))

    # log_stats reads the decoded text from batch["response"], so the key name
    # must stay exactly "response".
    batch["response"] = [tokenizer.decode(r, skip_special_tokens=True) for r in summary_tensors]

    # Score each generated summary against its reference impression, which is
    # stored under "query". Index 1 of the scorer's result is used as the
    # per-sample reward list — presumably one score per hypothesis; confirm
    # against the radgraph version in use.
    rewards = f1radgraph_reward(hyps=batch["response"], refs=batch["query"])[1]
    reward_tensors = [torch.tensor(reward) for reward in rewards]

    # Run one PPO optimisation step on this batch and log its statistics.
    stats = ppo_trainer.step(prompt_tensors, summary_tensors, reward_tensors)
    ppo_trainer.log_stats(stats, batch, reward_tensors)

    print(f'objective/kl: {stats["objective/kl"]}')
    print(f'ppo/returns/mean: {stats["ppo/returns/mean"]}')
    print(f'ppo/policy/advantages_mean: {stats["ppo/policy/advantages_mean"]}')
    # Same 99-dash separator as before, without the join-over-empty-strings trick.
    print('-' * 99)

0it [00:00, ?it/s]You're using a BartTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
1it [00:08,  8.30s/it]

objective/kl: 0.0
ppo/returns/mean: 0.5267552137374878
ppo/policy/advantages_mean: 0.0150148905813694
---------------------------------------------------------------------------------------------------


2it [00:14,  7.36s/it]

objective/kl: -0.0616590790450573
ppo/returns/mean: 0.5280265808105469
ppo/policy/advantages_mean: -0.02006658911705017
---------------------------------------------------------------------------------------------------


3it [00:24,  8.48s/it]

objective/kl: 0.2601274251937866
ppo/returns/mean: 0.4170551598072052
ppo/policy/advantages_mean: -0.03460247814655304
---------------------------------------------------------------------------------------------------


4it [00:31,  7.97s/it]

objective/kl: 0.447507381439209
ppo/returns/mean: 0.4807811379432678
ppo/policy/advantages_mean: 0.013372547924518585
---------------------------------------------------------------------------------------------------


5it [00:40,  8.03s/it]

objective/kl: 0.6174232363700867
ppo/returns/mean: 0.4594668447971344
ppo/policy/advantages_mean: 0.02052750252187252
---------------------------------------------------------------------------------------------------


6it [00:47,  7.82s/it]

objective/kl: 0.4928346276283264
ppo/returns/mean: 0.48939836025238037
ppo/policy/advantages_mean: 0.015052370727062225
---------------------------------------------------------------------------------------------------


7it [00:54,  7.70s/it]

objective/kl: 0.4346890449523926
ppo/returns/mean: 0.3601510524749756
ppo/policy/advantages_mean: -0.02631155215203762
---------------------------------------------------------------------------------------------------


8it [01:04,  8.22s/it]

objective/kl: 0.8526222705841064
ppo/returns/mean: 0.3559178113937378
ppo/policy/advantages_mean: 0.005494534969329834
---------------------------------------------------------------------------------------------------


9it [01:11,  8.05s/it]

objective/kl: 0.6051822900772095
ppo/returns/mean: 0.34942275285720825
ppo/policy/advantages_mean: 0.0118586840108037
---------------------------------------------------------------------------------------------------


10it [01:20,  8.07s/it]

objective/kl: 0.6638686656951904
ppo/returns/mean: 0.31329962611198425
ppo/policy/advantages_mean: 0.01140737533569336
---------------------------------------------------------------------------------------------------


11it [01:27,  7.88s/it]

objective/kl: 0.9391096830368042
ppo/returns/mean: 0.3485705852508545
ppo/policy/advantages_mean: 0.0858340710401535
---------------------------------------------------------------------------------------------------


12it [01:34,  7.48s/it]

objective/kl: 0.9632031917572021
ppo/returns/mean: 0.36163103580474854
ppo/policy/advantages_mean: -0.0012590251863002777
---------------------------------------------------------------------------------------------------


13it [01:42,  7.65s/it]

objective/kl: 0.3593898415565491
ppo/returns/mean: 0.36145666241645813
ppo/policy/advantages_mean: -0.03851776570081711
---------------------------------------------------------------------------------------------------


14it [01:49,  7.65s/it]

objective/kl: -0.09312435984611511
ppo/returns/mean: 0.413155734539032
ppo/policy/advantages_mean: 0.00019887881353497505
---------------------------------------------------------------------------------------------------


15it [02:00,  8.66s/it]

objective/kl: 0.18161641061306
ppo/returns/mean: 0.3837253153324127
ppo/policy/advantages_mean: 0.054766975343227386
---------------------------------------------------------------------------------------------------


16it [02:08,  8.46s/it]

objective/kl: -0.14140279591083527
ppo/returns/mean: 0.44878089427948
ppo/policy/advantages_mean: 0.0032177050597965717
---------------------------------------------------------------------------------------------------


17it [02:16,  8.29s/it]

objective/kl: -0.2945861220359802
ppo/returns/mean: 0.5389434099197388
ppo/policy/advantages_mean: -0.0027230512350797653
---------------------------------------------------------------------------------------------------


18it [02:24,  8.10s/it]

objective/kl: 0.10799562931060791
ppo/returns/mean: 0.5901253819465637
ppo/policy/advantages_mean: -0.013219920918345451
---------------------------------------------------------------------------------------------------


19it [02:34,  8.72s/it]

objective/kl: -0.7591397166252136
ppo/returns/mean: 0.5450824499130249
ppo/policy/advantages_mean: -0.004472721368074417
---------------------------------------------------------------------------------------------------


20it [02:43,  8.76s/it]

objective/kl: -0.028402596712112427
ppo/returns/mean: 0.4479163885116577
ppo/policy/advantages_mean: -0.020446643233299255
---------------------------------------------------------------------------------------------------


21it [02:54,  9.50s/it]

objective/kl: 0.1420191526412964
ppo/returns/mean: 0.49016955494880676
ppo/policy/advantages_mean: -0.0023673027753829956
---------------------------------------------------------------------------------------------------


22it [03:06, 10.36s/it]

objective/kl: -0.5515356659889221
ppo/returns/mean: 0.4677821695804596
ppo/policy/advantages_mean: 0.046281978487968445
---------------------------------------------------------------------------------------------------


23it [03:18, 10.77s/it]

objective/kl: 0.06493102014064789
ppo/returns/mean: 0.46287864446640015
ppo/policy/advantages_mean: -0.022775322198867798
---------------------------------------------------------------------------------------------------


24it [03:30, 10.98s/it]

objective/kl: -0.4590589702129364
ppo/returns/mean: 0.5460917949676514
ppo/policy/advantages_mean: 0.009029455482959747
---------------------------------------------------------------------------------------------------


25it [03:41, 10.99s/it]

objective/kl: -0.004128128290176392
ppo/returns/mean: 0.5376606583595276
ppo/policy/advantages_mean: -0.0305989570915699
---------------------------------------------------------------------------------------------------


26it [03:52, 11.15s/it]

objective/kl: -0.5099937915802002
ppo/returns/mean: 0.4593430757522583
ppo/policy/advantages_mean: 0.011503521353006363
---------------------------------------------------------------------------------------------------


27it [04:04, 11.30s/it]

objective/kl: -1.6495065689086914
ppo/returns/mean: 0.5513890981674194
ppo/policy/advantages_mean: -0.005777647718787193
---------------------------------------------------------------------------------------------------


28it [04:14, 10.99s/it]

objective/kl: -1.5580155849456787
ppo/returns/mean: 0.6591747999191284
ppo/policy/advantages_mean: 0.03342143073678017
---------------------------------------------------------------------------------------------------


29it [04:24, 10.56s/it]

objective/kl: -0.8949955701828003
ppo/returns/mean: 0.5506173372268677
ppo/policy/advantages_mean: -0.01579529419541359
---------------------------------------------------------------------------------------------------


30it [04:33, 10.22s/it]

objective/kl: -0.6760386824607849
ppo/returns/mean: 0.6511843800544739
ppo/policy/advantages_mean: -0.004096552729606628
---------------------------------------------------------------------------------------------------


31it [04:42,  9.96s/it]

objective/kl: -0.399725079536438
ppo/returns/mean: 0.514212429523468
ppo/policy/advantages_mean: -0.019357630982995033
---------------------------------------------------------------------------------------------------


32it [04:51,  9.46s/it]

objective/kl: 0.05038077384233475
ppo/returns/mean: 0.5560387969017029
ppo/policy/advantages_mean: -0.001652248203754425
---------------------------------------------------------------------------------------------------


33it [05:02,  9.99s/it]

objective/kl: -0.3583081364631653
ppo/returns/mean: 0.5428758859634399
ppo/policy/advantages_mean: -0.01986437477171421
---------------------------------------------------------------------------------------------------


34it [05:09,  9.18s/it]

objective/kl: -0.28599950671195984
ppo/returns/mean: 0.5993366241455078
ppo/policy/advantages_mean: 0.020767301321029663
---------------------------------------------------------------------------------------------------


35it [05:20,  9.74s/it]

objective/kl: -0.18561983108520508
ppo/returns/mean: 0.4095754623413086
ppo/policy/advantages_mean: -0.01872251182794571
---------------------------------------------------------------------------------------------------


36it [05:31, 10.04s/it]

objective/kl: -0.6140936017036438
ppo/returns/mean: 0.5904093980789185
ppo/policy/advantages_mean: -0.003260302357375622
---------------------------------------------------------------------------------------------------


37it [05:41, 10.01s/it]

objective/kl: -0.46194711327552795
ppo/returns/mean: 0.6282437443733215
ppo/policy/advantages_mean: 0.041657060384750366
---------------------------------------------------------------------------------------------------


38it [05:51,  9.88s/it]

objective/kl: -0.26025763154029846
ppo/returns/mean: 0.5586228370666504
ppo/policy/advantages_mean: -0.00527684111148119
---------------------------------------------------------------------------------------------------


39it [06:01, 10.12s/it]

objective/kl: -0.7221450805664062
ppo/returns/mean: 0.5437317490577698
ppo/policy/advantages_mean: 0.01759791374206543
---------------------------------------------------------------------------------------------------


40it [06:12, 10.32s/it]

objective/kl: -1.5634404420852661
ppo/returns/mean: 0.6152448654174805
ppo/policy/advantages_mean: -0.001255047507584095
---------------------------------------------------------------------------------------------------


41it [06:22, 10.13s/it]

objective/kl: -1.1732131242752075
ppo/returns/mean: 0.6763641834259033
ppo/policy/advantages_mean: 0.016710881143808365
---------------------------------------------------------------------------------------------------


42it [06:31, 10.01s/it]

objective/kl: -1.5031253099441528
ppo/returns/mean: 0.6681853532791138
ppo/policy/advantages_mean: 0.004927687346935272
---------------------------------------------------------------------------------------------------


43it [06:42, 10.10s/it]

objective/kl: -1.5753588676452637
ppo/returns/mean: 0.6929349899291992
ppo/policy/advantages_mean: -0.0048222970217466354
---------------------------------------------------------------------------------------------------


44it [06:56, 11.33s/it]

objective/kl: -1.5198091268539429
ppo/returns/mean: 0.7568854093551636
ppo/policy/advantages_mean: 0.012718036770820618
---------------------------------------------------------------------------------------------------


45it [07:10, 11.99s/it]

objective/kl: -1.4996092319488525
ppo/returns/mean: 0.7259559035301208
ppo/policy/advantages_mean: 0.009840035811066628
---------------------------------------------------------------------------------------------------


46it [07:25, 13.15s/it]

objective/kl: -0.9507786631584167
ppo/returns/mean: 0.6552135944366455
ppo/policy/advantages_mean: 0.004326485097408295
---------------------------------------------------------------------------------------------------


47it [07:43, 14.36s/it]

objective/kl: -1.2876657247543335
ppo/returns/mean: 0.693859875202179
ppo/policy/advantages_mean: -0.023537028580904007
---------------------------------------------------------------------------------------------------


48it [08:00, 15.28s/it]

objective/kl: -1.1549830436706543
ppo/returns/mean: 0.6603123545646667
ppo/policy/advantages_mean: 9.790342301130295e-05
---------------------------------------------------------------------------------------------------


49it [08:15, 15.18s/it]

objective/kl: -1.4367729425430298
ppo/returns/mean: 0.683500349521637
ppo/policy/advantages_mean: 0.0064255837351083755
---------------------------------------------------------------------------------------------------


50it [08:28, 14.68s/it]

objective/kl: -0.991942286491394
ppo/returns/mean: 0.5641770958900452
ppo/policy/advantages_mean: -0.008543793112039566
---------------------------------------------------------------------------------------------------


51it [08:40, 13.82s/it]

objective/kl: -0.9770395159721375
ppo/returns/mean: 0.5963559150695801
ppo/policy/advantages_mean: -0.0034914519637823105
---------------------------------------------------------------------------------------------------


52it [08:52, 13.10s/it]

objective/kl: -1.7799625396728516
ppo/returns/mean: 0.6665254831314087
ppo/policy/advantages_mean: 0.0016519390046596527
---------------------------------------------------------------------------------------------------


53it [09:00, 11.81s/it]

objective/kl: -1.6331253051757812
ppo/returns/mean: 0.7649340629577637
ppo/policy/advantages_mean: -0.015886560082435608
---------------------------------------------------------------------------------------------------


54it [09:11, 11.30s/it]

objective/kl: -1.54524564743042
ppo/returns/mean: 0.6971887350082397
ppo/policy/advantages_mean: -0.011074826121330261
---------------------------------------------------------------------------------------------------


55it [09:24, 11.89s/it]

objective/kl: -1.6814417839050293
ppo/returns/mean: 0.6489653587341309
ppo/policy/advantages_mean: -0.05931863933801651
---------------------------------------------------------------------------------------------------


56it [09:37, 12.17s/it]

objective/kl: -1.7122420072555542
ppo/returns/mean: 0.7585437297821045
ppo/policy/advantages_mean: 0.028761224821209908
---------------------------------------------------------------------------------------------------


57it [09:49, 12.35s/it]

objective/kl: -1.2281758785247803
ppo/returns/mean: 0.7541275024414062
ppo/policy/advantages_mean: 0.02374233491718769
---------------------------------------------------------------------------------------------------


58it [10:00, 11.78s/it]

objective/kl: -1.391335129737854
ppo/returns/mean: 0.7510999441146851
ppo/policy/advantages_mean: -0.012521320953965187
---------------------------------------------------------------------------------------------------


59it [10:14, 12.55s/it]

objective/kl: -1.0626959800720215
ppo/returns/mean: 0.6887475252151489
ppo/policy/advantages_mean: -0.002054746961221099
---------------------------------------------------------------------------------------------------


60it [10:27, 12.63s/it]

objective/kl: -1.33425772190094
ppo/returns/mean: 0.8032708168029785
ppo/policy/advantages_mean: 0.03705165907740593
---------------------------------------------------------------------------------------------------


61it [10:41, 12.92s/it]

objective/kl: -1.443642258644104
ppo/returns/mean: 0.7721732258796692
ppo/policy/advantages_mean: 0.027509553357958794
---------------------------------------------------------------------------------------------------


62it [10:54, 13.03s/it]

objective/kl: -1.1571197509765625
ppo/returns/mean: 0.7247776389122009
ppo/policy/advantages_mean: 0.020592981949448586
---------------------------------------------------------------------------------------------------


63it [11:06, 12.62s/it]

objective/kl: -0.4807780981063843
ppo/returns/mean: 0.6282528042793274
ppo/policy/advantages_mean: -0.03550293296575546
---------------------------------------------------------------------------------------------------


64it [11:15, 11.69s/it]

objective/kl: -1.7470341920852661
ppo/returns/mean: 0.7939430475234985
ppo/policy/advantages_mean: -0.0068366434425115585
---------------------------------------------------------------------------------------------------


65it [11:25, 11.19s/it]

objective/kl: -1.4706357717514038
ppo/returns/mean: 0.8108559250831604
ppo/policy/advantages_mean: -0.04195154085755348
---------------------------------------------------------------------------------------------------


66it [11:35, 10.87s/it]

objective/kl: -3.0012807846069336
ppo/returns/mean: 0.8281902074813843
ppo/policy/advantages_mean: -0.0045660571195185184
---------------------------------------------------------------------------------------------------


67it [11:48, 11.54s/it]

objective/kl: -2.6771092414855957
ppo/returns/mean: 0.7990913987159729
ppo/policy/advantages_mean: 0.008126802742481232
---------------------------------------------------------------------------------------------------


68it [12:03, 12.56s/it]

objective/kl: -2.086404800415039
ppo/returns/mean: 0.7885903716087341
ppo/policy/advantages_mean: -0.02017766237258911
---------------------------------------------------------------------------------------------------


69it [12:24, 15.05s/it]

objective/kl: -2.6379127502441406
ppo/returns/mean: 0.8359607458114624
ppo/policy/advantages_mean: -0.019542748108506203
---------------------------------------------------------------------------------------------------


70it [12:39, 14.87s/it]

objective/kl: -1.8793213367462158
ppo/returns/mean: 0.8458739519119263
ppo/policy/advantages_mean: 0.09758055210113525
---------------------------------------------------------------------------------------------------


71it [12:58, 16.32s/it]

objective/kl: -1.4397075176239014
ppo/returns/mean: 0.7845053672790527
ppo/policy/advantages_mean: -0.006586600095033646
---------------------------------------------------------------------------------------------------


72it [13:18, 17.43s/it]

objective/kl: -1.5604491233825684
ppo/returns/mean: 0.7762390971183777
ppo/policy/advantages_mean: -0.00034676119685173035
---------------------------------------------------------------------------------------------------


73it [13:34, 16.97s/it]

objective/kl: -1.1210705041885376
ppo/returns/mean: 0.715211033821106
ppo/policy/advantages_mean: 0.016501832753419876
---------------------------------------------------------------------------------------------------


74it [13:46, 15.51s/it]

objective/kl: -1.9443068504333496
ppo/returns/mean: 0.8056647777557373
ppo/policy/advantages_mean: 0.02089846134185791
---------------------------------------------------------------------------------------------------


75it [14:01, 15.20s/it]

objective/kl: -1.4541819095611572
ppo/returns/mean: 0.7227985858917236
ppo/policy/advantages_mean: 0.0035407431423664093
---------------------------------------------------------------------------------------------------


76it [14:14, 14.56s/it]

objective/kl: -1.0778733491897583
ppo/returns/mean: 0.7234357595443726
ppo/policy/advantages_mean: -0.05719844251871109
---------------------------------------------------------------------------------------------------


77it [14:25, 13.44s/it]

objective/kl: -1.4673688411712646
ppo/returns/mean: 0.7104054093360901
ppo/policy/advantages_mean: -0.002076803706586361
---------------------------------------------------------------------------------------------------


78it [14:35, 12.56s/it]

objective/kl: -1.8283002376556396
ppo/returns/mean: 0.7415227890014648
ppo/policy/advantages_mean: -0.009147286415100098
---------------------------------------------------------------------------------------------------


79it [14:44, 11.51s/it]

objective/kl: -1.262743592262268
ppo/returns/mean: 0.7214829325675964
ppo/policy/advantages_mean: -0.016760192811489105
---------------------------------------------------------------------------------------------------


80it [14:54, 11.05s/it]

objective/kl: -1.2822389602661133
ppo/returns/mean: 0.7338069677352905
ppo/policy/advantages_mean: 0.006373940035700798
---------------------------------------------------------------------------------------------------


81it [15:08, 11.77s/it]

objective/kl: -0.4464561939239502
ppo/returns/mean: 0.6628580689430237
ppo/policy/advantages_mean: 0.0027691982686519623
---------------------------------------------------------------------------------------------------


82it [15:20, 12.06s/it]

objective/kl: -1.58591890335083
ppo/returns/mean: 0.691724419593811
ppo/policy/advantages_mean: 0.05132173374295235
---------------------------------------------------------------------------------------------------


83it [15:31, 11.67s/it]

objective/kl: -2.0202794075012207
ppo/returns/mean: 0.7324826121330261
ppo/policy/advantages_mean: 0.0038672499358654022
---------------------------------------------------------------------------------------------------


84it [15:42, 11.53s/it]

objective/kl: -2.146165132522583
ppo/returns/mean: 0.7169466018676758
ppo/policy/advantages_mean: 0.01806586980819702
---------------------------------------------------------------------------------------------------


85it [15:54, 11.53s/it]

objective/kl: -1.5822948217391968
ppo/returns/mean: 0.7749896049499512
ppo/policy/advantages_mean: 0.003012135624885559
---------------------------------------------------------------------------------------------------


86it [16:06, 11.81s/it]

objective/kl: -0.8087099194526672
ppo/returns/mean: 0.6686280965805054
ppo/policy/advantages_mean: -0.03423994034528732
---------------------------------------------------------------------------------------------------


87it [16:18, 11.69s/it]

objective/kl: -1.422277808189392
ppo/returns/mean: 0.7996482849121094
ppo/policy/advantages_mean: -0.00603809580206871
---------------------------------------------------------------------------------------------------


88it [16:34, 12.94s/it]

objective/kl: -2.482478141784668
ppo/returns/mean: 0.7825808525085449
ppo/policy/advantages_mean: 0.006221320480108261
---------------------------------------------------------------------------------------------------


89it [16:45, 12.53s/it]

objective/kl: -2.4128403663635254
ppo/returns/mean: 0.8492696285247803
ppo/policy/advantages_mean: -0.010511614382266998
---------------------------------------------------------------------------------------------------


90it [17:00, 13.33s/it]

objective/kl: -2.1270084381103516
ppo/returns/mean: 0.7341111898422241
ppo/policy/advantages_mean: -0.033264774829149246
---------------------------------------------------------------------------------------------------


91it [17:13, 13.25s/it]

objective/kl: -2.061938524246216
ppo/returns/mean: 0.740917444229126
ppo/policy/advantages_mean: -0.023139523342251778
---------------------------------------------------------------------------------------------------


92it [17:26, 12.96s/it]

objective/kl: -2.987795352935791
ppo/returns/mean: 0.9262752532958984
ppo/policy/advantages_mean: -0.04972357675433159
---------------------------------------------------------------------------------------------------


93it [17:44, 14.41s/it]

objective/kl: -3.4137985706329346
ppo/returns/mean: 0.9907602667808533
ppo/policy/advantages_mean: 0.024269215762615204
---------------------------------------------------------------------------------------------------


94it [18:04, 16.14s/it]

objective/kl: -3.347017526626587
ppo/returns/mean: 0.9355770349502563
ppo/policy/advantages_mean: 0.007806546986103058
---------------------------------------------------------------------------------------------------


95it [18:29, 18.82s/it]

objective/kl: -0.9573336243629456
ppo/returns/mean: 0.8183686137199402
ppo/policy/advantages_mean: 0.010271679610013962
---------------------------------------------------------------------------------------------------


96it [18:55, 20.97s/it]

objective/kl: -2.6872119903564453
ppo/returns/mean: 0.9305579662322998
ppo/policy/advantages_mean: 0.028464552015066147
---------------------------------------------------------------------------------------------------


97it [19:23, 23.01s/it]

objective/kl: 0.20921927690505981
ppo/returns/mean: 0.7652637958526611
ppo/policy/advantages_mean: 0.03234747052192688
---------------------------------------------------------------------------------------------------


98it [19:42, 21.94s/it]

objective/kl: -1.350314974784851
ppo/returns/mean: 0.753987193107605
ppo/policy/advantages_mean: 0.0027842186391353607
---------------------------------------------------------------------------------------------------


99it [19:59, 20.33s/it]

objective/kl: -2.1709916591644287
ppo/returns/mean: 0.7732476592063904
ppo/policy/advantages_mean: -0.0022934172302484512
---------------------------------------------------------------------------------------------------


100it [20:14, 18.95s/it]

objective/kl: -1.9746954441070557
ppo/returns/mean: 0.7302975058555603
ppo/policy/advantages_mean: 0.018338702619075775
---------------------------------------------------------------------------------------------------


101it [20:27, 17.19s/it]

objective/kl: -1.5396502017974854
ppo/returns/mean: 0.6551658511161804
ppo/policy/advantages_mean: -0.002171143889427185
---------------------------------------------------------------------------------------------------


102it [20:39, 15.43s/it]

objective/kl: -1.6994823217391968
ppo/returns/mean: 0.7536131739616394
ppo/policy/advantages_mean: -0.009098464623093605
---------------------------------------------------------------------------------------------------


103it [20:50, 14.05s/it]

objective/kl: -1.4287939071655273
ppo/returns/mean: 0.686443567276001
ppo/policy/advantages_mean: -0.0075796619057655334
---------------------------------------------------------------------------------------------------


104it [21:00, 13.10s/it]

objective/kl: -1.7679646015167236
ppo/returns/mean: 0.7482137680053711
ppo/policy/advantages_mean: -0.001214243471622467
---------------------------------------------------------------------------------------------------


105it [21:10, 12.01s/it]

objective/kl: -1.6693792343139648
ppo/returns/mean: 0.8235321044921875
ppo/policy/advantages_mean: 0.013081561774015427
---------------------------------------------------------------------------------------------------


106it [21:20, 11.33s/it]

objective/kl: -1.26454496383667
ppo/returns/mean: 0.7606746554374695
ppo/policy/advantages_mean: -0.026949767023324966
---------------------------------------------------------------------------------------------------


107it [21:31, 11.27s/it]

objective/kl: -1.213435411453247
ppo/returns/mean: 0.6572473049163818
ppo/policy/advantages_mean: -0.0036728763952851295
---------------------------------------------------------------------------------------------------


108it [21:40, 10.68s/it]

objective/kl: -1.1630185842514038
ppo/returns/mean: 0.7360529899597168
ppo/policy/advantages_mean: -0.007393501698970795
---------------------------------------------------------------------------------------------------


109it [21:50, 10.44s/it]

objective/kl: -0.578454852104187
ppo/returns/mean: 0.5495011806488037
ppo/policy/advantages_mean: 0.015930861234664917
---------------------------------------------------------------------------------------------------


110it [22:01, 10.73s/it]

objective/kl: -0.9189286828041077
ppo/returns/mean: 0.49608898162841797
ppo/policy/advantages_mean: -0.048852331936359406
---------------------------------------------------------------------------------------------------


111it [22:09,  9.84s/it]

objective/kl: -0.8939666748046875
ppo/returns/mean: 0.6029677987098694
ppo/policy/advantages_mean: -0.024261660873889923
---------------------------------------------------------------------------------------------------


112it [22:18,  9.64s/it]

objective/kl: -1.2203270196914673
ppo/returns/mean: 0.5586214065551758
ppo/policy/advantages_mean: -0.009863749146461487
---------------------------------------------------------------------------------------------------


113it [22:27,  9.21s/it]

objective/kl: -0.9965420365333557
ppo/returns/mean: 0.6504800319671631
ppo/policy/advantages_mean: 0.0073945000767707825
---------------------------------------------------------------------------------------------------


114it [22:35,  8.99s/it]

objective/kl: -0.7763785123825073
ppo/returns/mean: 0.541167140007019
ppo/policy/advantages_mean: 0.0065095387399196625
---------------------------------------------------------------------------------------------------


115it [22:44,  9.11s/it]

objective/kl: -1.5473175048828125
ppo/returns/mean: 0.6490627527236938
ppo/policy/advantages_mean: -0.022034883499145508
---------------------------------------------------------------------------------------------------


116it [22:52,  8.63s/it]

objective/kl: -1.1174572706222534
ppo/returns/mean: 0.6664177179336548
ppo/policy/advantages_mean: 0.03455619513988495
---------------------------------------------------------------------------------------------------


117it [23:00,  8.62s/it]

objective/kl: -1.8824700117111206
ppo/returns/mean: 0.7212419509887695
ppo/policy/advantages_mean: -0.11525994539260864
---------------------------------------------------------------------------------------------------


118it [23:10,  8.93s/it]

objective/kl: -1.3975090980529785
ppo/returns/mean: 0.6901370286941528
ppo/policy/advantages_mean: -0.004224568605422974
---------------------------------------------------------------------------------------------------


119it [23:19,  8.89s/it]

objective/kl: -1.0557494163513184
ppo/returns/mean: 0.647483229637146
ppo/policy/advantages_mean: -0.002411782741546631
---------------------------------------------------------------------------------------------------


120it [23:26,  8.34s/it]

objective/kl: -0.2988770306110382
ppo/returns/mean: 0.5878158807754517
ppo/policy/advantages_mean: 0.04059098660945892
---------------------------------------------------------------------------------------------------


121it [23:34,  8.32s/it]

objective/kl: -1.0824874639511108
ppo/returns/mean: 0.6364664435386658
ppo/policy/advantages_mean: -0.02306334301829338
---------------------------------------------------------------------------------------------------


122it [23:44,  8.84s/it]

objective/kl: -1.0896753072738647
ppo/returns/mean: 0.6262551546096802
ppo/policy/advantages_mean: -0.012344598770141602
---------------------------------------------------------------------------------------------------


123it [23:56,  9.61s/it]

objective/kl: -0.2999870181083679
ppo/returns/mean: 0.5121142864227295
ppo/policy/advantages_mean: 0.05953097343444824
---------------------------------------------------------------------------------------------------


124it [24:04,  9.32s/it]

objective/kl: -0.8404608368873596
ppo/returns/mean: 0.6582728028297424
ppo/policy/advantages_mean: 0.010774110443890095
---------------------------------------------------------------------------------------------------


125it [24:14,  9.42s/it]

objective/kl: -1.3106569051742554
ppo/returns/mean: 0.6643445491790771
ppo/policy/advantages_mean: -0.020604610443115234
---------------------------------------------------------------------------------------------------


126it [24:23,  9.41s/it]

objective/kl: -0.8337984085083008
ppo/returns/mean: 0.5712933540344238
ppo/policy/advantages_mean: -0.02881639078259468
---------------------------------------------------------------------------------------------------


127it [24:34,  9.61s/it]

objective/kl: -2.027538776397705
ppo/returns/mean: 0.7741102576255798
ppo/policy/advantages_mean: 0.022996827960014343
---------------------------------------------------------------------------------------------------


128it [24:44, 10.00s/it]

objective/kl: -1.3695950508117676
ppo/returns/mean: 0.6652207374572754
ppo/policy/advantages_mean: -0.0019252672791481018
---------------------------------------------------------------------------------------------------


129it [24:55, 10.18s/it]

objective/kl: -1.8408793210983276
ppo/returns/mean: 0.7170936465263367
ppo/policy/advantages_mean: 0.005891173612326384
---------------------------------------------------------------------------------------------------


130it [25:06, 10.43s/it]

objective/kl: -1.5603500604629517
ppo/returns/mean: 0.723102331161499
ppo/policy/advantages_mean: -0.006623748689889908
---------------------------------------------------------------------------------------------------


131it [25:18, 10.88s/it]

objective/kl: -2.8614678382873535
ppo/returns/mean: 0.7825156450271606
ppo/policy/advantages_mean: -0.03594866022467613
---------------------------------------------------------------------------------------------------


132it [25:34, 12.38s/it]

objective/kl: -2.9031624794006348
ppo/returns/mean: 0.8115837574005127
ppo/policy/advantages_mean: 0.09440987557172775
---------------------------------------------------------------------------------------------------


133it [25:47, 12.55s/it]

objective/kl: -2.6749937534332275
ppo/returns/mean: 0.8566739559173584
ppo/policy/advantages_mean: -0.009841524064540863
---------------------------------------------------------------------------------------------------


134it [26:02, 13.44s/it]

objective/kl: -3.97157621383667
ppo/returns/mean: 0.9251846671104431
ppo/policy/advantages_mean: -0.022515177726745605
---------------------------------------------------------------------------------------------------


135it [26:18, 14.11s/it]

objective/kl: -3.3480186462402344
ppo/returns/mean: 0.9257174730300903
ppo/policy/advantages_mean: 0.00077810138463974
---------------------------------------------------------------------------------------------------


136it [26:36, 15.15s/it]

objective/kl: -1.7745860815048218
ppo/returns/mean: 0.8192996978759766
ppo/policy/advantages_mean: 0.052374549210071564
---------------------------------------------------------------------------------------------------


137it [26:52, 15.65s/it]

objective/kl: -4.154860019683838
ppo/returns/mean: 1.0083552598953247
ppo/policy/advantages_mean: 0.024656232446432114
---------------------------------------------------------------------------------------------------


138it [27:16, 18.13s/it]

objective/kl: -3.791783571243286
ppo/returns/mean: 0.9378597140312195
ppo/policy/advantages_mean: -0.022278251126408577
---------------------------------------------------------------------------------------------------


139it [27:37, 18.78s/it]

objective/kl: -4.373902797698975
ppo/returns/mean: 0.9984107613563538
ppo/policy/advantages_mean: -0.022381477057933807
---------------------------------------------------------------------------------------------------


140it [28:03, 20.95s/it]

objective/kl: -1.9157238006591797
ppo/returns/mean: 0.829619824886322
ppo/policy/advantages_mean: 0.0463375449180603
---------------------------------------------------------------------------------------------------


141it [28:30, 23.00s/it]

objective/kl: -4.166494369506836
ppo/returns/mean: 1.0365474224090576
ppo/policy/advantages_mean: 0.004856802523136139
---------------------------------------------------------------------------------------------------


142it [28:59, 24.81s/it]

objective/kl: -3.5510354042053223
ppo/returns/mean: 0.9177241921424866
ppo/policy/advantages_mean: 0.001719452440738678
---------------------------------------------------------------------------------------------------


143it [29:33, 27.33s/it]

objective/kl: -1.6949923038482666
ppo/returns/mean: 0.7837955951690674
ppo/policy/advantages_mean: 0.04451793432235718
---------------------------------------------------------------------------------------------------


144it [30:01, 27.75s/it]

objective/kl: -2.8508358001708984
ppo/returns/mean: 0.9114658832550049
ppo/policy/advantages_mean: 0.05965403467416763
---------------------------------------------------------------------------------------------------


145it [30:26, 26.85s/it]

objective/kl: -2.847445011138916
ppo/returns/mean: 0.8729373216629028
ppo/policy/advantages_mean: -0.006678164005279541
---------------------------------------------------------------------------------------------------


146it [30:51, 26.36s/it]

objective/kl: -2.2353076934814453
ppo/returns/mean: 0.7582964301109314
ppo/policy/advantages_mean: -0.08033685386180878
---------------------------------------------------------------------------------------------------


147it [31:09, 23.62s/it]

objective/kl: -2.9334731101989746
ppo/returns/mean: 0.8921411633491516
ppo/policy/advantages_mean: 0.015132170170545578
---------------------------------------------------------------------------------------------------


148it [31:24, 21.15s/it]

objective/kl: -2.02217960357666
ppo/returns/mean: 0.6693365573883057
ppo/policy/advantages_mean: 0.02897193282842636
---------------------------------------------------------------------------------------------------


149it [31:37, 18.75s/it]

objective/kl: -2.6727466583251953
ppo/returns/mean: 0.9010158777236938
ppo/policy/advantages_mean: 0.05709328502416611
---------------------------------------------------------------------------------------------------


150it [31:51, 17.43s/it]

objective/kl: -3.2599053382873535
ppo/returns/mean: 0.8589754104614258
ppo/policy/advantages_mean: 0.004016231745481491
---------------------------------------------------------------------------------------------------


151it [32:03, 15.66s/it]

objective/kl: -2.4042277336120605
ppo/returns/mean: 0.8061999082565308
ppo/policy/advantages_mean: 0.0029147248715162277
---------------------------------------------------------------------------------------------------


152it [32:14, 14.17s/it]

objective/kl: -2.085346221923828
ppo/returns/mean: 0.8265884518623352
ppo/policy/advantages_mean: -0.021434728056192398
---------------------------------------------------------------------------------------------------


153it [32:24, 12.93s/it]

objective/kl: -1.5276203155517578
ppo/returns/mean: 0.7872822284698486
ppo/policy/advantages_mean: -0.04243305325508118
---------------------------------------------------------------------------------------------------


154it [32:37, 13.14s/it]

objective/kl: -3.37161922454834
ppo/returns/mean: 0.8479808568954468
ppo/policy/advantages_mean: 0.0203323382884264
---------------------------------------------------------------------------------------------------


155it [32:50, 13.08s/it]

objective/kl: -1.6663020849227905
ppo/returns/mean: 0.7707113027572632
ppo/policy/advantages_mean: 0.015860378742218018
---------------------------------------------------------------------------------------------------


156it [33:04, 13.26s/it]

objective/kl: -1.921569585800171
ppo/returns/mean: 0.7316107749938965
ppo/policy/advantages_mean: -0.01202500332146883
---------------------------------------------------------------------------------------------------


157it [33:17, 13.07s/it]

objective/kl: -2.139841079711914
ppo/returns/mean: 0.7936105728149414
ppo/policy/advantages_mean: -0.025357402861118317
---------------------------------------------------------------------------------------------------


158it [33:31, 13.39s/it]

objective/kl: -1.8794033527374268
ppo/returns/mean: 0.7360802888870239
ppo/policy/advantages_mean: -0.027264557778835297
---------------------------------------------------------------------------------------------------


159it [33:45, 13.56s/it]

objective/kl: -1.1911708116531372
ppo/returns/mean: 0.6859667301177979
ppo/policy/advantages_mean: 0.03700871765613556
---------------------------------------------------------------------------------------------------


160it [33:57, 13.13s/it]

objective/kl: -2.603936195373535
ppo/returns/mean: 0.7850006818771362
ppo/policy/advantages_mean: 0.0008783340454101562
---------------------------------------------------------------------------------------------------


161it [34:06, 12.00s/it]

objective/kl: -1.922682285308838
ppo/returns/mean: 0.742517352104187
ppo/policy/advantages_mean: -0.017559930682182312
---------------------------------------------------------------------------------------------------


162it [34:19, 12.34s/it]

objective/kl: -3.817892551422119
ppo/returns/mean: 0.9434998035430908
ppo/policy/advantages_mean: -0.004528246819972992
---------------------------------------------------------------------------------------------------


163it [34:30, 11.91s/it]

objective/kl: -3.0381383895874023
ppo/returns/mean: 0.916555643081665
ppo/policy/advantages_mean: 0.009890548884868622
---------------------------------------------------------------------------------------------------


164it [34:42, 12.01s/it]

objective/kl: -2.6331357955932617
ppo/returns/mean: 0.8055295348167419
ppo/policy/advantages_mean: -0.02194134332239628
---------------------------------------------------------------------------------------------------


165it [34:55, 12.17s/it]

objective/kl: -3.2560462951660156
ppo/returns/mean: 0.9538691639900208
ppo/policy/advantages_mean: -0.04268021136522293
---------------------------------------------------------------------------------------------------


166it [35:09, 12.73s/it]

objective/kl: -4.732697486877441
ppo/returns/mean: 1.0218359231948853
ppo/policy/advantages_mean: 0.003912054002285004
---------------------------------------------------------------------------------------------------


167it [35:28, 14.54s/it]

objective/kl: -6.3043131828308105
ppo/returns/mean: 1.0968486070632935
ppo/policy/advantages_mean: -0.04059867560863495
---------------------------------------------------------------------------------------------------


168it [35:49, 16.48s/it]

objective/kl: -3.5630176067352295
ppo/returns/mean: 1.0375189781188965
ppo/policy/advantages_mean: 0.013732008635997772
---------------------------------------------------------------------------------------------------


169it [36:10, 17.87s/it]

objective/kl: -3.322152614593506
ppo/returns/mean: 0.9430988430976868
ppo/policy/advantages_mean: 0.02459847182035446
---------------------------------------------------------------------------------------------------


170it [36:36, 20.24s/it]

objective/kl: -3.9868133068084717
ppo/returns/mean: 0.9906196594238281
ppo/policy/advantages_mean: 0.06129683554172516
---------------------------------------------------------------------------------------------------


171it [37:06, 23.35s/it]

objective/kl: -3.754753589630127
ppo/returns/mean: 0.9214943051338196
ppo/policy/advantages_mean: -0.024149656295776367
---------------------------------------------------------------------------------------------------


172it [37:31, 23.87s/it]

objective/kl: -3.3182034492492676
ppo/returns/mean: 1.014438271522522
ppo/policy/advantages_mean: -0.010881975293159485
---------------------------------------------------------------------------------------------------


173it [37:58, 24.81s/it]

objective/kl: -3.7555766105651855
ppo/returns/mean: 1.0255122184753418
ppo/policy/advantages_mean: -0.002564552705734968
---------------------------------------------------------------------------------------------------


174it [38:21, 24.15s/it]

objective/kl: -4.121359825134277
ppo/returns/mean: 0.9966930150985718
ppo/policy/advantages_mean: 0.01027921587228775
---------------------------------------------------------------------------------------------------


175it [38:48, 24.96s/it]

objective/kl: -3.9403328895568848
ppo/returns/mean: 0.9302552938461304
ppo/policy/advantages_mean: 0.039244718849658966
---------------------------------------------------------------------------------------------------


176it [39:07, 23.34s/it]

objective/kl: -3.828000545501709
ppo/returns/mean: 1.0246362686157227
ppo/policy/advantages_mean: 0.11282062530517578
---------------------------------------------------------------------------------------------------


177it [39:27, 22.14s/it]

objective/kl: -4.24734354019165
ppo/returns/mean: 1.0049564838409424
ppo/policy/advantages_mean: -0.01100146397948265
---------------------------------------------------------------------------------------------------


178it [39:45, 20.87s/it]

objective/kl: -3.40034818649292
ppo/returns/mean: 0.9260237216949463
ppo/policy/advantages_mean: 0.027424823492765427
---------------------------------------------------------------------------------------------------


179it [40:02, 19.69s/it]

objective/kl: -4.076220512390137
ppo/returns/mean: 1.0534144639968872
ppo/policy/advantages_mean: -0.014269106090068817
---------------------------------------------------------------------------------------------------


180it [40:19, 18.94s/it]

objective/kl: -3.254011392593384
ppo/returns/mean: 0.9767940044403076
ppo/policy/advantages_mean: -0.007180608808994293
---------------------------------------------------------------------------------------------------


181it [40:36, 18.28s/it]

objective/kl: -2.670846462249756
ppo/returns/mean: 0.9066332578659058
ppo/policy/advantages_mean: -0.008380088955163956
---------------------------------------------------------------------------------------------------


182it [40:49, 16.80s/it]

objective/kl: -2.013742446899414
ppo/returns/mean: 0.9283477067947388
ppo/policy/advantages_mean: -0.01692429557442665
---------------------------------------------------------------------------------------------------


183it [41:04, 16.24s/it]

objective/kl: -2.1725096702575684
ppo/returns/mean: 0.8668516874313354
ppo/policy/advantages_mean: -0.012864496558904648
---------------------------------------------------------------------------------------------------


184it [41:18, 15.63s/it]

objective/kl: -0.9263627529144287
ppo/returns/mean: 0.729840874671936
ppo/policy/advantages_mean: 0.02546612173318863
---------------------------------------------------------------------------------------------------


185it [41:30, 14.52s/it]

objective/kl: -2.1132636070251465
ppo/returns/mean: 0.8394311666488647
ppo/policy/advantages_mean: 0.03821747750043869
---------------------------------------------------------------------------------------------------


186it [41:43, 13.99s/it]

objective/kl: -1.4860596656799316
ppo/returns/mean: 0.7663471698760986
ppo/policy/advantages_mean: 0.008141061291098595
---------------------------------------------------------------------------------------------------


187it [41:55, 13.37s/it]

objective/kl: -2.1655139923095703
ppo/returns/mean: 0.7387441992759705
ppo/policy/advantages_mean: -0.018613610416650772
---------------------------------------------------------------------------------------------------


188it [42:07, 13.12s/it]

objective/kl: -2.054025888442993
ppo/returns/mean: 0.6917932033538818
ppo/policy/advantages_mean: -0.03503081947565079
---------------------------------------------------------------------------------------------------


189it [42:19, 12.79s/it]

objective/kl: -1.455916166305542
ppo/returns/mean: 0.7679411172866821
ppo/policy/advantages_mean: 0.03036276064813137
---------------------------------------------------------------------------------------------------


190it [42:31, 12.44s/it]

objective/kl: -3.116950511932373
ppo/returns/mean: 0.8088729381561279
ppo/policy/advantages_mean: 0.016476809978485107
---------------------------------------------------------------------------------------------------


191it [42:42, 11.93s/it]

objective/kl: -2.3148789405822754
ppo/returns/mean: 0.76561039686203
ppo/policy/advantages_mean: -0.00967395305633545
---------------------------------------------------------------------------------------------------


192it [42:51, 11.06s/it]

objective/kl: -2.193833589553833
ppo/returns/mean: 0.7990274429321289
ppo/policy/advantages_mean: -0.0040928516536951065
---------------------------------------------------------------------------------------------------


193it [42:59, 10.40s/it]

objective/kl: -0.9078240990638733
ppo/returns/mean: 0.6299028396606445
ppo/policy/advantages_mean: 0.008713874965906143
---------------------------------------------------------------------------------------------------


194it [43:10, 10.47s/it]

objective/kl: -1.9085787534713745
ppo/returns/mean: 0.7028199434280396
ppo/policy/advantages_mean: -0.001538977026939392
---------------------------------------------------------------------------------------------------


195it [43:20, 10.23s/it]

objective/kl: -2.107822895050049
ppo/returns/mean: 0.7211138606071472
ppo/policy/advantages_mean: 0.004849746823310852
---------------------------------------------------------------------------------------------------


196it [43:28,  9.69s/it]

objective/kl: -1.7622644901275635
ppo/returns/mean: 0.7927483320236206
ppo/policy/advantages_mean: -0.014579810202121735
---------------------------------------------------------------------------------------------------


197it [43:38,  9.85s/it]

objective/kl: -2.702582597732544
ppo/returns/mean: 0.769651472568512
ppo/policy/advantages_mean: -0.006382569670677185
---------------------------------------------------------------------------------------------------


198it [43:50, 10.47s/it]

objective/kl: -3.017589569091797
ppo/returns/mean: 0.8006900548934937
ppo/policy/advantages_mean: 0.0003537209704518318
---------------------------------------------------------------------------------------------------


199it [44:02, 10.77s/it]

objective/kl: -3.579777956008911
ppo/returns/mean: 0.8777177929878235
ppo/policy/advantages_mean: 0.002556869760155678
---------------------------------------------------------------------------------------------------


200it [44:13, 10.98s/it]

objective/kl: -2.7726898193359375
ppo/returns/mean: 0.9052552580833435
ppo/policy/advantages_mean: 0.0005616843700408936
---------------------------------------------------------------------------------------------------


201it [44:28, 12.00s/it]

objective/kl: -2.220829963684082
ppo/returns/mean: 0.8468736410140991
ppo/policy/advantages_mean: -0.0021770428866147995
---------------------------------------------------------------------------------------------------


202it [44:47, 14.14s/it]

objective/kl: -3.255743980407715
ppo/returns/mean: 0.9205949306488037
ppo/policy/advantages_mean: 0.03452986106276512
---------------------------------------------------------------------------------------------------


203it [45:01, 14.21s/it]

objective/kl: -1.6814837455749512
ppo/returns/mean: 0.8103760480880737
ppo/policy/advantages_mean: 0.06878107786178589
---------------------------------------------------------------------------------------------------


204it [45:16, 14.49s/it]

objective/kl: -1.9952001571655273
ppo/returns/mean: 0.7348066568374634
ppo/policy/advantages_mean: -0.020624497905373573
---------------------------------------------------------------------------------------------------


205it [45:31, 14.56s/it]

objective/kl: -2.831493377685547
ppo/returns/mean: 0.8497626781463623
ppo/policy/advantages_mean: -0.002914431504905224
---------------------------------------------------------------------------------------------------


206it [45:44, 14.06s/it]

objective/kl: -1.0671095848083496
ppo/returns/mean: 0.734153151512146
ppo/policy/advantages_mean: -0.004349306225776672
---------------------------------------------------------------------------------------------------


207it [45:57, 13.66s/it]

objective/kl: -2.4771575927734375
ppo/returns/mean: 0.7847490310668945
ppo/policy/advantages_mean: -0.015642531216144562
---------------------------------------------------------------------------------------------------


208it [46:08, 13.02s/it]

objective/kl: -1.4636473655700684
ppo/returns/mean: 0.7292807102203369
ppo/policy/advantages_mean: 0.04480237141251564
---------------------------------------------------------------------------------------------------


209it [46:23, 13.42s/it]

objective/kl: -2.3471126556396484
ppo/returns/mean: 0.8354038000106812
ppo/policy/advantages_mean: -0.020582033321261406
---------------------------------------------------------------------------------------------------


210it [46:33, 12.69s/it]

objective/kl: -2.1973583698272705
ppo/returns/mean: 0.8514808416366577
ppo/policy/advantages_mean: 0.006635535508394241
---------------------------------------------------------------------------------------------------


211it [46:48, 13.13s/it]

objective/kl: -2.3789758682250977
ppo/returns/mean: 0.8556070327758789
ppo/policy/advantages_mean: -0.0097384974360466
---------------------------------------------------------------------------------------------------


212it [46:59, 12.63s/it]

objective/kl: -1.8760063648223877
ppo/returns/mean: 0.787696361541748
ppo/policy/advantages_mean: 0.000857718288898468
---------------------------------------------------------------------------------------------------


213it [47:10, 12.19s/it]

objective/kl: -2.3207406997680664
ppo/returns/mean: 0.9128087759017944
ppo/policy/advantages_mean: 0.018390566110610962
---------------------------------------------------------------------------------------------------


214it [47:23, 12.24s/it]

objective/kl: -2.662790298461914
ppo/returns/mean: 0.8529129028320312
ppo/policy/advantages_mean: 0.006764117628335953
---------------------------------------------------------------------------------------------------


215it [47:39, 13.39s/it]

objective/kl: -3.438089370727539
ppo/returns/mean: 0.932243287563324
ppo/policy/advantages_mean: -0.016778014600276947
---------------------------------------------------------------------------------------------------


216it [47:54, 13.98s/it]

objective/kl: -3.070232391357422
ppo/returns/mean: 0.8939450979232788
ppo/policy/advantages_mean: -0.011899488046765327
---------------------------------------------------------------------------------------------------


217it [48:09, 14.27s/it]

objective/kl: -3.9583566188812256
ppo/returns/mean: 0.9561830759048462
ppo/policy/advantages_mean: 0.004662513732910156
---------------------------------------------------------------------------------------------------


218it [48:32, 17.02s/it]

objective/kl: -4.221391677856445
ppo/returns/mean: 0.8906314373016357
ppo/policy/advantages_mean: -0.05954872444272041
---------------------------------------------------------------------------------------------------


219it [48:59, 19.85s/it]

objective/kl: -2.1779282093048096
ppo/returns/mean: 0.8715704083442688
ppo/policy/advantages_mean: 0.001994617283344269
---------------------------------------------------------------------------------------------------


220it [49:28, 22.70s/it]

objective/kl: -3.1583633422851562
ppo/returns/mean: 0.8238459229469299
ppo/policy/advantages_mean: -0.03692569211125374
---------------------------------------------------------------------------------------------------


221it [49:55, 23.91s/it]

objective/kl: -3.501976490020752
ppo/returns/mean: 0.9226237535476685
ppo/policy/advantages_mean: 0.05402318015694618
---------------------------------------------------------------------------------------------------


222it [50:15, 22.77s/it]

objective/kl: -4.157441139221191
ppo/returns/mean: 0.9529633522033691
ppo/policy/advantages_mean: -0.00010171905159950256
---------------------------------------------------------------------------------------------------


223it [50:38, 22.83s/it]

objective/kl: -4.388575553894043
ppo/returns/mean: 1.073782205581665
ppo/policy/advantages_mean: -0.012100644409656525
---------------------------------------------------------------------------------------------------


224it [51:00, 22.48s/it]

objective/kl: -2.4625701904296875
ppo/returns/mean: 0.9062155485153198
ppo/policy/advantages_mean: -0.02165212482213974
---------------------------------------------------------------------------------------------------


225it [51:20, 21.68s/it]

objective/kl: -2.7964043617248535
ppo/returns/mean: 0.8841601610183716
ppo/policy/advantages_mean: -0.036769650876522064
---------------------------------------------------------------------------------------------------


226it [51:36, 20.16s/it]

objective/kl: -1.2651593685150146
ppo/returns/mean: 0.7881633043289185
ppo/policy/advantages_mean: 0.01968066394329071
---------------------------------------------------------------------------------------------------


227it [51:50, 18.26s/it]

objective/kl: -3.291015625
ppo/returns/mean: 0.9316723346710205
ppo/policy/advantages_mean: 0.0059049054980278015
---------------------------------------------------------------------------------------------------


228it [52:08, 18.08s/it]

objective/kl: -3.3098015785217285
ppo/returns/mean: 0.9505269527435303
ppo/policy/advantages_mean: 0.03799181431531906
---------------------------------------------------------------------------------------------------


229it [52:28, 18.70s/it]

objective/kl: -6.088420391082764
ppo/returns/mean: 1.086574912071228
ppo/policy/advantages_mean: 0.015820950269699097
---------------------------------------------------------------------------------------------------


230it [52:51, 20.17s/it]

objective/kl: -3.4846320152282715
ppo/returns/mean: 0.9923000335693359
ppo/policy/advantages_mean: -0.01971200481057167
---------------------------------------------------------------------------------------------------


231it [53:18, 22.25s/it]

objective/kl: -5.017145156860352
ppo/returns/mean: 1.0450040102005005
ppo/policy/advantages_mean: -0.00608430802822113
---------------------------------------------------------------------------------------------------


232it [54:02, 28.55s/it]

objective/kl: -3.647104263305664
ppo/returns/mean: 0.958116352558136
ppo/policy/advantages_mean: 0.03226175159215927
---------------------------------------------------------------------------------------------------


233it [54:33, 29.37s/it]

objective/kl: -4.9859771728515625
ppo/returns/mean: 1.11597740650177
ppo/policy/advantages_mean: 0.08815966546535492
---------------------------------------------------------------------------------------------------


234it [55:12, 32.27s/it]

objective/kl: -4.42000675201416
ppo/returns/mean: 1.0237975120544434
ppo/policy/advantages_mean: 0.016095472499728203
---------------------------------------------------------------------------------------------------


235it [56:04, 38.05s/it]

objective/kl: -2.239478588104248
ppo/returns/mean: 0.8557825088500977
ppo/policy/advantages_mean: -0.055526696145534515
---------------------------------------------------------------------------------------------------


236it [56:50, 40.59s/it]

objective/kl: 0.29508042335510254
ppo/returns/mean: 0.7245229482650757
ppo/policy/advantages_mean: 0.003619575873017311
---------------------------------------------------------------------------------------------------


237it [57:34, 41.48s/it]

objective/kl: -4.425642490386963
ppo/returns/mean: 0.9178788065910339
ppo/policy/advantages_mean: 0.03511352837085724
---------------------------------------------------------------------------------------------------


238it [58:06, 38.60s/it]

objective/kl: -2.839719772338867
ppo/returns/mean: 0.8470455408096313
ppo/policy/advantages_mean: 0.008377838879823685
---------------------------------------------------------------------------------------------------


239it [58:29, 34.03s/it]

objective/kl: -1.7583515644073486
ppo/returns/mean: 0.835813045501709
ppo/policy/advantages_mean: -0.005830220878124237
---------------------------------------------------------------------------------------------------


240it [58:47, 29.10s/it]

objective/kl: -2.0029423236846924
ppo/returns/mean: 0.8772169351577759
ppo/policy/advantages_mean: 0.013900190591812134
---------------------------------------------------------------------------------------------------


241it [59:02, 24.88s/it]

objective/kl: -2.755728244781494
ppo/returns/mean: 0.9313049912452698
ppo/policy/advantages_mean: -0.053566135466098785
---------------------------------------------------------------------------------------------------


242it [59:15, 21.31s/it]

objective/kl: -2.5712432861328125
ppo/returns/mean: 0.8811037540435791
ppo/policy/advantages_mean: -0.031915273517370224
---------------------------------------------------------------------------------------------------


243it [59:29, 19.13s/it]

objective/kl: -3.971700668334961
ppo/returns/mean: 0.9825763702392578
ppo/policy/advantages_mean: 0.00815783441066742
---------------------------------------------------------------------------------------------------


244it [59:44, 17.88s/it]

objective/kl: -3.814107894897461
ppo/returns/mean: 0.888834536075592
ppo/policy/advantages_mean: -0.031036697328090668
---------------------------------------------------------------------------------------------------


245it [1:00:00, 17.48s/it]

objective/kl: -5.078830718994141
ppo/returns/mean: 1.0412161350250244
ppo/policy/advantages_mean: 0.03138907253742218
---------------------------------------------------------------------------------------------------


246it [1:00:18, 17.72s/it]

objective/kl: -4.026834487915039
ppo/returns/mean: 1.0045976638793945
ppo/policy/advantages_mean: -0.00857546180486679
---------------------------------------------------------------------------------------------------


247it [1:00:47, 21.02s/it]

objective/kl: -6.219681739807129
ppo/returns/mean: 1.0033340454101562
ppo/policy/advantages_mean: -0.05068299174308777
---------------------------------------------------------------------------------------------------


248it [1:01:16, 23.36s/it]

objective/kl: -5.591446876525879
ppo/returns/mean: 1.0241512060165405
ppo/policy/advantages_mean: 0.008246414363384247
---------------------------------------------------------------------------------------------------


249it [1:01:46, 25.39s/it]

objective/kl: -4.979908466339111
ppo/returns/mean: 1.017818570137024
ppo/policy/advantages_mean: -0.0404021292924881
---------------------------------------------------------------------------------------------------


250it [1:02:34, 32.22s/it]

objective/kl: -3.6300060749053955
ppo/returns/mean: 0.8894491195678711
ppo/policy/advantages_mean: 0.010831902734935284
---------------------------------------------------------------------------------------------------


251it [1:03:06, 32.19s/it]

objective/kl: -5.361064910888672
ppo/returns/mean: 1.0704678297042847
ppo/policy/advantages_mean: 0.019998516887426376
---------------------------------------------------------------------------------------------------


252it [1:03:39, 32.37s/it]

objective/kl: -5.701337814331055
ppo/returns/mean: 1.054405689239502
ppo/policy/advantages_mean: -0.13009308278560638
---------------------------------------------------------------------------------------------------


253it [1:04:21, 35.15s/it]

objective/kl: -2.4311108589172363
ppo/returns/mean: 0.8868273496627808
ppo/policy/advantages_mean: 0.0017896238714456558
---------------------------------------------------------------------------------------------------


254it [1:04:59, 36.04s/it]

objective/kl: -4.38209867477417
ppo/returns/mean: 0.9977056980133057
ppo/policy/advantages_mean: 0.010566549375653267
---------------------------------------------------------------------------------------------------


255it [1:05:34, 35.84s/it]

objective/kl: -5.4017534255981445
ppo/returns/mean: 1.061455249786377
ppo/policy/advantages_mean: -0.0015239492058753967
---------------------------------------------------------------------------------------------------


256it [1:06:03, 33.82s/it]

objective/kl: -3.600895881652832
ppo/returns/mean: 0.939708948135376
ppo/policy/advantages_mean: 0.0041089244186878204
---------------------------------------------------------------------------------------------------


257it [1:06:37, 33.90s/it]

objective/kl: -0.856121301651001
ppo/returns/mean: 0.8945292234420776
ppo/policy/advantages_mean: -0.025845471769571304
---------------------------------------------------------------------------------------------------


258it [1:07:07, 32.63s/it]

objective/kl: -6.298117637634277
ppo/returns/mean: 1.0195780992507935
ppo/policy/advantages_mean: 0.0021218927577137947
---------------------------------------------------------------------------------------------------


259it [1:07:32, 30.34s/it]

objective/kl: 1.06049382686615
ppo/returns/mean: 0.8232981562614441
ppo/policy/advantages_mean: 0.02213633991777897
---------------------------------------------------------------------------------------------------


260it [1:07:52, 27.26s/it]

objective/kl: 3.567368984222412
ppo/returns/mean: 0.6098740100860596
ppo/policy/advantages_mean: 0.00929972529411316
---------------------------------------------------------------------------------------------------


261it [1:08:13, 25.40s/it]

objective/kl: 1.5724413394927979
ppo/returns/mean: 0.6570947170257568
ppo/policy/advantages_mean: 0.01110973209142685
---------------------------------------------------------------------------------------------------


262it [1:08:34, 23.95s/it]

objective/kl: 4.973761081695557
ppo/returns/mean: 0.5173565149307251
ppo/policy/advantages_mean: 0.04605090618133545
---------------------------------------------------------------------------------------------------


263it [1:08:52, 22.13s/it]

objective/kl: 8.367853164672852
ppo/returns/mean: 0.3400565981864929
ppo/policy/advantages_mean: -0.006115861237049103
---------------------------------------------------------------------------------------------------


264it [1:09:08, 20.43s/it]

objective/kl: 4.4773149490356445
ppo/returns/mean: 0.3711012899875641
ppo/policy/advantages_mean: -0.0003191530704498291
---------------------------------------------------------------------------------------------------


265it [1:09:23, 18.75s/it]

objective/kl: 13.482664108276367
ppo/returns/mean: -0.006455279886722565
ppo/policy/advantages_mean: 0.0029156990349292755
---------------------------------------------------------------------------------------------------


266it [1:09:38, 17.78s/it]

objective/kl: 10.225727081298828
ppo/returns/mean: -0.0034971851855516434
ppo/policy/advantages_mean: 0.0009907577186822891
---------------------------------------------------------------------------------------------------


267it [1:09:54, 16.97s/it]

objective/kl: 14.835107803344727
ppo/returns/mean: -0.13150347769260406
ppo/policy/advantages_mean: 0.0004120953381061554
---------------------------------------------------------------------------------------------------


268it [1:10:08, 16.12s/it]

objective/kl: 17.04723358154297
ppo/returns/mean: -0.37344613671302795
ppo/policy/advantages_mean: 0.02324342355132103
---------------------------------------------------------------------------------------------------


269it [1:10:21, 15.15s/it]

objective/kl: 16.079051971435547
ppo/returns/mean: -0.5713191628456116
ppo/policy/advantages_mean: -0.033713553100824356
---------------------------------------------------------------------------------------------------


270it [1:10:30, 13.51s/it]

objective/kl: 24.27459716796875
ppo/returns/mean: -0.8115023970603943
ppo/policy/advantages_mean: 0.0005918149836361408
---------------------------------------------------------------------------------------------------


271it [1:10:41, 12.67s/it]

objective/kl: 17.271244049072266
ppo/returns/mean: -0.630814790725708
ppo/policy/advantages_mean: 0.057637326419353485
---------------------------------------------------------------------------------------------------


272it [1:10:51, 11.80s/it]

objective/kl: 15.4468355178833
ppo/returns/mean: -0.5690077543258667
ppo/policy/advantages_mean: 0.0054509639739990234
---------------------------------------------------------------------------------------------------


273it [1:11:00, 11.07s/it]

objective/kl: 18.04489517211914
ppo/returns/mean: -0.724372148513794
ppo/policy/advantages_mean: -0.039725273847579956
---------------------------------------------------------------------------------------------------


274it [1:11:08, 10.15s/it]

objective/kl: 12.254863739013672
ppo/returns/mean: -0.6041005849838257
ppo/policy/advantages_mean: -0.01934605836868286
---------------------------------------------------------------------------------------------------


275it [1:11:13,  8.66s/it]

objective/kl: 23.35993003845215
ppo/returns/mean: -1.0375175476074219
ppo/policy/advantages_mean: -0.013559656217694283
---------------------------------------------------------------------------------------------------


276it [1:11:19,  7.87s/it]

objective/kl: 15.9154052734375
ppo/returns/mean: -0.6903700232505798
ppo/policy/advantages_mean: -0.0026019983924925327
---------------------------------------------------------------------------------------------------


277it [1:11:25,  7.29s/it]

objective/kl: 8.743059158325195
ppo/returns/mean: -0.365986704826355
ppo/policy/advantages_mean: 0.007188096642494202
---------------------------------------------------------------------------------------------------


278it [1:11:31,  6.96s/it]

objective/kl: 10.649040222167969
ppo/returns/mean: -0.5298925042152405
ppo/policy/advantages_mean: -0.03207366168498993
---------------------------------------------------------------------------------------------------


279it [1:11:38,  6.87s/it]

objective/kl: 4.899089813232422
ppo/returns/mean: -0.13702218234539032
ppo/policy/advantages_mean: -0.0008181408047676086
---------------------------------------------------------------------------------------------------


280it [1:11:45,  6.73s/it]

objective/kl: 8.031574249267578
ppo/returns/mean: -0.4693518579006195
ppo/policy/advantages_mean: -0.020217537879943848
---------------------------------------------------------------------------------------------------


281it [1:11:50,  6.44s/it]

objective/kl: 9.425251007080078
ppo/returns/mean: -0.48333656787872314
ppo/policy/advantages_mean: -0.0082772895693779
---------------------------------------------------------------------------------------------------


282it [1:11:56,  6.12s/it]

objective/kl: 7.732949256896973
ppo/returns/mean: -0.474212646484375
ppo/policy/advantages_mean: -0.034038640558719635
---------------------------------------------------------------------------------------------------


283it [1:12:02,  6.27s/it]

objective/kl: 6.012558937072754
ppo/returns/mean: -0.20859377086162567
ppo/policy/advantages_mean: 0.01839090883731842
---------------------------------------------------------------------------------------------------


284it [1:12:07,  5.91s/it]

objective/kl: 3.6439590454101562
ppo/returns/mean: -0.5495860576629639
ppo/policy/advantages_mean: 0.003955718129873276
---------------------------------------------------------------------------------------------------


285it [1:12:13,  5.93s/it]

objective/kl: 6.552542686462402
ppo/returns/mean: -0.6238673329353333
ppo/policy/advantages_mean: -0.03724979981780052
---------------------------------------------------------------------------------------------------


286it [1:12:19,  5.99s/it]

objective/kl: 3.894968271255493
ppo/returns/mean: -0.34798452258110046
ppo/policy/advantages_mean: 0.009484879672527313
---------------------------------------------------------------------------------------------------


287it [1:12:25,  5.81s/it]

objective/kl: 6.6008219718933105
ppo/returns/mean: -0.5084392428398132
ppo/policy/advantages_mean: 0.029010027647018433
---------------------------------------------------------------------------------------------------


288it [1:12:30,  5.69s/it]

objective/kl: 6.824007034301758
ppo/returns/mean: -0.6351877450942993
ppo/policy/advantages_mean: 0.013851657509803772
---------------------------------------------------------------------------------------------------


289it [1:12:36,  5.69s/it]

objective/kl: 4.245222091674805
ppo/returns/mean: -0.3065100312232971
ppo/policy/advantages_mean: -0.005112681537866592
---------------------------------------------------------------------------------------------------


290it [1:12:42,  5.81s/it]

objective/kl: 6.378503799438477
ppo/returns/mean: -0.3918061852455139
ppo/policy/advantages_mean: 0.019177593290805817
---------------------------------------------------------------------------------------------------


291it [1:12:47,  5.70s/it]

objective/kl: 4.878227233886719
ppo/returns/mean: -0.3316824436187744
ppo/policy/advantages_mean: -0.00217987596988678
---------------------------------------------------------------------------------------------------


292it [1:12:53,  5.61s/it]

objective/kl: 4.892594337463379
ppo/returns/mean: -0.4488665461540222
ppo/policy/advantages_mean: -0.047807492315769196
---------------------------------------------------------------------------------------------------


293it [1:12:58,  5.51s/it]

objective/kl: 5.10736083984375
ppo/returns/mean: -0.453632652759552
ppo/policy/advantages_mean: -0.0015824679285287857
---------------------------------------------------------------------------------------------------


294it [1:13:03,  5.46s/it]

objective/kl: 4.092427730560303
ppo/returns/mean: -0.20597019791603088
ppo/policy/advantages_mean: -0.008830774575471878
---------------------------------------------------------------------------------------------------


295it [1:13:09,  5.40s/it]

objective/kl: 3.8281188011169434
ppo/returns/mean: -0.36795055866241455
ppo/policy/advantages_mean: 0.01931341551244259
---------------------------------------------------------------------------------------------------


296it [1:13:13,  5.07s/it]

objective/kl: 3.5313949584960938
ppo/returns/mean: -0.39329826831817627
ppo/policy/advantages_mean: -0.02082270383834839
---------------------------------------------------------------------------------------------------


297it [1:13:19,  5.22s/it]

objective/kl: 3.9132161140441895
ppo/returns/mean: -0.2811076045036316
ppo/policy/advantages_mean: -0.03920867294073105
---------------------------------------------------------------------------------------------------


298it [1:13:23,  5.05s/it]

objective/kl: 3.427891969680786
ppo/returns/mean: -0.33265960216522217
ppo/policy/advantages_mean: 0.00874311476945877
---------------------------------------------------------------------------------------------------


299it [1:13:27,  4.75s/it]

objective/kl: 1.3556983470916748
ppo/returns/mean: -0.37349551916122437
ppo/policy/advantages_mean: 0.0033467933535575867
---------------------------------------------------------------------------------------------------


300it [1:13:32,  4.71s/it]

objective/kl: 4.370015621185303
ppo/returns/mean: -0.36254364252090454
ppo/policy/advantages_mean: -0.09800488501787186
---------------------------------------------------------------------------------------------------


301it [1:13:36,  4.51s/it]

objective/kl: 3.084042549133301
ppo/returns/mean: -0.37326502799987793
ppo/policy/advantages_mean: -0.018068157136440277
---------------------------------------------------------------------------------------------------


302it [1:13:40,  4.31s/it]

objective/kl: 3.2131876945495605
ppo/returns/mean: -0.3714755177497864
ppo/policy/advantages_mean: -0.06072989106178284
---------------------------------------------------------------------------------------------------


303it [1:13:43,  4.00s/it]

objective/kl: 0.10071699321269989
ppo/returns/mean: -0.18710362911224365
ppo/policy/advantages_mean: -0.003496464341878891
---------------------------------------------------------------------------------------------------


304it [1:13:46,  3.76s/it]

objective/kl: -1.1128126382827759
ppo/returns/mean: -0.04018920660018921
ppo/policy/advantages_mean: 5.960464477539063e-08
---------------------------------------------------------------------------------------------------


305it [1:13:49,  3.57s/it]

objective/kl: -6.148220062255859
ppo/returns/mean: 0.5890324711799622
ppo/policy/advantages_mean: 5.122274160385132e-08
---------------------------------------------------------------------------------------------------


306it [1:13:53,  3.44s/it]

objective/kl: -7.185739517211914
ppo/returns/mean: 0.8852189779281616
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


307it [1:13:56,  3.39s/it]

objective/kl: -3.451932430267334
ppo/returns/mean: 0.60296231508255
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


308it [1:13:59,  3.32s/it]

objective/kl: -1.8118302822113037
ppo/returns/mean: 0.46107637882232666
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


309it [1:14:02,  3.25s/it]

objective/kl: 1.5895441770553589
ppo/returns/mean: 0.051668986678123474
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


310it [1:14:05,  3.19s/it]

objective/kl: 2.4146623611450195
ppo/returns/mean: -0.12380925565958023
ppo/policy/advantages_mean: -7.450580596923828e-08
---------------------------------------------------------------------------------------------------


311it [1:14:08,  3.17s/it]

objective/kl: 2.964934825897217
ppo/returns/mean: -0.2648335099220276
ppo/policy/advantages_mean: 5.960464477539063e-08
---------------------------------------------------------------------------------------------------


312it [1:14:11,  3.15s/it]

objective/kl: 0.6122473478317261
ppo/returns/mean: -0.24356995522975922
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


313it [1:14:14,  3.13s/it]

objective/kl: 1.2298355102539062
ppo/returns/mean: -0.32600077986717224
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


314it [1:14:18,  3.13s/it]

objective/kl: -1.5491979122161865
ppo/returns/mean: -0.07935939729213715
ppo/policy/advantages_mean: -1.30385160446167e-08
---------------------------------------------------------------------------------------------------


315it [1:14:21,  3.12s/it]

objective/kl: -0.6554698944091797
ppo/returns/mean: -0.13572998344898224
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


316it [1:14:24,  3.12s/it]

objective/kl: -3.4172518253326416
ppo/returns/mean: 0.10472933948040009
ppo/policy/advantages_mean: -3.725290298461914e-08
---------------------------------------------------------------------------------------------------


317it [1:14:27,  3.11s/it]

objective/kl: -2.4533326625823975
ppo/returns/mean: 0.10866311192512512
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


318it [1:14:30,  3.10s/it]

objective/kl: -3.6451923847198486
ppo/returns/mean: 0.24365833401679993
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


319it [1:14:33,  3.11s/it]

objective/kl: -1.678112268447876
ppo/returns/mean: 0.13198181986808777
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


320it [1:14:36,  3.10s/it]

objective/kl: -1.2331838607788086
ppo/returns/mean: 0.15794971585273743
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


321it [1:14:39,  3.09s/it]

objective/kl: -0.11148139089345932
ppo/returns/mean: 0.0305362269282341
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


322it [1:14:42,  3.10s/it]

objective/kl: -0.6184737086296082
ppo/returns/mean: 0.055015936493873596
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


323it [1:14:45,  3.11s/it]

objective/kl: -0.2529221773147583
ppo/returns/mean: -0.02227587252855301
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


324it [1:14:49,  3.11s/it]

objective/kl: 0.9384320974349976
ppo/returns/mean: -0.12621763348579407
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


325it [1:14:52,  3.11s/it]

objective/kl: 0.3451312780380249
ppo/returns/mean: -0.07796089351177216
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


326it [1:14:55,  3.11s/it]

objective/kl: 0.1513974368572235
ppo/returns/mean: -0.07645641267299652
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


327it [1:14:58,  3.11s/it]

objective/kl: -0.23857742547988892
ppo/returns/mean: -0.057756222784519196
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


328it [1:15:01,  3.12s/it]

objective/kl: -0.3143351674079895
ppo/returns/mean: -0.0543966144323349
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


329it [1:15:04,  3.13s/it]

objective/kl: -0.10802282392978668
ppo/returns/mean: -0.06487817317247391
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


330it [1:15:07,  3.18s/it]

objective/kl: 0.7909431457519531
ppo/returns/mean: -0.245766282081604
ppo/policy/advantages_mean: 0.014317899942398071
---------------------------------------------------------------------------------------------------


331it [1:15:11,  3.16s/it]

objective/kl: 2.2691173553466797
ppo/returns/mean: -0.29293930530548096
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


332it [1:15:14,  3.14s/it]

objective/kl: -0.08708420395851135
ppo/returns/mean: -0.13243402540683746
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


333it [1:15:17,  3.20s/it]

objective/kl: 0.702153205871582
ppo/returns/mean: -0.1819908618927002
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


334it [1:15:20,  3.19s/it]

objective/kl: 1.3144234418869019
ppo/returns/mean: -0.20295119285583496
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


335it [1:15:23,  3.17s/it]

objective/kl: -0.32192397117614746
ppo/returns/mean: -0.11124595254659653
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


336it [1:15:26,  3.15s/it]

objective/kl: -0.34078091382980347
ppo/returns/mean: -0.07463249564170837
ppo/policy/advantages_mean: 5.587935447692871e-08
---------------------------------------------------------------------------------------------------


337it [1:15:30,  3.13s/it]

objective/kl: 0.6204284429550171
ppo/returns/mean: -0.14387570321559906
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


338it [1:15:33,  3.21s/it]

objective/kl: 1.045473337173462
ppo/returns/mean: -0.16480779647827148
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


339it [1:15:36,  3.17s/it]

objective/kl: 0.2860352098941803
ppo/returns/mean: -0.10572164505720139
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


340it [1:15:39,  3.17s/it]

objective/kl: 0.2991545498371124
ppo/returns/mean: -0.16057372093200684
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


341it [1:15:42,  3.15s/it]

objective/kl: -0.3349390923976898
ppo/returns/mean: -0.02189466916024685
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


342it [1:15:45,  3.14s/it]

objective/kl: -1.414764404296875
ppo/returns/mean: 0.04823591187596321
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


343it [1:15:48,  3.13s/it]

objective/kl: -1.1396090984344482
ppo/returns/mean: 0.04517263174057007
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


344it [1:15:52,  3.12s/it]

objective/kl: -1.2687863111495972
ppo/returns/mean: 0.08627134561538696
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


345it [1:15:55,  3.12s/it]

objective/kl: -1.227257490158081
ppo/returns/mean: 0.08725976943969727
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


346it [1:15:58,  3.12s/it]

objective/kl: -0.9141040444374084
ppo/returns/mean: 0.03792544826865196
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


347it [1:16:01,  3.11s/it]

objective/kl: -0.9003355503082275
ppo/returns/mean: 0.05532177537679672
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


348it [1:16:04,  3.13s/it]

objective/kl: 0.19594790041446686
ppo/returns/mean: -0.06367847323417664
ppo/policy/advantages_mean: -2.7939677238464355e-08
---------------------------------------------------------------------------------------------------


349it [1:16:07,  3.11s/it]

objective/kl: -1.0463030338287354
ppo/returns/mean: 0.038906484842300415
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


350it [1:16:10,  3.13s/it]

objective/kl: -1.0280449390411377
ppo/returns/mean: 0.02614050731062889
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


351it [1:16:13,  3.12s/it]

objective/kl: -1.3553550243377686
ppo/returns/mean: 0.038755469024181366
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


352it [1:16:16,  3.10s/it]

objective/kl: -0.7286418676376343
ppo/returns/mean: 0.014387615025043488
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


353it [1:16:20,  3.13s/it]

objective/kl: -4.526836395263672
ppo/returns/mean: 0.3742659091949463
ppo/policy/advantages_mean: 4.0978193283081055e-08
---------------------------------------------------------------------------------------------------


354it [1:16:23,  3.15s/it]

objective/kl: -3.279470682144165
ppo/returns/mean: 0.3053644597530365
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


355it [1:16:26,  3.15s/it]

objective/kl: -2.695906400680542
ppo/returns/mean: 0.25700879096984863
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


356it [1:16:29,  3.13s/it]

objective/kl: -1.4832048416137695
ppo/returns/mean: 0.15579399466514587
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


357it [1:16:32,  3.13s/it]

objective/kl: -0.43749353289604187
ppo/returns/mean: 0.06429102271795273
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


358it [1:16:35,  3.12s/it]

objective/kl: -0.4652433395385742
ppo/returns/mean: 0.07968471199274063
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


359it [1:16:38,  3.11s/it]

objective/kl: -0.12449466437101364
ppo/returns/mean: 0.02173716574907303
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


360it [1:16:42,  3.12s/it]

objective/kl: -0.4521533250808716
ppo/returns/mean: -0.009768344461917877
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


361it [1:16:45,  3.12s/it]

objective/kl: -2.2921011447906494
ppo/returns/mean: 0.15839463472366333
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


362it [1:16:48,  3.12s/it]

objective/kl: -2.904904365539551
ppo/returns/mean: 0.16402709484100342
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


363it [1:16:51,  3.14s/it]

objective/kl: 0.13014072179794312
ppo/returns/mean: -0.17610254883766174
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


364it [1:16:54,  3.14s/it]

objective/kl: -2.1801607608795166
ppo/returns/mean: 0.16936169564723969
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


365it [1:16:57,  3.13s/it]

objective/kl: -1.018236756324768
ppo/returns/mean: 0.047979775816202164
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


366it [1:17:00,  3.17s/it]

objective/kl: -2.0128960609436035
ppo/returns/mean: 0.1433238983154297
ppo/policy/advantages_mean: -1.30385160446167e-08
---------------------------------------------------------------------------------------------------


367it [1:17:04,  3.16s/it]

objective/kl: -0.09304341673851013
ppo/returns/mean: -0.048315178602933884
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


368it [1:17:07,  3.15s/it]

objective/kl: 0.05829880014061928
ppo/returns/mean: -0.041538361459970474
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


369it [1:17:10,  3.15s/it]

objective/kl: -0.2893429398536682
ppo/returns/mean: -0.0376557782292366
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


370it [1:17:13,  3.13s/it]

objective/kl: -0.701380729675293
ppo/returns/mean: -0.0276222862303257
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


371it [1:17:16,  3.14s/it]

objective/kl: -0.504387617111206
ppo/returns/mean: -0.05198017507791519
ppo/policy/advantages_mean: 2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


372it [1:17:19,  3.14s/it]

objective/kl: -2.0993361473083496
ppo/returns/mean: 0.10904031246900558
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


373it [1:17:23,  3.20s/it]

objective/kl: -1.893010139465332
ppo/returns/mean: 0.11085954308509827
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


374it [1:17:26,  3.18s/it]

objective/kl: -2.5923027992248535
ppo/returns/mean: 0.20630493760108948
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


375it [1:17:29,  3.17s/it]

objective/kl: -1.9004433155059814
ppo/returns/mean: 0.15289531648159027
ppo/policy/advantages_mean: -3.725290298461914e-08
---------------------------------------------------------------------------------------------------


376it [1:17:32,  3.15s/it]

objective/kl: -1.5617988109588623
ppo/returns/mean: 0.13443343341350555
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


377it [1:17:35,  3.14s/it]

objective/kl: 0.43146926164627075
ppo/returns/mean: -0.0779060572385788
ppo/policy/advantages_mean: -3.3527612686157227e-08
---------------------------------------------------------------------------------------------------


378it [1:17:38,  3.13s/it]

objective/kl: -0.10574758797883987
ppo/returns/mean: -0.06783416122198105
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


379it [1:17:41,  3.14s/it]

objective/kl: -0.08161473274230957
ppo/returns/mean: -0.06484775245189667
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


380it [1:17:45,  3.14s/it]

objective/kl: -0.2742885947227478
ppo/returns/mean: -0.07029732316732407
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


381it [1:17:48,  3.22s/it]

objective/kl: -0.8269599676132202
ppo/returns/mean: -0.01943894475698471
ppo/policy/advantages_mean: -6.51925802230835e-09
---------------------------------------------------------------------------------------------------


382it [1:17:51,  3.22s/it]

objective/kl: -1.4742686748504639
ppo/returns/mean: 0.04488372802734375
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


383it [1:17:54,  3.22s/it]

objective/kl: -1.7076581716537476
ppo/returns/mean: 0.08139027655124664
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


384it [1:17:58,  3.21s/it]

objective/kl: -1.4496532678604126
ppo/returns/mean: 0.08775630593299866
ppo/policy/advantages_mean: 2.7939677238464355e-08
---------------------------------------------------------------------------------------------------


385it [1:18:01,  3.18s/it]

objective/kl: -1.1307008266448975
ppo/returns/mean: 0.08189492672681808
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


386it [1:18:04,  3.17s/it]

objective/kl: -0.2788724899291992
ppo/returns/mean: 0.005861252546310425
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


387it [1:18:07,  3.16s/it]

objective/kl: 0.11174295842647552
ppo/returns/mean: -0.04339907318353653
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


388it [1:18:10,  3.17s/it]

objective/kl: -0.3564717769622803
ppo/returns/mean: -0.014593400061130524
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


389it [1:18:13,  3.17s/it]

objective/kl: -0.1913149058818817
ppo/returns/mean: -0.0824194923043251
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


390it [1:18:17,  3.19s/it]

objective/kl: -0.733237624168396
ppo/returns/mean: 0.007582012563943863
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


391it [1:18:20,  3.18s/it]

objective/kl: -1.5734672546386719
ppo/returns/mean: 0.062450893223285675
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


392it [1:18:23,  3.17s/it]

objective/kl: -1.633955478668213
ppo/returns/mean: 0.09397260844707489
ppo/policy/advantages_mean: 3.725290298461914e-08
---------------------------------------------------------------------------------------------------


393it [1:18:26,  3.16s/it]

objective/kl: -2.1603875160217285
ppo/returns/mean: 0.1928296536207199
ppo/policy/advantages_mean: -2.60770320892334e-08
---------------------------------------------------------------------------------------------------


394it [1:18:29,  3.16s/it]

objective/kl: -1.7308390140533447
ppo/returns/mean: 0.1232989951968193
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


395it [1:18:32,  3.19s/it]

objective/kl: -1.139763593673706
ppo/returns/mean: 0.0717647522687912
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


396it [1:18:36,  3.21s/it]

objective/kl: -0.1431531310081482
ppo/returns/mean: -0.0272541344165802
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


397it [1:18:39,  3.22s/it]

objective/kl: -1.29366934299469
ppo/returns/mean: 0.07181303203105927
ppo/policy/advantages_mean: -1.30385160446167e-08
---------------------------------------------------------------------------------------------------


398it [1:18:42,  3.21s/it]

objective/kl: -3.3621251583099365
ppo/returns/mean: 0.2453787624835968
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


399it [1:18:45,  3.21s/it]

objective/kl: -2.3584325313568115
ppo/returns/mean: 0.17127889394760132
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


400it [1:18:49,  3.22s/it]

objective/kl: -1.9807140827178955
ppo/returns/mean: 0.15828603506088257
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


401it [1:18:52,  3.22s/it]

objective/kl: -1.5621670484542847
ppo/returns/mean: 0.09732501208782196
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


402it [1:18:55,  3.20s/it]

objective/kl: -1.8523263931274414
ppo/returns/mean: 0.11679627001285553
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


403it [1:18:58,  3.19s/it]

objective/kl: -1.2771819829940796
ppo/returns/mean: 0.08382132649421692
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


404it [1:19:01,  3.18s/it]

objective/kl: -1.020339012145996
ppo/returns/mean: 0.044663213193416595
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


405it [1:19:04,  3.16s/it]

objective/kl: -1.2719693183898926
ppo/returns/mean: 0.08581729978322983
ppo/policy/advantages_mean: -2.60770320892334e-08
---------------------------------------------------------------------------------------------------


406it [1:19:07,  3.15s/it]

objective/kl: -1.971991777420044
ppo/returns/mean: 0.11166676878929138
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


407it [1:19:11,  3.15s/it]

objective/kl: -3.0430588722229004
ppo/returns/mean: 0.18428675830364227
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


408it [1:19:14,  3.16s/it]

objective/kl: -2.012878894805908
ppo/returns/mean: 0.11962325870990753
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


409it [1:19:17,  3.21s/it]

objective/kl: -2.1717405319213867
ppo/returns/mean: 0.11859473586082458
ppo/policy/advantages_mean: -4.6566128730773926e-09
---------------------------------------------------------------------------------------------------


410it [1:19:20,  3.19s/it]

objective/kl: -2.7063827514648438
ppo/returns/mean: 0.19812673330307007
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


411it [1:19:23,  3.17s/it]

objective/kl: -3.4130730628967285
ppo/returns/mean: 0.2881406843662262
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


412it [1:19:27,  3.16s/it]

objective/kl: -3.1913461685180664
ppo/returns/mean: 0.26729002594947815
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


413it [1:19:30,  3.17s/it]

objective/kl: -3.0482423305511475
ppo/returns/mean: 0.25231677293777466
ppo/policy/advantages_mean: -2.7939677238464355e-08
---------------------------------------------------------------------------------------------------


414it [1:19:33,  3.15s/it]

objective/kl: -2.6393489837646484
ppo/returns/mean: 0.22398458421230316
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


415it [1:19:36,  3.22s/it]

objective/kl: -1.7661373615264893
ppo/returns/mean: 0.13993903994560242
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


416it [1:19:39,  3.19s/it]

objective/kl: -1.6785707473754883
ppo/returns/mean: 0.1402776539325714
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


417it [1:19:43,  3.21s/it]

objective/kl: -2.3306758403778076
ppo/returns/mean: 0.13627585768699646
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


418it [1:19:46,  3.19s/it]

objective/kl: -3.3476176261901855
ppo/returns/mean: 0.2399858981370926
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


419it [1:19:49,  3.17s/it]

objective/kl: -2.990243434906006
ppo/returns/mean: 0.21579232811927795
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


420it [1:19:52,  3.15s/it]

objective/kl: -3.4324915409088135
ppo/returns/mean: 0.2637913227081299
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


421it [1:19:55,  3.15s/it]

objective/kl: -3.1585628986358643
ppo/returns/mean: 0.24883775413036346
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


422it [1:19:58,  3.20s/it]

objective/kl: -2.8866219520568848
ppo/returns/mean: 0.2023557424545288
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


423it [1:20:02,  3.19s/it]

objective/kl: -3.2226290702819824
ppo/returns/mean: 0.22021038830280304
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


424it [1:20:05,  3.18s/it]

objective/kl: -3.3327629566192627
ppo/returns/mean: 0.22332580387592316
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


425it [1:20:08,  3.16s/it]

objective/kl: -4.183835983276367
ppo/returns/mean: 0.35139888525009155
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


426it [1:20:11,  3.15s/it]

objective/kl: -2.0929293632507324
ppo/returns/mean: 0.161192387342453
ppo/policy/advantages_mean: -3.725290298461914e-08
---------------------------------------------------------------------------------------------------


427it [1:20:14,  3.14s/it]

objective/kl: -3.1592135429382324
ppo/returns/mean: 0.22200867533683777
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


428it [1:20:17,  3.13s/it]

objective/kl: -2.4522581100463867
ppo/returns/mean: 0.17761138081550598
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


429it [1:20:20,  3.13s/it]

objective/kl: -2.4623756408691406
ppo/returns/mean: 0.14892923831939697
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


430it [1:20:24,  3.13s/it]

objective/kl: -3.1085362434387207
ppo/returns/mean: 0.22080069780349731
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


431it [1:20:27,  3.14s/it]

objective/kl: -5.069797515869141
ppo/returns/mean: 0.3921404480934143
ppo/policy/advantages_mean: 3.3527612686157227e-08
---------------------------------------------------------------------------------------------------


432it [1:20:30,  3.15s/it]

objective/kl: -5.271636962890625
ppo/returns/mean: 0.4025183916091919
ppo/policy/advantages_mean: 3.725290298461914e-08
---------------------------------------------------------------------------------------------------


433it [1:20:33,  3.15s/it]

objective/kl: -4.6592698097229
ppo/returns/mean: 0.38457924127578735
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


434it [1:20:36,  3.15s/it]

objective/kl: -3.2535347938537598
ppo/returns/mean: 0.2657385468482971
ppo/policy/advantages_mean: -8.381903171539307e-09
---------------------------------------------------------------------------------------------------


435it [1:20:39,  3.13s/it]

objective/kl: -3.623572826385498
ppo/returns/mean: 0.3033963739871979
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


436it [1:20:42,  3.13s/it]

objective/kl: -3.0854783058166504
ppo/returns/mean: 0.2424117922782898
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


437it [1:20:46,  3.15s/it]

objective/kl: -4.083883285522461
ppo/returns/mean: 0.31510400772094727
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


438it [1:20:49,  3.16s/it]

objective/kl: -3.6969707012176514
ppo/returns/mean: 0.2251153439283371
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


439it [1:20:52,  3.19s/it]

objective/kl: -4.548168659210205
ppo/returns/mean: 0.2927664518356323
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


440it [1:20:55,  3.23s/it]

objective/kl: -3.550544500350952
ppo/returns/mean: 0.2351614534854889
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


441it [1:20:58,  3.21s/it]

objective/kl: -5.265865802764893
ppo/returns/mean: 0.4005160629749298
ppo/policy/advantages_mean: 1.7695128917694092e-08
---------------------------------------------------------------------------------------------------


442it [1:21:02,  3.18s/it]

objective/kl: -3.3122615814208984
ppo/returns/mean: 0.2777192294597626
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


443it [1:21:05,  3.16s/it]

objective/kl: -3.3132147789001465
ppo/returns/mean: 0.2364172637462616
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


444it [1:21:08,  3.16s/it]

objective/kl: -2.8968968391418457
ppo/returns/mean: 0.16224420070648193
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


445it [1:21:11,  3.15s/it]

objective/kl: -3.013821601867676
ppo/returns/mean: 0.1947411298751831
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


446it [1:21:14,  3.14s/it]

objective/kl: -4.1883463859558105
ppo/returns/mean: 0.24155893921852112
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


447it [1:21:17,  3.14s/it]

objective/kl: -5.830660820007324
ppo/returns/mean: 0.3526223301887512
ppo/policy/advantages_mean: -5.21540641784668e-08
---------------------------------------------------------------------------------------------------


448it [1:21:20,  3.15s/it]

objective/kl: -6.707540988922119
ppo/returns/mean: 0.4893675446510315
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


449it [1:21:24,  3.15s/it]

objective/kl: -7.555490016937256
ppo/returns/mean: 0.6018645763397217
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


450it [1:21:27,  3.14s/it]

objective/kl: -6.056094169616699
ppo/returns/mean: 0.5362347364425659
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


451it [1:21:30,  3.15s/it]

objective/kl: -5.184913635253906
ppo/returns/mean: 0.4253600239753723
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


452it [1:21:33,  3.13s/it]

objective/kl: -4.861459732055664
ppo/returns/mean: 0.432056188583374
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


453it [1:21:36,  3.12s/it]

objective/kl: -3.44185209274292
ppo/returns/mean: 0.26810774207115173
ppo/policy/advantages_mean: 3.91155481338501e-08
---------------------------------------------------------------------------------------------------


454it [1:21:39,  3.11s/it]

objective/kl: -3.039416790008545
ppo/returns/mean: 0.24001839756965637
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


455it [1:21:42,  3.11s/it]

objective/kl: -3.0520269870758057
ppo/returns/mean: 0.17439153790473938
ppo/policy/advantages_mean: -4.6566128730773926e-09
---------------------------------------------------------------------------------------------------


456it [1:21:45,  3.11s/it]

objective/kl: -4.06402063369751
ppo/returns/mean: 0.22995899617671967
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


457it [1:21:48,  3.11s/it]

objective/kl: -3.6228342056274414
ppo/returns/mean: 0.1652444303035736
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


458it [1:21:52,  3.12s/it]

objective/kl: -4.372768878936768
ppo/returns/mean: 0.24490930140018463
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


459it [1:21:55,  3.17s/it]

objective/kl: -5.062775611877441
ppo/returns/mean: 0.34255528450012207
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


460it [1:21:59,  3.49s/it]

objective/kl: -5.060915470123291
ppo/returns/mean: 0.34127962589263916
ppo/policy/advantages_mean: 3.3527612686157227e-08
---------------------------------------------------------------------------------------------------


461it [1:22:02,  3.38s/it]

objective/kl: -4.826742172241211
ppo/returns/mean: 0.3410894274711609
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


462it [1:22:05,  3.32s/it]

objective/kl: -4.031057357788086
ppo/returns/mean: 0.3053912818431854
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


463it [1:22:09,  3.26s/it]

objective/kl: -4.834061145782471
ppo/returns/mean: 0.3636101186275482
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


464it [1:22:12,  3.21s/it]

objective/kl: -3.9603679180145264
ppo/returns/mean: 0.2747907042503357
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


465it [1:22:15,  3.17s/it]

objective/kl: -2.9093475341796875
ppo/returns/mean: 0.2083829790353775
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


466it [1:22:18,  3.15s/it]

objective/kl: -3.6552963256835938
ppo/returns/mean: 0.22454434633255005
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


467it [1:22:21,  3.14s/it]

objective/kl: -2.549220323562622
ppo/returns/mean: 0.1423949897289276
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


468it [1:22:24,  3.12s/it]

objective/kl: -3.625110626220703
ppo/returns/mean: 0.18781684339046478
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


469it [1:22:27,  3.13s/it]

objective/kl: -5.060150146484375
ppo/returns/mean: 0.30326300859451294
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


470it [1:22:30,  3.13s/it]

objective/kl: -6.017295837402344
ppo/returns/mean: 0.40295636653900146
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


471it [1:22:33,  3.12s/it]

objective/kl: -5.016024589538574
ppo/returns/mean: 0.3474266827106476
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


472it [1:22:36,  3.11s/it]

objective/kl: -4.319118499755859
ppo/returns/mean: 0.33869993686676025
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


473it [1:22:39,  3.08s/it]

objective/kl: -4.540454864501953
ppo/returns/mean: 0.3407837450504303
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


474it [1:22:43,  3.11s/it]

objective/kl: -4.474409103393555
ppo/returns/mean: 0.3440716862678528
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


475it [1:22:46,  3.11s/it]

objective/kl: -4.361749649047852
ppo/returns/mean: 0.3174084424972534
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


476it [1:22:49,  3.12s/it]

objective/kl: -4.536822319030762
ppo/returns/mean: 0.32759031653404236
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


477it [1:22:52,  3.13s/it]

objective/kl: -4.312822341918945
ppo/returns/mean: 0.2773118317127228
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


478it [1:22:55,  3.12s/it]

objective/kl: -5.132778167724609
ppo/returns/mean: 0.35108116269111633
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


479it [1:22:58,  3.12s/it]

objective/kl: -4.188296318054199
ppo/returns/mean: 0.2816033959388733
ppo/policy/advantages_mean: -3.5390257835388184e-08
---------------------------------------------------------------------------------------------------


480it [1:23:01,  3.15s/it]

objective/kl: -4.075341701507568
ppo/returns/mean: 0.26832666993141174
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


481it [1:23:05,  3.14s/it]

objective/kl: -4.559338092803955
ppo/returns/mean: 0.30436378717422485
ppo/policy/advantages_mean: 1.7695128917694092e-08
---------------------------------------------------------------------------------------------------


482it [1:23:08,  3.13s/it]

objective/kl: -5.662088394165039
ppo/returns/mean: 0.41753441095352173
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


483it [1:23:11,  3.14s/it]

objective/kl: -5.774720191955566
ppo/returns/mean: 0.4464840590953827
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


484it [1:23:14,  3.15s/it]

objective/kl: -5.028543472290039
ppo/returns/mean: 0.39490190148353577
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


485it [1:23:17,  3.15s/it]

objective/kl: -4.563263893127441
ppo/returns/mean: 0.3643772602081299
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


486it [1:23:20,  3.14s/it]

objective/kl: -4.131775856018066
ppo/returns/mean: 0.306519091129303
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


487it [1:23:23,  3.13s/it]

objective/kl: -3.8639612197875977
ppo/returns/mean: 0.2826537787914276
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


488it [1:23:27,  3.13s/it]

objective/kl: -4.0736083984375
ppo/returns/mean: 0.24759970605373383
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


489it [1:23:30,  3.13s/it]

objective/kl: -4.586005210876465
ppo/returns/mean: 0.28778380155563354
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


490it [1:23:33,  3.12s/it]

objective/kl: -6.185999870300293
ppo/returns/mean: 0.4133944511413574
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


491it [1:23:36,  3.18s/it]

objective/kl: -5.932850360870361
ppo/returns/mean: 0.3911111652851105
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


492it [1:23:39,  3.18s/it]

objective/kl: -5.092617034912109
ppo/returns/mean: 0.38293981552124023
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


493it [1:23:42,  3.17s/it]

objective/kl: -4.982578277587891
ppo/returns/mean: 0.3878694176673889
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


494it [1:23:46,  3.16s/it]

objective/kl: -4.002894401550293
ppo/returns/mean: 0.28796401619911194
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


495it [1:23:49,  3.15s/it]

objective/kl: -4.045957088470459
ppo/returns/mean: 0.30482417345046997
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


496it [1:23:52,  3.14s/it]

objective/kl: -4.204059600830078
ppo/returns/mean: 0.2794216573238373
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


497it [1:23:55,  3.21s/it]

objective/kl: -4.342402935028076
ppo/returns/mean: 0.29106998443603516
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


498it [1:23:58,  3.18s/it]

objective/kl: -4.472695827484131
ppo/returns/mean: 0.26718223094940186
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


499it [1:24:01,  3.17s/it]

objective/kl: -4.394006729125977
ppo/returns/mean: 0.2525366544723511
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


500it [1:24:05,  3.15s/it]

objective/kl: -4.018153190612793
ppo/returns/mean: 0.2205081582069397
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


501it [1:24:08,  3.16s/it]

objective/kl: -5.533674240112305
ppo/returns/mean: 0.3180551528930664
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


502it [1:24:11,  3.18s/it]

objective/kl: -6.002536773681641
ppo/returns/mean: 0.3831408619880676
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


503it [1:24:14,  3.16s/it]

objective/kl: -6.0081400871276855
ppo/returns/mean: 0.40890371799468994
ppo/policy/advantages_mean: -3.5390257835388184e-08
---------------------------------------------------------------------------------------------------


504it [1:24:17,  3.18s/it]

objective/kl: -5.74553918838501
ppo/returns/mean: 0.44315385818481445
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


505it [1:24:21,  3.18s/it]

objective/kl: -5.6316986083984375
ppo/returns/mean: 0.4056689739227295
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


506it [1:24:24,  3.17s/it]

objective/kl: -4.453421115875244
ppo/returns/mean: 0.3090931177139282
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


507it [1:24:27,  3.16s/it]

objective/kl: -4.866958141326904
ppo/returns/mean: 0.3329256772994995
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


508it [1:24:30,  3.15s/it]

objective/kl: -4.042611122131348
ppo/returns/mean: 0.2869403660297394
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


509it [1:24:33,  3.27s/it]

objective/kl: -4.224934101104736
ppo/returns/mean: 0.3018818497657776
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


510it [1:24:37,  3.39s/it]

objective/kl: -4.75866174697876
ppo/returns/mean: 0.333479642868042
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


511it [1:24:40,  3.30s/it]

objective/kl: -3.908437728881836
ppo/returns/mean: 0.25403648614883423
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


512it [1:24:43,  3.28s/it]

objective/kl: -4.340764045715332
ppo/returns/mean: 0.29117655754089355
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


513it [1:24:47,  3.23s/it]

objective/kl: -4.150548934936523
ppo/returns/mean: 0.2577626705169678
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


514it [1:24:50,  3.20s/it]

objective/kl: -5.364742279052734
ppo/returns/mean: 0.3437398672103882
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


515it [1:24:53,  3.19s/it]

objective/kl: -4.122122764587402
ppo/returns/mean: 0.27208220958709717
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


516it [1:24:56,  3.17s/it]

objective/kl: -6.486794471740723
ppo/returns/mean: 0.41120660305023193
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


517it [1:24:59,  3.15s/it]

objective/kl: -6.314956188201904
ppo/returns/mean: 0.4320472776889801
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


518it [1:25:02,  3.15s/it]

objective/kl: -6.351033687591553
ppo/returns/mean: 0.47519439458847046
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


519it [1:25:05,  3.15s/it]

objective/kl: -5.781923294067383
ppo/returns/mean: 0.3808673024177551
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


520it [1:25:09,  3.32s/it]

objective/kl: -5.925903797149658
ppo/returns/mean: 0.4174363613128662
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


521it [1:25:12,  3.24s/it]

objective/kl: -5.254261493682861
ppo/returns/mean: 0.4240592122077942
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


522it [1:25:15,  3.20s/it]

objective/kl: -5.820368766784668
ppo/returns/mean: 0.41324010491371155
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


523it [1:25:18,  3.18s/it]

objective/kl: -5.141956806182861
ppo/returns/mean: 0.35786575078964233
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


524it [1:25:21,  3.16s/it]

objective/kl: -5.4494853019714355
ppo/returns/mean: 0.38778966665267944
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


525it [1:25:25,  3.13s/it]

objective/kl: -6.5415120124816895
ppo/returns/mean: 0.47281545400619507
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


526it [1:25:28,  3.12s/it]

objective/kl: -6.608168601989746
ppo/returns/mean: 0.45049652457237244
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


527it [1:25:31,  3.13s/it]

objective/kl: -5.855653762817383
ppo/returns/mean: 0.41570085287094116
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


528it [1:25:34,  3.20s/it]

objective/kl: -5.571483612060547
ppo/returns/mean: 0.4033973813056946
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


529it [1:25:37,  3.19s/it]

objective/kl: -4.469342231750488
ppo/returns/mean: 0.3130995035171509
ppo/policy/advantages_mean: -2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


530it [1:25:40,  3.16s/it]

objective/kl: -4.923853874206543
ppo/returns/mean: 0.3176054358482361
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


531it [1:25:44,  3.15s/it]

objective/kl: -5.389453887939453
ppo/returns/mean: 0.3029021620750427
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


532it [1:25:47,  3.13s/it]

objective/kl: -5.145085334777832
ppo/returns/mean: 0.30962568521499634
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


533it [1:25:50,  3.12s/it]

objective/kl: -6.429276466369629
ppo/returns/mean: 0.4260328412055969
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


534it [1:25:53,  3.11s/it]

objective/kl: -6.52562141418457
ppo/returns/mean: 0.4317125380039215
ppo/policy/advantages_mean: -4.6566128730773926e-08
---------------------------------------------------------------------------------------------------


535it [1:25:56,  3.10s/it]

objective/kl: -7.05470085144043
ppo/returns/mean: 0.5163894295692444
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


536it [1:25:59,  3.09s/it]

objective/kl: -7.095516204833984
ppo/returns/mean: 0.5216953754425049
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


537it [1:26:02,  3.09s/it]

objective/kl: -5.983162879943848
ppo/returns/mean: 0.46289315819740295
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


538it [1:26:05,  3.09s/it]

objective/kl: -5.2214226722717285
ppo/returns/mean: 0.40304866433143616
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


539it [1:26:08,  3.08s/it]

objective/kl: -4.966535568237305
ppo/returns/mean: 0.35455235838890076
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


540it [1:26:11,  3.09s/it]

objective/kl: -4.929275989532471
ppo/returns/mean: 0.36632388830184937
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


541it [1:26:14,  3.09s/it]

objective/kl: -4.303038597106934
ppo/returns/mean: 0.2831570506095886
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


542it [1:26:17,  3.09s/it]

objective/kl: -5.533311367034912
ppo/returns/mean: 0.3666140139102936
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


543it [1:26:21,  3.08s/it]

objective/kl: -6.603921890258789
ppo/returns/mean: 0.4476410150527954
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


544it [1:26:24,  3.08s/it]

objective/kl: -6.238704681396484
ppo/returns/mean: 0.4318070709705353
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


545it [1:26:27,  3.08s/it]

objective/kl: -5.286982536315918
ppo/returns/mean: 0.3402891159057617
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


546it [1:26:30,  3.08s/it]

objective/kl: -6.263093948364258
ppo/returns/mean: 0.43242818117141724
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


547it [1:26:33,  3.15s/it]

objective/kl: -6.377796649932861
ppo/returns/mean: 0.41775989532470703
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


548it [1:26:36,  3.14s/it]

objective/kl: -6.279239654541016
ppo/returns/mean: 0.4530625343322754
ppo/policy/advantages_mean: -1.0244548320770264e-08
---------------------------------------------------------------------------------------------------


549it [1:26:39,  3.14s/it]

objective/kl: -6.535482406616211
ppo/returns/mean: 0.4494493007659912
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


550it [1:26:42,  3.12s/it]

objective/kl: -7.308785915374756
ppo/returns/mean: 0.5340659022331238
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


551it [1:26:46,  3.19s/it]

objective/kl: -6.118230819702148
ppo/returns/mean: 0.4839983582496643
ppo/policy/advantages_mean: 2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


552it [1:26:49,  3.29s/it]

objective/kl: -5.477323055267334
ppo/returns/mean: 0.4137536287307739
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


553it [1:26:52,  3.22s/it]

objective/kl: -4.728878021240234
ppo/returns/mean: 0.3384036421775818
ppo/policy/advantages_mean: -3.725290298461914e-08
---------------------------------------------------------------------------------------------------


554it [1:26:55,  3.19s/it]

objective/kl: -4.346935272216797
ppo/returns/mean: 0.3074306845664978
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


555it [1:26:59,  3.18s/it]

objective/kl: -4.2209062576293945
ppo/returns/mean: 0.25541621446609497
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


556it [1:27:02,  3.17s/it]

objective/kl: -5.479405403137207
ppo/returns/mean: 0.3684026598930359
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


557it [1:27:05,  3.15s/it]

objective/kl: -5.487673759460449
ppo/returns/mean: 0.34925249218940735
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


558it [1:27:08,  3.13s/it]

objective/kl: -4.771999359130859
ppo/returns/mean: 0.32172906398773193
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


559it [1:27:11,  3.14s/it]

objective/kl: -4.715092182159424
ppo/returns/mean: 0.2649993896484375
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


560it [1:27:14,  3.13s/it]

objective/kl: -5.4584221839904785
ppo/returns/mean: 0.30002346634864807
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


561it [1:27:17,  3.12s/it]

objective/kl: -6.3254852294921875
ppo/returns/mean: 0.350098192691803
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


562it [1:27:20,  3.11s/it]

objective/kl: -6.432639122009277
ppo/returns/mean: 0.39536988735198975
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


563it [1:27:24,  3.10s/it]

objective/kl: -6.476045608520508
ppo/returns/mean: 0.4213065803050995
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


564it [1:27:27,  3.10s/it]

objective/kl: -6.524989604949951
ppo/returns/mean: 0.44932255148887634
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


565it [1:27:30,  3.09s/it]

objective/kl: -5.545236587524414
ppo/returns/mean: 0.3972727358341217
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


566it [1:27:33,  3.10s/it]

objective/kl: -5.84283447265625
ppo/returns/mean: 0.41187307238578796
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


567it [1:27:36,  3.09s/it]

objective/kl: -6.212356090545654
ppo/returns/mean: 0.4344995319843292
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


568it [1:27:39,  3.10s/it]

objective/kl: -4.6743268966674805
ppo/returns/mean: 0.32910895347595215
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


569it [1:27:42,  3.12s/it]

objective/kl: -4.964807987213135
ppo/returns/mean: 0.34033286571502686
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


570it [1:27:45,  3.12s/it]

objective/kl: -5.778742790222168
ppo/returns/mean: 0.38826656341552734
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


571it [1:27:48,  3.14s/it]

objective/kl: -6.084160804748535
ppo/returns/mean: 0.38062381744384766
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


572it [1:27:52,  3.13s/it]

objective/kl: -6.397553443908691
ppo/returns/mean: 0.42342156171798706
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


573it [1:27:55,  3.11s/it]

objective/kl: -7.25747013092041
ppo/returns/mean: 0.4965074062347412
ppo/policy/advantages_mean: 3.5390257835388184e-08
---------------------------------------------------------------------------------------------------


574it [1:27:58,  3.10s/it]

objective/kl: -7.256105422973633
ppo/returns/mean: 0.5025274753570557
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


575it [1:28:01,  3.09s/it]

objective/kl: -7.140190124511719
ppo/returns/mean: 0.5051291584968567
ppo/policy/advantages_mean: -3.3527612686157227e-08
---------------------------------------------------------------------------------------------------


576it [1:28:04,  3.12s/it]

objective/kl: -6.095922470092773
ppo/returns/mean: 0.43256431818008423
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


577it [1:28:07,  3.12s/it]

objective/kl: -5.539812088012695
ppo/returns/mean: 0.4165741801261902
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


578it [1:28:10,  3.11s/it]

objective/kl: -4.886723518371582
ppo/returns/mean: 0.36967527866363525
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


579it [1:28:13,  3.12s/it]

objective/kl: -5.62533712387085
ppo/returns/mean: 0.37653541564941406
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


580it [1:28:16,  3.11s/it]

objective/kl: -5.5386457443237305
ppo/returns/mean: 0.3481476306915283
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


581it [1:28:20,  3.12s/it]

objective/kl: -6.727806091308594
ppo/returns/mean: 0.4496457874774933
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


582it [1:28:23,  3.13s/it]

objective/kl: -6.659030914306641
ppo/returns/mean: 0.4407588839530945
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


583it [1:28:26,  3.12s/it]

objective/kl: -6.280969619750977
ppo/returns/mean: 0.399324893951416
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


584it [1:28:29,  3.12s/it]

objective/kl: -9.049760818481445
ppo/returns/mean: 0.6025649309158325
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


585it [1:28:32,  3.11s/it]

objective/kl: -7.66706657409668
ppo/returns/mean: 0.5666581988334656
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


586it [1:28:35,  3.11s/it]

objective/kl: -6.960715293884277
ppo/returns/mean: 0.5197333097457886
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


587it [1:28:38,  3.12s/it]

objective/kl: -6.8664751052856445
ppo/returns/mean: 0.4887760877609253
ppo/policy/advantages_mean: 0.000838087871670723
---------------------------------------------------------------------------------------------------


588it [1:28:41,  3.11s/it]

objective/kl: -6.258784294128418
ppo/returns/mean: 0.4473504424095154
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


589it [1:28:44,  3.11s/it]

objective/kl: -5.385903358459473
ppo/returns/mean: 0.4160889983177185
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


590it [1:28:48,  3.10s/it]

objective/kl: -4.8368120193481445
ppo/returns/mean: 0.3340306282043457
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


591it [1:28:51,  3.09s/it]

objective/kl: -4.379110336303711
ppo/returns/mean: 0.2821250557899475
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


592it [1:28:54,  3.09s/it]

objective/kl: -7.890203475952148
ppo/returns/mean: 0.5253154635429382
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


593it [1:28:57,  3.15s/it]

objective/kl: -10.339655876159668
ppo/returns/mean: 0.7045007944107056
ppo/policy/advantages_mean: -5.21540641784668e-08
---------------------------------------------------------------------------------------------------


594it [1:29:00,  3.14s/it]

objective/kl: -9.173362731933594
ppo/returns/mean: 0.6638987064361572
ppo/policy/advantages_mean: -5.21540641784668e-08
---------------------------------------------------------------------------------------------------


595it [1:29:03,  3.13s/it]

objective/kl: -9.344644546508789
ppo/returns/mean: 0.7006768584251404
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


596it [1:29:06,  3.12s/it]

objective/kl: -7.317377090454102
ppo/returns/mean: 0.550061047077179
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


597it [1:29:09,  3.12s/it]

objective/kl: -7.042346000671387
ppo/returns/mean: 0.5645388960838318
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


598it [1:29:12,  3.11s/it]

objective/kl: -6.296439170837402
ppo/returns/mean: 0.5152090787887573
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


599it [1:29:16,  3.10s/it]

objective/kl: -4.9181694984436035
ppo/returns/mean: 0.38843870162963867
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


600it [1:29:19,  3.10s/it]

objective/kl: -6.041804313659668
ppo/returns/mean: 0.4789907932281494
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


601it [1:29:22,  3.10s/it]

objective/kl: -5.707655906677246
ppo/returns/mean: 0.4080861210823059
ppo/policy/advantages_mean: -4.470348358154297e-08
---------------------------------------------------------------------------------------------------


602it [1:29:25,  3.09s/it]

objective/kl: -5.88916540145874
ppo/returns/mean: 0.45972633361816406
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


603it [1:29:28,  3.09s/it]

objective/kl: -6.22573709487915
ppo/returns/mean: 0.42704707384109497
ppo/policy/advantages_mean: 4.6566128730773926e-09
---------------------------------------------------------------------------------------------------


604it [1:29:31,  3.09s/it]

objective/kl: -6.976871967315674
ppo/returns/mean: 0.47425633668899536
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


605it [1:29:34,  3.10s/it]

objective/kl: -6.016547203063965
ppo/returns/mean: 0.39122211933135986
ppo/policy/advantages_mean: -1.30385160446167e-08
---------------------------------------------------------------------------------------------------


606it [1:29:37,  3.14s/it]

objective/kl: -6.023697853088379
ppo/returns/mean: 0.3606833815574646
ppo/policy/advantages_mean: 1.3969838619232178e-08
---------------------------------------------------------------------------------------------------


607it [1:29:41,  3.14s/it]

objective/kl: -6.077646255493164
ppo/returns/mean: 0.36594322323799133
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


608it [1:29:44,  3.16s/it]

objective/kl: -6.763981342315674
ppo/returns/mean: 0.45026206970214844
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


609it [1:29:47,  3.13s/it]

objective/kl: -8.237152099609375
ppo/returns/mean: 0.5821857452392578
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


610it [1:29:50,  3.14s/it]

objective/kl: -9.208724975585938
ppo/returns/mean: 0.6305005550384521
ppo/policy/advantages_mean: 3.166496753692627e-08
---------------------------------------------------------------------------------------------------


611it [1:29:53,  3.26s/it]

objective/kl: -10.211759567260742
ppo/returns/mean: 0.7458128333091736
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


612it [1:29:57,  3.24s/it]

objective/kl: -10.624869346618652
ppo/returns/mean: 0.8149652481079102
ppo/policy/advantages_mean: -9.313225746154785e-10
---------------------------------------------------------------------------------------------------


613it [1:30:00,  3.22s/it]

objective/kl: -9.876091003417969
ppo/returns/mean: 0.7422208786010742
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


614it [1:30:03,  3.20s/it]

objective/kl: -9.433144569396973
ppo/returns/mean: 0.7413831949234009
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


615it [1:30:06,  3.19s/it]

objective/kl: -9.85383129119873
ppo/returns/mean: 0.7658711671829224
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


616it [1:30:09,  3.19s/it]

objective/kl: -9.169455528259277
ppo/returns/mean: 0.738776683807373
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


617it [1:30:12,  3.17s/it]

objective/kl: -9.69117259979248
ppo/returns/mean: 0.7733743190765381
ppo/policy/advantages_mean: -1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


618it [1:30:16,  3.16s/it]

objective/kl: -9.026653289794922
ppo/returns/mean: 0.7302243113517761
ppo/policy/advantages_mean: -1.210719347000122e-08
---------------------------------------------------------------------------------------------------


619it [1:30:19,  3.17s/it]

objective/kl: -9.691461563110352
ppo/returns/mean: 0.7397893667221069
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


620it [1:30:22,  3.19s/it]

objective/kl: -9.662561416625977
ppo/returns/mean: 0.7614782452583313
ppo/policy/advantages_mean: -4.6566128730773926e-09
---------------------------------------------------------------------------------------------------


621it [1:30:25,  3.19s/it]

objective/kl: -8.26530647277832
ppo/returns/mean: 0.6108160614967346
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


622it [1:30:28,  3.19s/it]

objective/kl: -8.224100112915039
ppo/returns/mean: 0.6222125291824341
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


623it [1:30:32,  3.20s/it]

objective/kl: -7.628643035888672
ppo/returns/mean: 0.5590176582336426
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


624it [1:30:35,  3.30s/it]

objective/kl: -7.602214813232422
ppo/returns/mean: 0.5117034912109375
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


625it [1:30:39,  3.32s/it]

objective/kl: -7.892110824584961
ppo/returns/mean: 0.5614241361618042
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


626it [1:30:42,  3.30s/it]

objective/kl: -8.409612655639648
ppo/returns/mean: 0.5213625431060791
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


627it [1:30:45,  3.26s/it]

objective/kl: -9.713924407958984
ppo/returns/mean: 0.632662832736969
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


628it [1:30:48,  3.24s/it]

objective/kl: -10.560747146606445
ppo/returns/mean: 0.6496527791023254
ppo/policy/advantages_mean: -0.00210745632648468
---------------------------------------------------------------------------------------------------


629it [1:30:51,  3.19s/it]

objective/kl: -11.472036361694336
ppo/returns/mean: 0.7928537726402283
ppo/policy/advantages_mean: 3.3527612686157227e-08
---------------------------------------------------------------------------------------------------


630it [1:30:54,  3.17s/it]

objective/kl: -12.637506484985352
ppo/returns/mean: 0.9144234657287598
ppo/policy/advantages_mean: -5.960464477539063e-08
---------------------------------------------------------------------------------------------------


631it [1:30:58,  3.17s/it]

objective/kl: -11.62725830078125
ppo/returns/mean: 0.8466783761978149
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


632it [1:31:01,  3.16s/it]

objective/kl: -13.026172637939453
ppo/returns/mean: 0.9964010119438171
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


633it [1:31:04,  3.14s/it]

objective/kl: -11.811386108398438
ppo/returns/mean: 0.9356012344360352
ppo/policy/advantages_mean: 1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


634it [1:31:07,  3.13s/it]

objective/kl: -11.92613410949707
ppo/returns/mean: 0.9537228345870972
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


635it [1:31:10,  3.12s/it]

objective/kl: -11.127115249633789
ppo/returns/mean: 0.9278507232666016
ppo/policy/advantages_mean: 1.5832483768463135e-08
---------------------------------------------------------------------------------------------------


636it [1:31:13,  3.12s/it]

objective/kl: -9.489564895629883
ppo/returns/mean: 0.7955103516578674
ppo/policy/advantages_mean: -1.3969838619232178e-08
---------------------------------------------------------------------------------------------------


637it [1:31:16,  3.13s/it]

objective/kl: -9.112531661987305
ppo/returns/mean: 0.7648434638977051
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


638it [1:31:19,  3.13s/it]

objective/kl: -8.347650527954102
ppo/returns/mean: 0.6852140426635742
ppo/policy/advantages_mean: 7.078051567077637e-08
---------------------------------------------------------------------------------------------------


639it [1:31:23,  3.17s/it]

objective/kl: -8.003185272216797
ppo/returns/mean: 0.6554475426673889
ppo/policy/advantages_mean: 8.381903171539307e-09
---------------------------------------------------------------------------------------------------


640it [1:31:26,  3.15s/it]

objective/kl: -7.6555914878845215
ppo/returns/mean: 0.5982397198677063
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


641it [1:31:29,  3.19s/it]

objective/kl: -7.921031475067139
ppo/returns/mean: 0.5552853345870972
ppo/policy/advantages_mean: -2.3283064365386963e-08
---------------------------------------------------------------------------------------------------


642it [1:31:32,  3.17s/it]

objective/kl: -8.47216510772705
ppo/returns/mean: 0.5703250169754028
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


643it [1:31:35,  3.16s/it]

objective/kl: -10.13749885559082
ppo/returns/mean: 0.6100670099258423
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


644it [1:31:38,  3.16s/it]

objective/kl: -10.804533958435059
ppo/returns/mean: 0.659804105758667
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


645it [1:31:42,  3.16s/it]

objective/kl: -11.835235595703125
ppo/returns/mean: 0.7366228699684143
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


646it [1:31:45,  3.16s/it]

objective/kl: -13.153796195983887
ppo/returns/mean: 0.8618399500846863
ppo/policy/advantages_mean: 3.725290298461914e-08
---------------------------------------------------------------------------------------------------


647it [1:31:48,  3.16s/it]

objective/kl: -14.944342613220215
ppo/returns/mean: 0.935595691204071
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


648it [1:31:51,  3.14s/it]

objective/kl: -14.417951583862305
ppo/returns/mean: 0.9001033902168274
ppo/policy/advantages_mean: 5.960464477539063e-08
---------------------------------------------------------------------------------------------------


649it [1:31:54,  3.15s/it]

objective/kl: -15.07454776763916
ppo/returns/mean: 0.7921416163444519
ppo/policy/advantages_mean: -3.725290298461914e-08
---------------------------------------------------------------------------------------------------


650it [1:31:57,  3.14s/it]

objective/kl: -13.181612968444824
ppo/returns/mean: 0.7909018397331238
ppo/policy/advantages_mean: 1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


651it [1:32:00,  3.14s/it]

objective/kl: -11.352182388305664
ppo/returns/mean: 0.8971366882324219
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


652it [1:32:04,  3.14s/it]

objective/kl: -9.42924690246582
ppo/returns/mean: 0.7309832572937012
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


653it [1:32:07,  3.13s/it]

objective/kl: -9.481706619262695
ppo/returns/mean: 0.7325144410133362
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


654it [1:32:10,  3.12s/it]

objective/kl: -8.314924240112305
ppo/returns/mean: 0.5973780155181885
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


655it [1:32:13,  3.12s/it]

objective/kl: -8.614673614501953
ppo/returns/mean: 0.514335036277771
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


656it [1:32:16,  3.13s/it]

objective/kl: -8.840890884399414
ppo/returns/mean: 0.5099492073059082
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


657it [1:32:19,  3.13s/it]

objective/kl: -10.334287643432617
ppo/returns/mean: 0.5678476095199585
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


658it [1:32:22,  3.14s/it]

objective/kl: -11.687742233276367
ppo/returns/mean: 0.6279136538505554
ppo/policy/advantages_mean: 2.7008354663848877e-08
---------------------------------------------------------------------------------------------------


659it [1:32:26,  3.14s/it]

objective/kl: -13.404623031616211
ppo/returns/mean: 0.758475661277771
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


660it [1:32:29,  3.26s/it]

objective/kl: -14.051263809204102
ppo/returns/mean: 0.8009390234947205
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


661it [1:32:32,  3.29s/it]

objective/kl: -13.543889999389648
ppo/returns/mean: 0.7546621561050415
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


662it [1:32:36,  3.24s/it]

objective/kl: -12.59223747253418
ppo/returns/mean: 0.708149790763855
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


663it [1:32:39,  3.21s/it]

objective/kl: -12.02975845336914
ppo/returns/mean: 0.7569921016693115
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


664it [1:32:42,  3.31s/it]

objective/kl: -11.596263885498047
ppo/returns/mean: 0.7511073350906372
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


665it [1:32:45,  3.27s/it]

objective/kl: -11.47739028930664
ppo/returns/mean: 0.6664604544639587
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


666it [1:32:49,  3.29s/it]

objective/kl: -11.909555435180664
ppo/returns/mean: 0.7718654870986938
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


667it [1:32:52,  3.23s/it]

objective/kl: -10.979470252990723
ppo/returns/mean: 0.6957910060882568
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


668it [1:32:55,  3.20s/it]

objective/kl: -9.293642044067383
ppo/returns/mean: 0.5451217293739319
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


669it [1:32:58,  3.18s/it]

objective/kl: -8.067792892456055
ppo/returns/mean: 0.4546006917953491
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


670it [1:33:01,  3.20s/it]

objective/kl: -6.603116989135742
ppo/returns/mean: 0.3471849262714386
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


671it [1:33:04,  3.18s/it]

objective/kl: -6.446837425231934
ppo/returns/mean: 0.2569321393966675
ppo/policy/advantages_mean: -2.60770320892334e-08
---------------------------------------------------------------------------------------------------


672it [1:33:08,  3.17s/it]

objective/kl: -6.571273326873779
ppo/returns/mean: 0.24124926328659058
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


673it [1:33:11,  3.17s/it]

objective/kl: -6.750615119934082
ppo/returns/mean: 0.18987151980400085
ppo/policy/advantages_mean: -0.0012232493609189987
---------------------------------------------------------------------------------------------------


674it [1:33:14,  3.20s/it]

objective/kl: -6.66982364654541
ppo/returns/mean: 0.1568899303674698
ppo/policy/advantages_mean: -0.0011569056659936905
---------------------------------------------------------------------------------------------------


675it [1:33:17,  3.20s/it]

objective/kl: -7.335437297821045
ppo/returns/mean: 0.17313161492347717
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


676it [1:33:20,  3.18s/it]

objective/kl: -7.655612945556641
ppo/returns/mean: 0.1512044221162796
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


677it [1:33:23,  3.16s/it]

objective/kl: -9.590292930603027
ppo/returns/mean: 0.38428327441215515
ppo/policy/advantages_mean: -5.122274160385132e-08
---------------------------------------------------------------------------------------------------


678it [1:33:27,  3.17s/it]

objective/kl: -10.249727249145508
ppo/returns/mean: 0.40770524740219116
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


679it [1:33:30,  3.20s/it]

objective/kl: -11.254075050354004
ppo/returns/mean: 0.5409258008003235
ppo/policy/advantages_mean: 6.332993507385254e-08
---------------------------------------------------------------------------------------------------


680it [1:33:33,  3.17s/it]

objective/kl: -11.454229354858398
ppo/returns/mean: 0.5557421445846558
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


681it [1:33:36,  3.16s/it]

objective/kl: -11.36029052734375
ppo/returns/mean: 0.6245843172073364
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


682it [1:33:39,  3.15s/it]

objective/kl: -11.398757934570312
ppo/returns/mean: 0.611947238445282
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


683it [1:33:42,  3.13s/it]

objective/kl: -10.78410530090332
ppo/returns/mean: 0.6101431250572205
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


684it [1:33:46,  3.13s/it]

objective/kl: -10.23132610321045
ppo/returns/mean: 0.5868877172470093
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


685it [1:33:49,  3.12s/it]

objective/kl: -10.23287296295166
ppo/returns/mean: 0.650384247303009
ppo/policy/advantages_mean: 2.421438694000244e-08
---------------------------------------------------------------------------------------------------


686it [1:33:52,  3.16s/it]

objective/kl: -9.426359176635742
ppo/returns/mean: 0.5662267208099365
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


687it [1:33:55,  3.15s/it]

objective/kl: -9.323884010314941
ppo/returns/mean: 0.5699992179870605
ppo/policy/advantages_mean: 3.3527612686157227e-08
---------------------------------------------------------------------------------------------------


688it [1:33:58,  3.15s/it]

objective/kl: -8.635843276977539
ppo/returns/mean: 0.45844578742980957
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


689it [1:34:01,  3.15s/it]

objective/kl: -8.426698684692383
ppo/returns/mean: 0.4780735671520233
ppo/policy/advantages_mean: -4.470348358154297e-08
---------------------------------------------------------------------------------------------------


690it [1:34:04,  3.14s/it]

objective/kl: -9.118032455444336
ppo/returns/mean: 0.5218344926834106
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


691it [1:34:08,  3.13s/it]

objective/kl: -9.844419479370117
ppo/returns/mean: 0.5626770853996277
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


692it [1:34:11,  3.13s/it]

objective/kl: -10.213010787963867
ppo/returns/mean: 0.492484986782074
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


693it [1:34:14,  3.13s/it]

objective/kl: -10.934746742248535
ppo/returns/mean: 0.6308367252349854
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


694it [1:34:17,  3.14s/it]

objective/kl: -10.1248779296875
ppo/returns/mean: 0.5746623277664185
ppo/policy/advantages_mean: 1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


695it [1:34:20,  3.15s/it]

objective/kl: -10.426453590393066
ppo/returns/mean: 0.5921196341514587
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


696it [1:34:23,  3.14s/it]

objective/kl: -9.306288719177246
ppo/returns/mean: 0.46918854117393494
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


697it [1:34:26,  3.13s/it]

objective/kl: -7.886207103729248
ppo/returns/mean: 0.43651464581489563
ppo/policy/advantages_mean: 2.7939677238464355e-08
---------------------------------------------------------------------------------------------------


698it [1:34:29,  3.13s/it]

objective/kl: -8.72315788269043
ppo/returns/mean: 0.43367308378219604
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


699it [1:34:33,  3.13s/it]

objective/kl: -9.843564987182617
ppo/returns/mean: 0.5091463327407837
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


700it [1:34:36,  3.13s/it]

objective/kl: -10.3446044921875
ppo/returns/mean: 0.5475908517837524
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


701it [1:34:39,  3.14s/it]

objective/kl: -12.3691987991333
ppo/returns/mean: 0.8684448003768921
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


702it [1:34:42,  3.12s/it]

objective/kl: -11.469987869262695
ppo/returns/mean: 0.7758172750473022
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


703it [1:34:45,  3.12s/it]

objective/kl: -10.883794784545898
ppo/returns/mean: 0.8448166847229004
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


704it [1:34:49,  3.31s/it]

objective/kl: -11.066888809204102
ppo/returns/mean: 0.8404779434204102
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


705it [1:34:52,  3.26s/it]

objective/kl: -9.308886528015137
ppo/returns/mean: 0.7456920146942139
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


706it [1:34:55,  3.22s/it]

objective/kl: -7.119010925292969
ppo/returns/mean: 0.5153948068618774
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


707it [1:34:58,  3.19s/it]

objective/kl: -10.205334663391113
ppo/returns/mean: 0.6656262874603271
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


708it [1:35:01,  3.18s/it]

objective/kl: -11.14216423034668
ppo/returns/mean: 0.7451962232589722
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


709it [1:35:04,  3.16s/it]

objective/kl: -10.144562721252441
ppo/returns/mean: 0.7467715740203857
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


710it [1:35:08,  3.16s/it]

objective/kl: -9.893874168395996
ppo/returns/mean: 0.6460839509963989
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


711it [1:35:11,  3.14s/it]

objective/kl: -8.515438079833984
ppo/returns/mean: 0.603257954120636
ppo/policy/advantages_mean: -2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


712it [1:35:14,  3.19s/it]

objective/kl: -8.18727970123291
ppo/returns/mean: 0.5424579381942749
ppo/policy/advantages_mean: 4.0978193283081055e-08
---------------------------------------------------------------------------------------------------


713it [1:35:17,  3.17s/it]

objective/kl: -8.591306686401367
ppo/returns/mean: 0.5796270370483398
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


714it [1:35:20,  3.16s/it]

objective/kl: -10.282238006591797
ppo/returns/mean: 0.6289917230606079
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


715it [1:35:23,  3.16s/it]

objective/kl: -10.894464492797852
ppo/returns/mean: 0.6830499768257141
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


716it [1:35:27,  3.16s/it]

objective/kl: -10.012197494506836
ppo/returns/mean: 0.6779969930648804
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


717it [1:35:30,  3.15s/it]

objective/kl: -10.062541007995605
ppo/returns/mean: 0.690803587436676
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


718it [1:35:33,  3.18s/it]

objective/kl: -8.869756698608398
ppo/returns/mean: 0.5986891984939575
ppo/policy/advantages_mean: 1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


719it [1:35:36,  3.18s/it]

objective/kl: -10.716543197631836
ppo/returns/mean: 0.7087782621383667
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


720it [1:35:39,  3.17s/it]

objective/kl: -11.445917129516602
ppo/returns/mean: 0.7908498644828796
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


721it [1:35:42,  3.16s/it]

objective/kl: -11.034954071044922
ppo/returns/mean: 0.840756893157959
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


722it [1:35:46,  3.29s/it]

objective/kl: -12.579039573669434
ppo/returns/mean: 0.9767147302627563
ppo/policy/advantages_mean: -4.470348358154297e-08
---------------------------------------------------------------------------------------------------


723it [1:35:49,  3.29s/it]

objective/kl: -12.468923568725586
ppo/returns/mean: 0.946032702922821
ppo/policy/advantages_mean: -3.166496753692627e-08
---------------------------------------------------------------------------------------------------


724it [1:35:53,  3.50s/it]

objective/kl: -10.311896324157715
ppo/returns/mean: 0.9027085304260254
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


725it [1:35:57,  3.41s/it]

objective/kl: -8.757725715637207
ppo/returns/mean: 0.7488671541213989
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


726it [1:36:00,  3.32s/it]

objective/kl: -8.820408821105957
ppo/returns/mean: 0.7168500423431396
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


727it [1:36:03,  3.26s/it]

objective/kl: -7.530430793762207
ppo/returns/mean: 0.5686161518096924
ppo/policy/advantages_mean: 7.82310962677002e-08
---------------------------------------------------------------------------------------------------


728it [1:36:06,  3.25s/it]

objective/kl: -7.363995552062988
ppo/returns/mean: 0.4980376958847046
ppo/policy/advantages_mean: -4.470348358154297e-08
---------------------------------------------------------------------------------------------------


729it [1:36:09,  3.20s/it]

objective/kl: -8.294766426086426
ppo/returns/mean: 0.614323079586029
ppo/policy/advantages_mean: 2.7939677238464355e-09
---------------------------------------------------------------------------------------------------


730it [1:36:12,  3.18s/it]

objective/kl: -9.190938949584961
ppo/returns/mean: 0.5958951711654663
ppo/policy/advantages_mean: 2.7939677238464355e-08
---------------------------------------------------------------------------------------------------


731it [1:36:15,  3.16s/it]

objective/kl: -10.209524154663086
ppo/returns/mean: 0.6403679847717285
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


732it [1:36:18,  3.14s/it]

objective/kl: -11.071016311645508
ppo/returns/mean: 0.6948143243789673
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


733it [1:36:22,  3.15s/it]

objective/kl: -10.190765380859375
ppo/returns/mean: 0.6580214500427246
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


734it [1:36:25,  3.13s/it]

objective/kl: -11.186958312988281
ppo/returns/mean: 0.7095233201980591
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


735it [1:36:28,  3.13s/it]

objective/kl: -9.8780517578125
ppo/returns/mean: 0.6599176526069641
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


736it [1:36:31,  3.12s/it]

objective/kl: -10.109199523925781
ppo/returns/mean: 0.7053177356719971
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


737it [1:36:34,  3.13s/it]

objective/kl: -10.27414321899414
ppo/returns/mean: 0.6434839367866516
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


738it [1:36:37,  3.13s/it]

objective/kl: -10.827573776245117
ppo/returns/mean: 0.7379421591758728
ppo/policy/advantages_mean: -1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


739it [1:36:40,  3.12s/it]

objective/kl: -9.753171920776367
ppo/returns/mean: 0.7236075401306152
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


740it [1:36:44,  3.21s/it]

objective/kl: -10.385676383972168
ppo/returns/mean: 0.6903483867645264
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


741it [1:36:47,  3.18s/it]

objective/kl: -10.130025863647461
ppo/returns/mean: 0.7433252930641174
ppo/policy/advantages_mean: -1.30385160446167e-08
---------------------------------------------------------------------------------------------------


742it [1:36:50,  3.16s/it]

objective/kl: -9.533353805541992
ppo/returns/mean: 0.6482828855514526
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


743it [1:36:53,  3.15s/it]

objective/kl: -10.054746627807617
ppo/returns/mean: 0.7247134447097778
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


744it [1:36:56,  3.16s/it]

objective/kl: -9.918213844299316
ppo/returns/mean: 0.7403938174247742
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


745it [1:36:59,  3.17s/it]

objective/kl: -10.467201232910156
ppo/returns/mean: 0.7705668210983276
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


746it [1:37:03,  3.16s/it]

objective/kl: -13.88994026184082
ppo/returns/mean: 1.0172345638275146
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


747it [1:37:06,  3.16s/it]

objective/kl: -12.080238342285156
ppo/returns/mean: 0.8542029857635498
ppo/policy/advantages_mean: 0.0011664535850286484
---------------------------------------------------------------------------------------------------


748it [1:37:09,  3.15s/it]

objective/kl: -11.527597427368164
ppo/returns/mean: 0.9614794254302979
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


749it [1:37:12,  3.16s/it]

objective/kl: -9.80270767211914
ppo/returns/mean: 0.7922347187995911
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


750it [1:37:15,  3.14s/it]

objective/kl: -10.761266708374023
ppo/returns/mean: 0.8282496929168701
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


751it [1:37:18,  3.13s/it]

objective/kl: -9.77812385559082
ppo/returns/mean: 0.7469273805618286
ppo/policy/advantages_mean: -2.60770320892334e-08
---------------------------------------------------------------------------------------------------


752it [1:37:21,  3.13s/it]

objective/kl: -10.049222946166992
ppo/returns/mean: 0.7325712442398071
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


753it [1:37:24,  3.12s/it]

objective/kl: -10.251014709472656
ppo/returns/mean: 0.7134132385253906
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


754it [1:37:28,  3.12s/it]

objective/kl: -10.991829872131348
ppo/returns/mean: 0.7963045835494995
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


755it [1:37:31,  3.12s/it]

objective/kl: -11.093598365783691
ppo/returns/mean: 0.8026451468467712
ppo/policy/advantages_mean: -1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


756it [1:37:34,  3.12s/it]

objective/kl: -10.265185356140137
ppo/returns/mean: 0.7624038457870483
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


757it [1:37:37,  3.12s/it]

objective/kl: -9.943140029907227
ppo/returns/mean: 0.7178139686584473
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


758it [1:37:40,  3.12s/it]

objective/kl: -9.252861022949219
ppo/returns/mean: 0.6455192565917969
ppo/policy/advantages_mean: 1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


759it [1:37:43,  3.11s/it]

objective/kl: -9.280070304870605
ppo/returns/mean: 0.726984441280365
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


760it [1:37:46,  3.11s/it]

objective/kl: -9.419028282165527
ppo/returns/mean: 0.6938899159431458
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


761it [1:37:49,  3.14s/it]

objective/kl: -11.33139419555664
ppo/returns/mean: 0.7748448848724365
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


762it [1:37:53,  3.15s/it]

objective/kl: -12.102668762207031
ppo/returns/mean: 0.8544122576713562
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


763it [1:37:56,  3.13s/it]

objective/kl: -11.390583038330078
ppo/returns/mean: 0.9163680076599121
ppo/policy/advantages_mean: -3.3527612686157227e-08
---------------------------------------------------------------------------------------------------


764it [1:37:59,  3.14s/it]

objective/kl: -11.58487319946289
ppo/returns/mean: 0.9137813448905945
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


765it [1:38:02,  3.15s/it]

objective/kl: -11.241250991821289
ppo/returns/mean: 0.8911248445510864
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


766it [1:38:05,  3.15s/it]

objective/kl: -10.819612503051758
ppo/returns/mean: 0.8908443450927734
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


767it [1:38:08,  3.15s/it]

objective/kl: -10.884027481079102
ppo/returns/mean: 0.8488879203796387
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


768it [1:38:11,  3.14s/it]

objective/kl: -10.946434020996094
ppo/returns/mean: 0.8498272895812988
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


769it [1:38:15,  3.14s/it]

objective/kl: -10.52250862121582
ppo/returns/mean: 0.8362302184104919
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


770it [1:38:18,  3.26s/it]

objective/kl: -10.402039527893066
ppo/returns/mean: 0.7768881320953369
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


771it [1:38:22,  3.48s/it]

objective/kl: -10.76646900177002
ppo/returns/mean: 0.8405989408493042
ppo/policy/advantages_mean: -1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


772it [1:38:26,  3.56s/it]

objective/kl: -11.394110679626465
ppo/returns/mean: 0.8620203733444214
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


773it [1:38:29,  3.43s/it]

objective/kl: -11.672819137573242
ppo/returns/mean: 0.843409538269043
ppo/policy/advantages_mean: 1.210719347000122e-08
---------------------------------------------------------------------------------------------------


774it [1:38:32,  3.38s/it]

objective/kl: -11.219854354858398
ppo/returns/mean: 0.7988039255142212
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


775it [1:38:35,  3.32s/it]

objective/kl: -12.537223815917969
ppo/returns/mean: 0.9390972852706909
ppo/policy/advantages_mean: -4.470348358154297e-08
---------------------------------------------------------------------------------------------------


776it [1:38:39,  3.27s/it]

objective/kl: -12.15435791015625
ppo/returns/mean: 0.8953406810760498
ppo/policy/advantages_mean: -3.3527612686157227e-08
---------------------------------------------------------------------------------------------------


777it [1:38:42,  3.22s/it]

objective/kl: -11.47680950164795
ppo/returns/mean: 0.8662360906600952
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


778it [1:38:45,  3.18s/it]

objective/kl: -11.424436569213867
ppo/returns/mean: 0.8698068857192993
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


779it [1:38:48,  3.18s/it]

objective/kl: -11.161133766174316
ppo/returns/mean: 0.8341969847679138
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


780it [1:38:51,  3.16s/it]

objective/kl: -11.454259872436523
ppo/returns/mean: 0.8855948448181152
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


781it [1:38:54,  3.15s/it]

objective/kl: -10.51575756072998
ppo/returns/mean: 0.7848284244537354
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


782it [1:38:57,  3.17s/it]

objective/kl: -11.243410110473633
ppo/returns/mean: 0.8586337566375732
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


783it [1:39:01,  3.19s/it]

objective/kl: -10.40048885345459
ppo/returns/mean: 0.8280566930770874
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


784it [1:39:04,  3.19s/it]

objective/kl: -10.93414306640625
ppo/returns/mean: 0.8427988290786743
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


785it [1:39:07,  3.17s/it]

objective/kl: -12.068307876586914
ppo/returns/mean: 0.7170225381851196
ppo/policy/advantages_mean: 6.51925802230835e-09
---------------------------------------------------------------------------------------------------


786it [1:39:10,  3.17s/it]

objective/kl: -11.542085647583008
ppo/returns/mean: 0.6933234930038452
ppo/policy/advantages_mean: -0.001032210886478424
---------------------------------------------------------------------------------------------------


787it [1:39:13,  3.18s/it]

objective/kl: -9.719780921936035
ppo/returns/mean: 0.7171237468719482
ppo/policy/advantages_mean: 3.3527612686157227e-08
---------------------------------------------------------------------------------------------------


788it [1:39:16,  3.17s/it]

objective/kl: -9.693580627441406
ppo/returns/mean: 0.6131876707077026
ppo/policy/advantages_mean: -1.30385160446167e-08
---------------------------------------------------------------------------------------------------


789it [1:39:20,  3.15s/it]

objective/kl: -9.150545120239258
ppo/returns/mean: 0.5759334564208984
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


790it [1:39:23,  3.14s/it]

objective/kl: -9.099995613098145
ppo/returns/mean: 0.5126703977584839
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


791it [1:39:26,  3.14s/it]

objective/kl: -8.688596725463867
ppo/returns/mean: 0.43293654918670654
ppo/policy/advantages_mean: 0.0015755174681544304
---------------------------------------------------------------------------------------------------


792it [1:39:29,  3.15s/it]

objective/kl: -7.906148433685303
ppo/returns/mean: 0.4695665240287781
ppo/policy/advantages_mean: 3.725290298461914e-08
---------------------------------------------------------------------------------------------------


793it [1:39:32,  3.15s/it]

objective/kl: -8.661564826965332
ppo/returns/mean: 0.5318416953086853
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


794it [1:39:35,  3.14s/it]

objective/kl: -7.9794020652771
ppo/returns/mean: 0.43658196926116943
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


795it [1:39:38,  3.14s/it]

objective/kl: -9.492977142333984
ppo/returns/mean: 0.5517352223396301
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


796it [1:39:42,  3.14s/it]

objective/kl: -9.273588180541992
ppo/returns/mean: 0.513372004032135
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


797it [1:39:45,  3.13s/it]

objective/kl: -10.54405403137207
ppo/returns/mean: 0.5659440755844116
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


798it [1:39:48,  3.13s/it]

objective/kl: -12.314271926879883
ppo/returns/mean: 0.7197054624557495
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


799it [1:39:51,  3.14s/it]

objective/kl: -12.605853080749512
ppo/returns/mean: 0.8117792010307312
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


800it [1:39:55,  3.37s/it]

objective/kl: -12.101421356201172
ppo/returns/mean: 0.7800440788269043
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


801it [1:39:58,  3.32s/it]

objective/kl: -12.750946044921875
ppo/returns/mean: 0.8516873717308044
ppo/policy/advantages_mean: 2.421438694000244e-08
---------------------------------------------------------------------------------------------------


802it [1:40:01,  3.26s/it]

objective/kl: -12.442152976989746
ppo/returns/mean: 0.8463107347488403
ppo/policy/advantages_mean: 3.259629011154175e-08
---------------------------------------------------------------------------------------------------


803it [1:40:04,  3.21s/it]

objective/kl: -10.953229904174805
ppo/returns/mean: 0.7889498472213745
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


804it [1:40:07,  3.21s/it]

objective/kl: -10.345934867858887
ppo/returns/mean: 0.7274396419525146
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


805it [1:40:11,  3.17s/it]

objective/kl: -10.607706069946289
ppo/returns/mean: 0.7550326585769653
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


806it [1:40:14,  3.36s/it]

objective/kl: -10.994693756103516
ppo/returns/mean: 0.7548078894615173
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


807it [1:40:17,  3.28s/it]

objective/kl: -11.181745529174805
ppo/returns/mean: 0.7698982954025269
ppo/policy/advantages_mean: 3.166496753692627e-08
---------------------------------------------------------------------------------------------------


808it [1:40:21,  3.24s/it]

objective/kl: -11.264802932739258
ppo/returns/mean: 0.7225416898727417
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


809it [1:40:24,  3.21s/it]

objective/kl: -11.378898620605469
ppo/returns/mean: 0.7057455778121948
ppo/policy/advantages_mean: 2.7939677238464355e-09
---------------------------------------------------------------------------------------------------


810it [1:40:27,  3.18s/it]

objective/kl: -11.747756958007812
ppo/returns/mean: 0.7767874002456665
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


811it [1:40:30,  3.30s/it]

objective/kl: -11.513473510742188
ppo/returns/mean: 0.7535158395767212
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


812it [1:40:34,  3.25s/it]

objective/kl: -10.558433532714844
ppo/returns/mean: 0.7004697918891907
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


813it [1:40:37,  3.22s/it]

objective/kl: -10.887929916381836
ppo/returns/mean: 0.6841074228286743
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


814it [1:40:40,  3.18s/it]

objective/kl: -11.128190994262695
ppo/returns/mean: 0.684309184551239
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


815it [1:40:43,  3.18s/it]

objective/kl: -12.53167724609375
ppo/returns/mean: 0.7968341112136841
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


816it [1:40:46,  3.17s/it]

objective/kl: -11.001920700073242
ppo/returns/mean: 0.6213733553886414
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


817it [1:40:49,  3.16s/it]

objective/kl: -11.919055938720703
ppo/returns/mean: 0.7888443470001221
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


818it [1:40:52,  3.16s/it]

objective/kl: -11.808029174804688
ppo/returns/mean: 0.73504638671875
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


819it [1:40:56,  3.14s/it]

objective/kl: -12.202169418334961
ppo/returns/mean: 0.8069313168525696
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


820it [1:40:59,  3.15s/it]

objective/kl: -11.384716033935547
ppo/returns/mean: 0.7935267090797424
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


821it [1:41:02,  3.31s/it]

objective/kl: -10.608940124511719
ppo/returns/mean: 0.6917459964752197
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


822it [1:41:06,  3.30s/it]

objective/kl: -10.362586975097656
ppo/returns/mean: 0.6982288360595703
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


823it [1:41:09,  3.30s/it]

objective/kl: -10.152910232543945
ppo/returns/mean: 0.5626147389411926
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


824it [1:41:12,  3.23s/it]

objective/kl: -9.315633773803711
ppo/returns/mean: 0.6116900444030762
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


825it [1:41:15,  3.25s/it]

objective/kl: -10.971817016601562
ppo/returns/mean: 0.6504990458488464
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


826it [1:41:18,  3.22s/it]

objective/kl: -10.020416259765625
ppo/returns/mean: 0.5293821096420288
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


827it [1:41:22,  3.19s/it]

objective/kl: -10.526910781860352
ppo/returns/mean: 0.6146224737167358
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


828it [1:41:25,  3.17s/it]

objective/kl: -10.840010643005371
ppo/returns/mean: 0.6360371112823486
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


829it [1:41:28,  3.17s/it]

objective/kl: -10.106250762939453
ppo/returns/mean: 0.5752396583557129
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


830it [1:41:31,  3.15s/it]

objective/kl: -9.07934284210205
ppo/returns/mean: 0.5029370784759521
ppo/policy/advantages_mean: -0.0012893229722976685
---------------------------------------------------------------------------------------------------


831it [1:41:34,  3.15s/it]

objective/kl: -9.393826484680176
ppo/returns/mean: 0.5540258884429932
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


832it [1:41:38,  3.25s/it]

objective/kl: -8.761422157287598
ppo/returns/mean: 0.4849776327610016
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


833it [1:41:41,  3.33s/it]

objective/kl: -10.162309646606445
ppo/returns/mean: 0.578323483467102
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


834it [1:41:44,  3.30s/it]

objective/kl: -10.432045936584473
ppo/returns/mean: 0.48835375905036926
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


835it [1:41:48,  3.28s/it]

objective/kl: -11.620145797729492
ppo/returns/mean: 0.6276708841323853
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


836it [1:41:51,  3.37s/it]

objective/kl: -10.007339477539062
ppo/returns/mean: 0.5960131287574768
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


837it [1:41:54,  3.31s/it]

objective/kl: -10.214885711669922
ppo/returns/mean: 0.5419815182685852
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


838it [1:41:58,  3.26s/it]

objective/kl: -11.225934982299805
ppo/returns/mean: 0.6320227384567261
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


839it [1:42:01,  3.23s/it]

objective/kl: -10.92611026763916
ppo/returns/mean: 0.6095322966575623
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


840it [1:42:04,  3.21s/it]

objective/kl: -11.372817993164062
ppo/returns/mean: 0.6353269219398499
ppo/policy/advantages_mean: -0.002094365656375885
---------------------------------------------------------------------------------------------------


841it [1:42:07,  3.21s/it]

objective/kl: -9.878662109375
ppo/returns/mean: 0.568222165107727
ppo/policy/advantages_mean: 4.6566128730773926e-09
---------------------------------------------------------------------------------------------------


842it [1:42:10,  3.19s/it]

objective/kl: -11.139286041259766
ppo/returns/mean: 0.6131442785263062
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


843it [1:42:14,  3.25s/it]

objective/kl: -9.811464309692383
ppo/returns/mean: 0.5662801861763
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


844it [1:42:17,  3.22s/it]

objective/kl: -10.242485046386719
ppo/returns/mean: 0.591423749923706
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


845it [1:42:20,  3.19s/it]

objective/kl: -10.777448654174805
ppo/returns/mean: 0.6089169979095459
ppo/policy/advantages_mean: -0.0004946542903780937
---------------------------------------------------------------------------------------------------


846it [1:42:23,  3.17s/it]

objective/kl: -10.840505599975586
ppo/returns/mean: 0.5943580865859985
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


847it [1:42:26,  3.15s/it]

objective/kl: -9.867239952087402
ppo/returns/mean: 0.4837542772293091
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


848it [1:42:29,  3.13s/it]

objective/kl: -8.99726676940918
ppo/returns/mean: 0.5139313340187073
ppo/policy/advantages_mean: 8.381903171539307e-09
---------------------------------------------------------------------------------------------------


849it [1:42:32,  3.15s/it]

objective/kl: -8.448467254638672
ppo/returns/mean: 0.41175004839897156
ppo/policy/advantages_mean: 0.0010651294142007828
---------------------------------------------------------------------------------------------------


850it [1:42:35,  3.14s/it]

objective/kl: -9.968297958374023
ppo/returns/mean: 0.5157985091209412
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


851it [1:42:39,  3.12s/it]

objective/kl: -10.776397705078125
ppo/returns/mean: 0.6116853952407837
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


852it [1:42:42,  3.13s/it]

objective/kl: -10.84504222869873
ppo/returns/mean: 0.5937786102294922
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


853it [1:42:45,  3.13s/it]

objective/kl: -10.240928649902344
ppo/returns/mean: 0.6111306548118591
ppo/policy/advantages_mean: 0.0013289675116539001
---------------------------------------------------------------------------------------------------


854it [1:42:48,  3.13s/it]

objective/kl: -11.74859619140625
ppo/returns/mean: 0.637643575668335
ppo/policy/advantages_mean: -1.30385160446167e-08
---------------------------------------------------------------------------------------------------


855it [1:42:51,  3.13s/it]

objective/kl: -10.138020515441895
ppo/returns/mean: 0.5360438227653503
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


856it [1:42:54,  3.14s/it]

objective/kl: -8.985115051269531
ppo/returns/mean: 0.4285011887550354
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


857it [1:42:57,  3.14s/it]

objective/kl: -8.965789794921875
ppo/returns/mean: 0.43443378806114197
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


858it [1:43:01,  3.13s/it]

objective/kl: -8.796228408813477
ppo/returns/mean: 0.4969702363014221
ppo/policy/advantages_mean: 0.0018389225006103516
---------------------------------------------------------------------------------------------------


859it [1:43:04,  3.14s/it]

objective/kl: -8.148849487304688
ppo/returns/mean: 0.39165452122688293
ppo/policy/advantages_mean: 0.00039502233266830444
---------------------------------------------------------------------------------------------------


860it [1:43:07,  3.20s/it]

objective/kl: -8.503693580627441
ppo/returns/mean: 0.4526134133338928
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


861it [1:43:10,  3.18s/it]

objective/kl: -8.74406623840332
ppo/returns/mean: 0.4415102005004883
ppo/policy/advantages_mean: -3.1581148505210876e-05
---------------------------------------------------------------------------------------------------


862it [1:43:13,  3.18s/it]

objective/kl: -8.739059448242188
ppo/returns/mean: 0.3675880432128906
ppo/policy/advantages_mean: 1.3969838619232178e-08
---------------------------------------------------------------------------------------------------


863it [1:43:16,  3.16s/it]

objective/kl: -9.786752700805664
ppo/returns/mean: 0.4564378261566162
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


864it [1:43:20,  3.18s/it]

objective/kl: -9.428330421447754
ppo/returns/mean: 0.37376731634140015
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


865it [1:43:23,  3.17s/it]

objective/kl: -8.757225036621094
ppo/returns/mean: 0.4184744358062744
ppo/policy/advantages_mean: 0.0011803004890680313
---------------------------------------------------------------------------------------------------


866it [1:43:26,  3.20s/it]

objective/kl: -10.4588041305542
ppo/returns/mean: 0.5038824081420898
ppo/policy/advantages_mean: -0.00031802430748939514
---------------------------------------------------------------------------------------------------


867it [1:43:29,  3.17s/it]

objective/kl: -9.857437133789062
ppo/returns/mean: 0.41969263553619385
ppo/policy/advantages_mean: 0.0006778240203857422
---------------------------------------------------------------------------------------------------


868it [1:43:32,  3.16s/it]

objective/kl: -8.823674201965332
ppo/returns/mean: 0.41745686531066895
ppo/policy/advantages_mean: -0.0009583048522472382
---------------------------------------------------------------------------------------------------


869it [1:43:35,  3.15s/it]

objective/kl: -9.975284576416016
ppo/returns/mean: 0.5186459422111511
ppo/policy/advantages_mean: 0.00024218857288360596
---------------------------------------------------------------------------------------------------


870it [1:43:39,  3.14s/it]

objective/kl: -9.65202808380127
ppo/returns/mean: 0.5291938781738281
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


871it [1:43:42,  3.12s/it]

objective/kl: -10.133176803588867
ppo/returns/mean: 0.5394208431243896
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


872it [1:43:45,  3.12s/it]

objective/kl: -9.228069305419922
ppo/returns/mean: 0.4725530743598938
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


873it [1:43:48,  3.12s/it]

objective/kl: -8.50015640258789
ppo/returns/mean: 0.45202958583831787
ppo/policy/advantages_mean: 5.122274160385132e-09
---------------------------------------------------------------------------------------------------


874it [1:43:51,  3.12s/it]

objective/kl: -8.901007652282715
ppo/returns/mean: 0.4201088547706604
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


875it [1:43:54,  3.13s/it]

objective/kl: -9.312326431274414
ppo/returns/mean: 0.41749608516693115
ppo/policy/advantages_mean: -0.002550508826971054
---------------------------------------------------------------------------------------------------


876it [1:43:57,  3.13s/it]

objective/kl: -7.901858329772949
ppo/returns/mean: 0.3498384952545166
ppo/policy/advantages_mean: 1.3969838619232178e-08
---------------------------------------------------------------------------------------------------


877it [1:44:00,  3.14s/it]

objective/kl: -8.94022274017334
ppo/returns/mean: 0.42537248134613037
ppo/policy/advantages_mean: 0.0009600669145584106
---------------------------------------------------------------------------------------------------


878it [1:44:04,  3.14s/it]

objective/kl: -7.555761337280273
ppo/returns/mean: 0.3461437225341797
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


879it [1:44:07,  3.13s/it]

objective/kl: -6.412932395935059
ppo/returns/mean: 0.2779654860496521
ppo/policy/advantages_mean: 5.21540641784668e-08
---------------------------------------------------------------------------------------------------


880it [1:44:10,  3.13s/it]

objective/kl: -7.4733357429504395
ppo/returns/mean: 0.3025321960449219
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


881it [1:44:13,  3.13s/it]

objective/kl: -6.123002052307129
ppo/returns/mean: 0.1501496583223343
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


882it [1:44:16,  3.12s/it]

objective/kl: -7.067224979400635
ppo/returns/mean: 0.25529083609580994
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


883it [1:44:19,  3.12s/it]

objective/kl: -7.808931350708008
ppo/returns/mean: 0.30530571937561035
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


884it [1:44:22,  3.13s/it]

objective/kl: -7.637120246887207
ppo/returns/mean: 0.26735472679138184
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


885it [1:44:25,  3.13s/it]

objective/kl: -8.429664611816406
ppo/returns/mean: 0.3484004735946655
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


886it [1:44:29,  3.13s/it]

objective/kl: -9.140536308288574
ppo/returns/mean: 0.3439759910106659
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


887it [1:44:32,  3.13s/it]

objective/kl: -8.033221244812012
ppo/returns/mean: 0.26494720578193665
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


888it [1:44:35,  3.13s/it]

objective/kl: -8.327033042907715
ppo/returns/mean: 0.42879733443260193
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


889it [1:44:39,  3.45s/it]

objective/kl: -7.741279602050781
ppo/returns/mean: 0.3891904354095459
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


890it [1:44:42,  3.38s/it]

objective/kl: -9.535048484802246
ppo/returns/mean: 0.4412079453468323
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


891it [1:44:45,  3.30s/it]

objective/kl: -9.806511878967285
ppo/returns/mean: 0.5178951025009155
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


892it [1:44:49,  3.25s/it]

objective/kl: -9.863595962524414
ppo/returns/mean: 0.5192262530326843
ppo/policy/advantages_mean: -0.0007065571844577789
---------------------------------------------------------------------------------------------------


893it [1:44:52,  3.23s/it]

objective/kl: -8.97203254699707
ppo/returns/mean: 0.4406268298625946
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


894it [1:44:55,  3.20s/it]

objective/kl: -9.331589698791504
ppo/returns/mean: 0.4597623348236084
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


895it [1:44:58,  3.17s/it]

objective/kl: -9.102164268493652
ppo/returns/mean: 0.4802737236022949
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


896it [1:45:01,  3.16s/it]

objective/kl: -7.374130725860596
ppo/returns/mean: 0.35035258531570435
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


897it [1:45:04,  3.15s/it]

objective/kl: -7.379027366638184
ppo/returns/mean: 0.3026747703552246
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


898it [1:45:07,  3.15s/it]

objective/kl: -7.290314674377441
ppo/returns/mean: 0.2839313745498657
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


899it [1:45:11,  3.16s/it]

objective/kl: -7.181347846984863
ppo/returns/mean: 0.25888335704803467
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


900it [1:45:14,  3.14s/it]

objective/kl: -7.795976638793945
ppo/returns/mean: 0.2796247601509094
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


901it [1:45:17,  3.14s/it]

objective/kl: -8.730168342590332
ppo/returns/mean: 0.36528170108795166
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


902it [1:45:20,  3.19s/it]

objective/kl: -7.750116348266602
ppo/returns/mean: 0.2921999990940094
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


903it [1:45:23,  3.16s/it]

objective/kl: -8.524948120117188
ppo/returns/mean: 0.40805965662002563
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


904it [1:45:26,  3.15s/it]

objective/kl: -7.403362274169922
ppo/returns/mean: 0.2439214289188385
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


905it [1:45:29,  3.15s/it]

objective/kl: -6.211695671081543
ppo/returns/mean: 0.2659412622451782
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


906it [1:45:33,  3.18s/it]

objective/kl: -7.295860290527344
ppo/returns/mean: 0.293830931186676
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


907it [1:45:36,  3.16s/it]

objective/kl: -7.664399147033691
ppo/returns/mean: 0.37913209199905396
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


908it [1:45:39,  3.14s/it]

objective/kl: -8.69529914855957
ppo/returns/mean: 0.43270841240882874
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


909it [1:45:42,  3.14s/it]

objective/kl: -8.518329620361328
ppo/returns/mean: 0.36396998167037964
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


910it [1:45:45,  3.15s/it]

objective/kl: -8.77739143371582
ppo/returns/mean: 0.4201291501522064
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


911it [1:45:48,  3.15s/it]

objective/kl: -9.373973846435547
ppo/returns/mean: 0.5199645757675171
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


912it [1:45:52,  3.22s/it]

objective/kl: -9.735404968261719
ppo/returns/mean: 0.5068373680114746
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


913it [1:45:55,  3.20s/it]

objective/kl: -9.00333309173584
ppo/returns/mean: 0.4380236864089966
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


914it [1:45:58,  3.20s/it]

objective/kl: -7.992115497589111
ppo/returns/mean: 0.4300772547721863
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


915it [1:46:01,  3.19s/it]

objective/kl: -8.306608200073242
ppo/returns/mean: 0.3942036032676697
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


916it [1:46:04,  3.17s/it]

objective/kl: -8.007484436035156
ppo/returns/mean: 0.41799455881118774
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


917it [1:46:08,  3.18s/it]

objective/kl: -9.310543060302734
ppo/returns/mean: 0.48602938652038574
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


918it [1:46:11,  3.17s/it]

objective/kl: -8.514605522155762
ppo/returns/mean: 0.41553807258605957
ppo/policy/advantages_mean: -0.0002709394320845604
---------------------------------------------------------------------------------------------------


919it [1:46:14,  3.15s/it]

objective/kl: -8.306127548217773
ppo/returns/mean: 0.4907500445842743
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


920it [1:46:17,  3.15s/it]

objective/kl: -8.387554168701172
ppo/returns/mean: 0.439605712890625
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


921it [1:46:20,  3.15s/it]

objective/kl: -8.836973190307617
ppo/returns/mean: 0.45814138650894165
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


922it [1:46:23,  3.14s/it]

objective/kl: -7.893969535827637
ppo/returns/mean: 0.35496506094932556
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


923it [1:46:26,  3.12s/it]

objective/kl: -8.546714782714844
ppo/returns/mean: 0.4475328326225281
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


924it [1:46:29,  3.12s/it]

objective/kl: -7.483259201049805
ppo/returns/mean: 0.3642122745513916
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


925it [1:46:33,  3.13s/it]

objective/kl: -7.446089744567871
ppo/returns/mean: 0.30880826711654663
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


926it [1:46:36,  3.13s/it]

objective/kl: -7.671270847320557
ppo/returns/mean: 0.31509965658187866
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


927it [1:46:39,  3.13s/it]

objective/kl: -8.315741539001465
ppo/returns/mean: 0.388848215341568
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


928it [1:46:42,  3.14s/it]

objective/kl: -8.280645370483398
ppo/returns/mean: 0.37306106090545654
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


929it [1:46:45,  3.14s/it]

objective/kl: -8.781280517578125
ppo/returns/mean: 0.39150604605674744
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


930it [1:46:48,  3.15s/it]

objective/kl: -7.949371337890625
ppo/returns/mean: 0.30387574434280396
ppo/policy/advantages_mean: 0.0007993411272764206
---------------------------------------------------------------------------------------------------


931it [1:46:51,  3.14s/it]

objective/kl: -8.431282043457031
ppo/returns/mean: 0.35855674743652344
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


932it [1:46:54,  3.12s/it]

objective/kl: -8.979738235473633
ppo/returns/mean: 0.4501117467880249
ppo/policy/advantages_mean: 0.001054227352142334
---------------------------------------------------------------------------------------------------


933it [1:46:58,  3.10s/it]

objective/kl: -8.434793472290039
ppo/returns/mean: 0.3604346513748169
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


934it [1:47:01,  3.17s/it]

objective/kl: -7.994574546813965
ppo/returns/mean: 0.31005650758743286
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


935it [1:47:04,  3.19s/it]

objective/kl: -7.595772743225098
ppo/returns/mean: 0.3474240303039551
ppo/policy/advantages_mean: 0.0012468360364437103
---------------------------------------------------------------------------------------------------


936it [1:47:07,  3.24s/it]

objective/kl: -6.658082485198975
ppo/returns/mean: 0.22593747079372406
ppo/policy/advantages_mean: -0.003024667501449585
---------------------------------------------------------------------------------------------------


937it [1:47:11,  3.20s/it]

objective/kl: -7.6096391677856445
ppo/returns/mean: 0.2925463318824768
ppo/policy/advantages_mean: -0.0016505271196365356
---------------------------------------------------------------------------------------------------


938it [1:47:14,  3.18s/it]

objective/kl: -7.0226874351501465
ppo/returns/mean: 0.30436021089553833
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


939it [1:47:17,  3.15s/it]

objective/kl: -7.951490879058838
ppo/returns/mean: 0.3292258679866791
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


940it [1:47:20,  3.13s/it]

objective/kl: -7.950034141540527
ppo/returns/mean: 0.3341948986053467
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


941it [1:47:23,  3.12s/it]

objective/kl: -8.461660385131836
ppo/returns/mean: 0.3292393386363983
ppo/policy/advantages_mean: 4.453212022781372e-05
---------------------------------------------------------------------------------------------------


942it [1:47:26,  3.12s/it]

objective/kl: -7.697625637054443
ppo/returns/mean: 0.3235432505607605
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


943it [1:47:29,  3.13s/it]

objective/kl: -8.356521606445312
ppo/returns/mean: 0.2998436391353607
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


944it [1:47:32,  3.13s/it]

objective/kl: -8.511588096618652
ppo/returns/mean: 0.32570046186447144
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


945it [1:47:35,  3.13s/it]

objective/kl: -7.747405052185059
ppo/returns/mean: 0.3688809871673584
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


946it [1:47:39,  3.13s/it]

objective/kl: -8.29613971710205
ppo/returns/mean: 0.3506682813167572
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


947it [1:47:42,  3.15s/it]

objective/kl: -8.82796859741211
ppo/returns/mean: 0.42200589179992676
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


948it [1:47:45,  3.14s/it]

objective/kl: -8.36212158203125
ppo/returns/mean: 0.3869074583053589
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


949it [1:47:48,  3.13s/it]

objective/kl: -8.31927490234375
ppo/returns/mean: 0.3905310332775116
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


950it [1:47:51,  3.14s/it]

objective/kl: -8.397756576538086
ppo/returns/mean: 0.39113616943359375
ppo/policy/advantages_mean: 6.51925802230835e-09
---------------------------------------------------------------------------------------------------


951it [1:47:54,  3.14s/it]

objective/kl: -9.52253532409668
ppo/returns/mean: 0.42404884099960327
ppo/policy/advantages_mean: 0.00012436043471097946
---------------------------------------------------------------------------------------------------


952it [1:47:57,  3.13s/it]

objective/kl: -9.60458755493164
ppo/returns/mean: 0.4821963310241699
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


953it [1:48:00,  3.10s/it]

objective/kl: -9.355438232421875
ppo/returns/mean: 0.46689289808273315
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


954it [1:48:04,  3.09s/it]

objective/kl: -7.605623245239258
ppo/returns/mean: 0.3496969938278198
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


955it [1:48:07,  3.10s/it]

objective/kl: -8.005178451538086
ppo/returns/mean: 0.3567594289779663
ppo/policy/advantages_mean: 2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


956it [1:48:10,  3.09s/it]

objective/kl: -6.761025428771973
ppo/returns/mean: 0.2987217903137207
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


957it [1:48:13,  3.10s/it]

objective/kl: -8.492315292358398
ppo/returns/mean: 0.34747040271759033
ppo/policy/advantages_mean: -0.00033793970942497253
---------------------------------------------------------------------------------------------------


958it [1:48:16,  3.10s/it]

objective/kl: -7.678873062133789
ppo/returns/mean: 0.32981762290000916
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


959it [1:48:19,  3.09s/it]

objective/kl: -7.446621417999268
ppo/returns/mean: 0.30786797404289246
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


960it [1:48:22,  3.09s/it]

objective/kl: -7.226926803588867
ppo/returns/mean: 0.29886919260025024
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


961it [1:48:25,  3.10s/it]

objective/kl: -7.625182151794434
ppo/returns/mean: 0.2756546139717102
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


962it [1:48:28,  3.10s/it]

objective/kl: -6.426070213317871
ppo/returns/mean: 0.2047489881515503
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


963it [1:48:31,  3.11s/it]

objective/kl: -5.859252452850342
ppo/returns/mean: 0.2093728631734848
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


964it [1:48:35,  3.12s/it]

objective/kl: -5.342473030090332
ppo/returns/mean: 0.2146623581647873
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


965it [1:48:38,  3.11s/it]

objective/kl: -7.792879104614258
ppo/returns/mean: 0.3011951148509979
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


966it [1:48:41,  3.14s/it]

objective/kl: -8.24049186706543
ppo/returns/mean: 0.330352246761322
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


967it [1:48:44,  3.12s/it]

objective/kl: -8.554786682128906
ppo/returns/mean: 0.41301363706588745
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


968it [1:48:47,  3.11s/it]

objective/kl: -9.814787864685059
ppo/returns/mean: 0.4745016098022461
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


969it [1:48:50,  3.11s/it]

objective/kl: -9.040226936340332
ppo/returns/mean: 0.46521931886672974
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


970it [1:48:53,  3.13s/it]

objective/kl: -9.115324020385742
ppo/returns/mean: 0.4644584357738495
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


971it [1:48:56,  3.11s/it]

objective/kl: -9.209981918334961
ppo/returns/mean: 0.47302037477493286
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


972it [1:49:00,  3.12s/it]

objective/kl: -8.038267135620117
ppo/returns/mean: 0.46356111764907837
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


973it [1:49:03,  3.10s/it]

objective/kl: -7.994256973266602
ppo/returns/mean: 0.43134626746177673
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


974it [1:49:06,  3.17s/it]

objective/kl: -6.902679443359375
ppo/returns/mean: 0.3473682701587677
ppo/policy/advantages_mean: 3.725290298461914e-08
---------------------------------------------------------------------------------------------------


975it [1:49:09,  3.16s/it]

objective/kl: -7.13326358795166
ppo/returns/mean: 0.37108856439590454
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


976it [1:49:12,  3.16s/it]

objective/kl: -7.755942344665527
ppo/returns/mean: 0.3605195879936218
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


977it [1:49:15,  3.15s/it]

objective/kl: -7.636834144592285
ppo/returns/mean: 0.3882942497730255
ppo/policy/advantages_mean: 6.51925802230835e-09
---------------------------------------------------------------------------------------------------


978it [1:49:19,  3.15s/it]

objective/kl: -7.893275260925293
ppo/returns/mean: 0.4035264253616333
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


979it [1:49:22,  3.13s/it]

objective/kl: -7.836650848388672
ppo/returns/mean: 0.3853285312652588
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


980it [1:49:25,  3.14s/it]

objective/kl: -7.406054496765137
ppo/returns/mean: 0.3458658456802368
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


981it [1:49:28,  3.13s/it]

objective/kl: -7.487369537353516
ppo/returns/mean: 0.39566075801849365
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


982it [1:49:31,  3.12s/it]

objective/kl: -7.285575866699219
ppo/returns/mean: 0.3513938784599304
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


983it [1:49:34,  3.12s/it]

objective/kl: -7.071962833404541
ppo/returns/mean: 0.2956780791282654
ppo/policy/advantages_mean: 0.0011975131928920746
---------------------------------------------------------------------------------------------------


984it [1:49:37,  3.11s/it]

objective/kl: -7.402427673339844
ppo/returns/mean: 0.3153313100337982
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


985it [1:49:40,  3.10s/it]

objective/kl: -8.281949996948242
ppo/returns/mean: 0.3700045347213745
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


986it [1:49:43,  3.12s/it]

objective/kl: -7.7025837898254395
ppo/returns/mean: 0.31515899300575256
ppo/policy/advantages_mean: 0.003910757601261139
---------------------------------------------------------------------------------------------------


987it [1:49:47,  3.12s/it]

objective/kl: -8.685272216796875
ppo/returns/mean: 0.45097464323043823
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


988it [1:49:50,  3.13s/it]

objective/kl: -8.940252304077148
ppo/returns/mean: 0.4586375057697296
ppo/policy/advantages_mean: -4.284083843231201e-08
---------------------------------------------------------------------------------------------------


989it [1:49:53,  3.12s/it]

objective/kl: -8.860499382019043
ppo/returns/mean: 0.48337864875793457
ppo/policy/advantages_mean: 3.818422555923462e-08
---------------------------------------------------------------------------------------------------


990it [1:49:56,  3.11s/it]

objective/kl: -8.45949935913086
ppo/returns/mean: 0.4559139907360077
ppo/policy/advantages_mean: -0.0003503970801830292
---------------------------------------------------------------------------------------------------


991it [1:49:59,  3.10s/it]

objective/kl: -8.226263046264648
ppo/returns/mean: 0.4897051453590393
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


992it [1:50:02,  3.08s/it]

objective/kl: -7.6391215324401855
ppo/returns/mean: 0.4712727963924408
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


993it [1:50:05,  3.07s/it]

objective/kl: -7.439316749572754
ppo/returns/mean: 0.37814730405807495
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


994it [1:50:08,  3.07s/it]

objective/kl: -7.828072547912598
ppo/returns/mean: 0.4176078736782074
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


995it [1:50:11,  3.07s/it]

objective/kl: -7.891876220703125
ppo/returns/mean: 0.40390142798423767
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


996it [1:50:14,  3.08s/it]

objective/kl: -7.511842727661133
ppo/returns/mean: 0.44313448667526245
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


997it [1:50:17,  3.08s/it]

objective/kl: -8.054401397705078
ppo/returns/mean: 0.42025241255760193
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


998it [1:50:21,  3.12s/it]

objective/kl: -8.825394630432129
ppo/returns/mean: 0.48295092582702637
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


999it [1:50:24,  3.12s/it]

objective/kl: -8.582162857055664
ppo/returns/mean: 0.48131975531578064
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1000it [1:50:27,  3.10s/it]

objective/kl: -9.546354293823242
ppo/returns/mean: 0.508463978767395
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1001it [1:50:30,  3.10s/it]

objective/kl: -8.137615203857422
ppo/returns/mean: 0.4509435296058655
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1002it [1:50:33,  3.10s/it]

objective/kl: -7.4928202629089355
ppo/returns/mean: 0.4663386046886444
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1003it [1:50:36,  3.10s/it]

objective/kl: -6.773562431335449
ppo/returns/mean: 0.30065762996673584
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1004it [1:50:39,  3.12s/it]

objective/kl: -6.483781814575195
ppo/returns/mean: 0.2628943920135498
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1005it [1:50:42,  3.12s/it]

objective/kl: -6.045248985290527
ppo/returns/mean: 0.2266809195280075
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1006it [1:50:45,  3.12s/it]

objective/kl: -5.996919631958008
ppo/returns/mean: 0.2399277240037918
ppo/policy/advantages_mean: 2.421438694000244e-08
---------------------------------------------------------------------------------------------------


1007it [1:50:49,  3.11s/it]

objective/kl: -6.971213340759277
ppo/returns/mean: 0.2820526957511902
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1008it [1:50:52,  3.11s/it]

objective/kl: -6.88262939453125
ppo/returns/mean: 0.2795037031173706
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1009it [1:50:55,  3.10s/it]

objective/kl: -8.216058731079102
ppo/returns/mean: 0.31556111574172974
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


1010it [1:50:58,  3.16s/it]

objective/kl: -7.096601963043213
ppo/returns/mean: 0.26077038049697876
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


1011it [1:51:01,  3.14s/it]

objective/kl: -7.056638240814209
ppo/returns/mean: 0.3264928162097931
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1012it [1:51:04,  3.12s/it]

objective/kl: -6.800583839416504
ppo/returns/mean: 0.3391983211040497
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


1013it [1:51:07,  3.12s/it]

objective/kl: -6.6254119873046875
ppo/returns/mean: 0.3040691614151001
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1014it [1:51:10,  3.12s/it]

objective/kl: -6.979596138000488
ppo/returns/mean: 0.3045539855957031
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1015it [1:51:14,  3.13s/it]

objective/kl: -6.855375289916992
ppo/returns/mean: 0.2866422235965729
ppo/policy/advantages_mean: -0.0022029727697372437
---------------------------------------------------------------------------------------------------


1016it [1:51:17,  3.13s/it]

objective/kl: -7.7831292152404785
ppo/returns/mean: 0.37543052434921265
ppo/policy/advantages_mean: -2.60770320892334e-08
---------------------------------------------------------------------------------------------------


1017it [1:51:20,  3.12s/it]

objective/kl: -7.750337600708008
ppo/returns/mean: 0.3856180012226105
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1018it [1:51:23,  3.10s/it]

objective/kl: -7.58730411529541
ppo/returns/mean: 0.4014238715171814
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1019it [1:51:26,  3.11s/it]

objective/kl: -7.9786376953125
ppo/returns/mean: 0.40476515889167786
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1020it [1:51:29,  3.10s/it]

objective/kl: -7.628310203552246
ppo/returns/mean: 0.40661364793777466
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


1021it [1:51:32,  3.12s/it]

objective/kl: -6.827198028564453
ppo/returns/mean: 0.3642287254333496
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1022it [1:51:35,  3.12s/it]

objective/kl: -7.0875325202941895
ppo/returns/mean: 0.3489181399345398
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1023it [1:51:38,  3.11s/it]

objective/kl: -7.102738857269287
ppo/returns/mean: 0.3517884314060211
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


1024it [1:51:42,  3.11s/it]

objective/kl: -7.522723197937012
ppo/returns/mean: 0.3549734950065613
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1025it [1:51:45,  3.11s/it]

objective/kl: -8.13497257232666
ppo/returns/mean: 0.40502798557281494
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1026it [1:51:48,  3.10s/it]

objective/kl: -8.464576721191406
ppo/returns/mean: 0.3983033299446106
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


1027it [1:51:51,  3.15s/it]

objective/kl: -8.334941864013672
ppo/returns/mean: 0.4046013653278351
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1028it [1:51:54,  3.12s/it]

objective/kl: -9.320208549499512
ppo/returns/mean: 0.42483842372894287
ppo/policy/advantages_mean: 8.381903171539307e-09
---------------------------------------------------------------------------------------------------


1029it [1:51:57,  3.10s/it]

objective/kl: -8.393768310546875
ppo/returns/mean: 0.44454777240753174
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1030it [1:52:00,  3.13s/it]

objective/kl: -8.140279769897461
ppo/returns/mean: 0.44802552461624146
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1031it [1:52:04,  3.19s/it]

objective/kl: -7.595658779144287
ppo/returns/mean: 0.3761064410209656
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1032it [1:52:07,  3.15s/it]

objective/kl: -7.795533657073975
ppo/returns/mean: 0.37982723116874695
ppo/policy/advantages_mean: 2.7008354663848877e-08
---------------------------------------------------------------------------------------------------


1033it [1:52:10,  3.13s/it]

objective/kl: -7.937248706817627
ppo/returns/mean: 0.40490180253982544
ppo/policy/advantages_mean: -2.421438694000244e-08
---------------------------------------------------------------------------------------------------


1034it [1:52:13,  3.13s/it]

objective/kl: -8.767547607421875
ppo/returns/mean: 0.452770859003067
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1035it [1:52:16,  3.12s/it]

objective/kl: -8.695289611816406
ppo/returns/mean: 0.43211817741394043
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1036it [1:52:19,  3.14s/it]

objective/kl: -8.278167724609375
ppo/returns/mean: 0.414995014667511
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1037it [1:52:22,  3.12s/it]

objective/kl: -6.864907264709473
ppo/returns/mean: 0.4108954071998596
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1038it [1:52:25,  3.12s/it]

objective/kl: -7.645066738128662
ppo/returns/mean: 0.37341535091400146
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1039it [1:52:29,  3.12s/it]

objective/kl: -7.175868988037109
ppo/returns/mean: 0.3764726519584656
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1040it [1:52:32,  3.14s/it]

objective/kl: -7.05388069152832
ppo/returns/mean: 0.35084831714630127
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1041it [1:52:35,  3.14s/it]

objective/kl: -7.256800651550293
ppo/returns/mean: 0.39911824464797974
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1042it [1:52:38,  3.15s/it]

objective/kl: -7.561305999755859
ppo/returns/mean: 0.3329899311065674
ppo/policy/advantages_mean: 0.0011656582355499268
---------------------------------------------------------------------------------------------------


1043it [1:52:41,  3.14s/it]

objective/kl: -8.935348510742188
ppo/returns/mean: 0.37124067544937134
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


1044it [1:52:44,  3.14s/it]

objective/kl: -8.234477043151855
ppo/returns/mean: 0.3991324305534363
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1045it [1:52:47,  3.14s/it]

objective/kl: -7.181524753570557
ppo/returns/mean: 0.3875691890716553
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1046it [1:52:51,  3.13s/it]

objective/kl: -8.219034194946289
ppo/returns/mean: 0.43303075432777405
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1047it [1:52:54,  3.13s/it]

objective/kl: -7.1123270988464355
ppo/returns/mean: 0.3337624669075012
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1048it [1:52:57,  3.13s/it]

objective/kl: -8.570085525512695
ppo/returns/mean: 0.43174949288368225
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1049it [1:53:00,  3.14s/it]

objective/kl: -8.785884857177734
ppo/returns/mean: 0.44163772463798523
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1050it [1:53:03,  3.13s/it]

objective/kl: -7.73362398147583
ppo/returns/mean: 0.42114025354385376
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1051it [1:53:06,  3.13s/it]

objective/kl: -7.715117454528809
ppo/returns/mean: 0.419259250164032
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1052it [1:53:09,  3.14s/it]

objective/kl: -7.817426681518555
ppo/returns/mean: 0.4014250934123993
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1053it [1:53:13,  3.14s/it]

objective/kl: -7.491643905639648
ppo/returns/mean: 0.41356179118156433
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1054it [1:53:16,  3.21s/it]

objective/kl: -6.862485885620117
ppo/returns/mean: 0.3106844127178192
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1055it [1:53:19,  3.19s/it]

objective/kl: -7.586577415466309
ppo/returns/mean: 0.3640748858451843
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


1056it [1:53:22,  3.18s/it]

objective/kl: -6.932758331298828
ppo/returns/mean: 0.2622552812099457
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1057it [1:53:25,  3.19s/it]

objective/kl: -7.826469421386719
ppo/returns/mean: 0.35086166858673096
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1058it [1:53:29,  3.20s/it]

objective/kl: -6.987321853637695
ppo/returns/mean: 0.3549458384513855
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1059it [1:53:32,  3.19s/it]

objective/kl: -7.796517848968506
ppo/returns/mean: 0.34557798504829407
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1060it [1:53:35,  3.17s/it]

objective/kl: -8.257251739501953
ppo/returns/mean: 0.3972305655479431
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1061it [1:53:38,  3.17s/it]

objective/kl: -8.665922164916992
ppo/returns/mean: 0.43989312648773193
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1062it [1:53:41,  3.19s/it]

objective/kl: -7.5915656089782715
ppo/returns/mean: 0.4099633991718292
ppo/policy/advantages_mean: 8.381903171539307e-09
---------------------------------------------------------------------------------------------------


1063it [1:53:45,  3.21s/it]

objective/kl: -7.5519561767578125
ppo/returns/mean: 0.4266359806060791
ppo/policy/advantages_mean: 3.3527612686157227e-08
---------------------------------------------------------------------------------------------------


1064it [1:53:48,  3.20s/it]

objective/kl: -7.124087333679199
ppo/returns/mean: 0.37313112616539
ppo/policy/advantages_mean: 1.1641532182693481e-08
---------------------------------------------------------------------------------------------------


1065it [1:53:51,  3.17s/it]

objective/kl: -7.481470108032227
ppo/returns/mean: 0.3651895821094513
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1066it [1:53:54,  3.17s/it]

objective/kl: -8.185446739196777
ppo/returns/mean: 0.45639657974243164
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1067it [1:53:57,  3.17s/it]

objective/kl: -7.890084266662598
ppo/returns/mean: 0.46594956517219543
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1068it [1:54:00,  3.16s/it]

objective/kl: -7.72459077835083
ppo/returns/mean: 0.415850430727005
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


1069it [1:54:03,  3.16s/it]

objective/kl: -7.431478977203369
ppo/returns/mean: 0.4774530529975891
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1070it [1:54:07,  3.16s/it]

objective/kl: -7.451481342315674
ppo/returns/mean: 0.4342018961906433
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1071it [1:54:10,  3.19s/it]

objective/kl: -7.87099027633667
ppo/returns/mean: 0.44667452573776245
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1072it [1:54:13,  3.17s/it]

objective/kl: -8.420441627502441
ppo/returns/mean: 0.3989589810371399
ppo/policy/advantages_mean: 2.5019049644470215e-05
---------------------------------------------------------------------------------------------------


1073it [1:54:16,  3.16s/it]

objective/kl: -8.266149520874023
ppo/returns/mean: 0.4619940519332886
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


1074it [1:54:19,  3.15s/it]

objective/kl: -7.554349899291992
ppo/returns/mean: 0.39954355359077454
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1075it [1:54:22,  3.14s/it]

objective/kl: -8.225980758666992
ppo/returns/mean: 0.4281213879585266
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1076it [1:54:26,  3.14s/it]

objective/kl: -7.177095413208008
ppo/returns/mean: 0.3909417390823364
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1077it [1:54:29,  3.13s/it]

objective/kl: -7.726649761199951
ppo/returns/mean: 0.44002842903137207
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1078it [1:54:32,  3.12s/it]

objective/kl: -7.588287830352783
ppo/returns/mean: 0.32285046577453613
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1079it [1:54:35,  3.12s/it]

objective/kl: -7.611937046051025
ppo/returns/mean: 0.35728079080581665
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1080it [1:54:38,  3.12s/it]

objective/kl: -7.842937469482422
ppo/returns/mean: 0.3762511610984802
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1081it [1:54:41,  3.13s/it]

objective/kl: -7.529411315917969
ppo/returns/mean: 0.3121790289878845
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1082it [1:54:44,  3.14s/it]

objective/kl: -7.744228839874268
ppo/returns/mean: 0.3569115400314331
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1083it [1:54:47,  3.12s/it]

objective/kl: -7.192048072814941
ppo/returns/mean: 0.34862861037254333
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1084it [1:54:51,  3.13s/it]

objective/kl: -7.027219772338867
ppo/returns/mean: 0.3560272455215454
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1085it [1:54:54,  3.20s/it]

objective/kl: -7.8212385177612305
ppo/returns/mean: 0.3403294086456299
ppo/policy/advantages_mean: 0.0011447928845882416
---------------------------------------------------------------------------------------------------


1086it [1:54:57,  3.17s/it]

objective/kl: -7.47485876083374
ppo/returns/mean: 0.4018012583255768
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1087it [1:55:00,  3.20s/it]

objective/kl: -6.573659896850586
ppo/returns/mean: 0.3571716547012329
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1088it [1:55:03,  3.18s/it]

objective/kl: -8.70266056060791
ppo/returns/mean: 0.35679012537002563
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1089it [1:55:07,  3.16s/it]

objective/kl: -8.510050773620605
ppo/returns/mean: 0.34276917576789856
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1090it [1:55:10,  3.15s/it]

objective/kl: -7.556358337402344
ppo/returns/mean: 0.3318423628807068
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


1091it [1:55:13,  3.21s/it]

objective/kl: -6.756311416625977
ppo/returns/mean: 0.30964937806129456
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1092it [1:55:16,  3.18s/it]

objective/kl: -6.937314033508301
ppo/returns/mean: 0.36970943212509155
ppo/policy/advantages_mean: -8.381903171539307e-09
---------------------------------------------------------------------------------------------------


1093it [1:55:19,  3.15s/it]

objective/kl: -7.165616035461426
ppo/returns/mean: 0.2869601845741272
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


1094it [1:55:22,  3.15s/it]

objective/kl: -6.7025251388549805
ppo/returns/mean: 0.3258821964263916
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


1095it [1:55:25,  3.14s/it]

objective/kl: -6.701384544372559
ppo/returns/mean: 0.25155702233314514
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1096it [1:55:29,  3.13s/it]

objective/kl: -6.6090497970581055
ppo/returns/mean: 0.2693765163421631
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1097it [1:55:32,  3.13s/it]

objective/kl: -6.366266250610352
ppo/returns/mean: 0.18609300255775452
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


1098it [1:55:35,  3.19s/it]

objective/kl: -7.120077610015869
ppo/returns/mean: 0.23644058406352997
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


1099it [1:55:38,  3.16s/it]

objective/kl: -6.4595770835876465
ppo/returns/mean: 0.27427932620048523
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1100it [1:55:41,  3.15s/it]

objective/kl: -7.943952560424805
ppo/returns/mean: 0.288213312625885
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1101it [1:55:44,  3.13s/it]

objective/kl: -8.637685775756836
ppo/returns/mean: 0.3618082106113434
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1102it [1:55:47,  3.13s/it]

objective/kl: -8.013689041137695
ppo/returns/mean: 0.35275694727897644
ppo/policy/advantages_mean: 0.0013142228126525879
---------------------------------------------------------------------------------------------------


1103it [1:55:51,  3.27s/it]

objective/kl: -8.545530319213867
ppo/returns/mean: 0.38513273000717163
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1104it [1:55:54,  3.23s/it]

objective/kl: -7.838015079498291
ppo/returns/mean: 0.39386406540870667
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1105it [1:55:57,  3.20s/it]

objective/kl: -7.286152362823486
ppo/returns/mean: 0.4189693033695221
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1106it [1:56:01,  3.21s/it]

objective/kl: -7.097436904907227
ppo/returns/mean: 0.349309504032135
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1107it [1:56:04,  3.20s/it]

objective/kl: -7.391622543334961
ppo/returns/mean: 0.27526015043258667
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1108it [1:56:07,  3.18s/it]

objective/kl: -8.279044151306152
ppo/returns/mean: 0.31885236501693726
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


1109it [1:56:10,  3.16s/it]

objective/kl: -8.513175964355469
ppo/returns/mean: 0.32525643706321716
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1110it [1:56:13,  3.18s/it]

objective/kl: -8.38255500793457
ppo/returns/mean: 0.3105565309524536
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1111it [1:56:16,  3.21s/it]

objective/kl: -8.277584075927734
ppo/returns/mean: 0.3976505994796753
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1112it [1:56:20,  3.17s/it]

objective/kl: -7.3840837478637695
ppo/returns/mean: 0.30391544103622437
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


1113it [1:56:23,  3.14s/it]

objective/kl: -7.945281982421875
ppo/returns/mean: 0.4072273075580597
ppo/policy/advantages_mean: 2.514570951461792e-08
---------------------------------------------------------------------------------------------------


1114it [1:56:26,  3.12s/it]

objective/kl: -8.26630973815918
ppo/returns/mean: 0.4105355739593506
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1115it [1:56:29,  3.11s/it]

objective/kl: -7.458217144012451
ppo/returns/mean: 0.34352800250053406
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1116it [1:56:32,  3.10s/it]

objective/kl: -7.1258392333984375
ppo/returns/mean: 0.3419501781463623
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1117it [1:56:35,  3.11s/it]

objective/kl: -7.109896183013916
ppo/returns/mean: 0.3152140974998474
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1118it [1:56:38,  3.11s/it]

objective/kl: -7.042527198791504
ppo/returns/mean: 0.2876412868499756
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1119it [1:56:41,  3.10s/it]

objective/kl: -6.746386528015137
ppo/returns/mean: 0.29600948095321655
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1120it [1:56:44,  3.10s/it]

objective/kl: -7.221774101257324
ppo/returns/mean: 0.26538485288619995
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1121it [1:56:47,  3.10s/it]

objective/kl: -7.614501953125
ppo/returns/mean: 0.2772945165634155
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1122it [1:56:50,  3.09s/it]

objective/kl: -7.36181640625
ppo/returns/mean: 0.3008844256401062
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1123it [1:56:54,  3.09s/it]

objective/kl: -5.527213096618652
ppo/returns/mean: 0.2085730880498886
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


1124it [1:56:57,  3.09s/it]

objective/kl: -6.632978439331055
ppo/returns/mean: 0.24960261583328247
ppo/policy/advantages_mean: -0.00036278367042541504
---------------------------------------------------------------------------------------------------


1125it [1:57:00,  3.10s/it]

objective/kl: -6.622039794921875
ppo/returns/mean: 0.2303134799003601
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1126it [1:57:03,  3.11s/it]

objective/kl: -6.427299499511719
ppo/returns/mean: 0.26281285285949707
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1127it [1:57:06,  3.10s/it]

objective/kl: -6.557258605957031
ppo/returns/mean: 0.24321047961711884
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1128it [1:57:09,  3.20s/it]

objective/kl: -6.174079895019531
ppo/returns/mean: 0.20000059902668
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1129it [1:57:13,  3.17s/it]

objective/kl: -7.22054386138916
ppo/returns/mean: 0.30498191714286804
ppo/policy/advantages_mean: 0.0005820617079734802
---------------------------------------------------------------------------------------------------


1130it [1:57:16,  3.15s/it]

objective/kl: -7.015389919281006
ppo/returns/mean: 0.30135780572891235
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


1131it [1:57:19,  3.16s/it]

objective/kl: -6.904726028442383
ppo/returns/mean: 0.3254587650299072
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1132it [1:57:22,  3.16s/it]

objective/kl: -7.053438663482666
ppo/returns/mean: 0.33424150943756104
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


1133it [1:57:25,  3.14s/it]

objective/kl: -7.554939270019531
ppo/returns/mean: 0.35817116498947144
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1134it [1:57:28,  3.13s/it]

objective/kl: -7.87227725982666
ppo/returns/mean: 0.3578189015388489
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


1135it [1:57:31,  3.12s/it]

objective/kl: -9.110139846801758
ppo/returns/mean: 0.4333460330963135
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1136it [1:57:34,  3.15s/it]

objective/kl: -7.61750602722168
ppo/returns/mean: 0.3876633048057556
ppo/policy/advantages_mean: -1.30385160446167e-08
---------------------------------------------------------------------------------------------------


1137it [1:57:38,  3.17s/it]

objective/kl: -9.442346572875977
ppo/returns/mean: 0.434480756521225
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


1138it [1:57:41,  3.21s/it]

objective/kl: -8.257207870483398
ppo/returns/mean: 0.3920326828956604
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1139it [1:57:44,  3.18s/it]

objective/kl: -8.386468887329102
ppo/returns/mean: 0.427578330039978
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1140it [1:57:47,  3.15s/it]

objective/kl: -7.360146522521973
ppo/returns/mean: 0.3837432861328125
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


1141it [1:57:50,  3.18s/it]

objective/kl: -7.394436359405518
ppo/returns/mean: 0.3623391091823578
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


1142it [1:57:54,  3.16s/it]

objective/kl: -6.341312408447266
ppo/returns/mean: 0.34915560483932495
ppo/policy/advantages_mean: -4.470348358154297e-08
---------------------------------------------------------------------------------------------------


1143it [1:57:57,  3.13s/it]

objective/kl: -6.11921501159668
ppo/returns/mean: 0.3181163966655731
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1144it [1:58:00,  3.13s/it]

objective/kl: -6.693833351135254
ppo/returns/mean: 0.2580084800720215
ppo/policy/advantages_mean: 8.428003638982773e-05
---------------------------------------------------------------------------------------------------


1145it [1:58:03,  3.12s/it]

objective/kl: -7.028279781341553
ppo/returns/mean: 0.2969479262828827
ppo/policy/advantages_mean: -0.0005653500556945801
---------------------------------------------------------------------------------------------------


1146it [1:58:06,  3.16s/it]

objective/kl: -7.0753068923950195
ppo/returns/mean: 0.29851192235946655
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1147it [1:58:09,  3.15s/it]

objective/kl: -7.466036796569824
ppo/returns/mean: 0.35614535212516785
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1148it [1:58:12,  3.15s/it]

objective/kl: -8.027929306030273
ppo/returns/mean: 0.3188721537590027
ppo/policy/advantages_mean: -0.0007698722183704376
---------------------------------------------------------------------------------------------------


1149it [1:58:15,  3.13s/it]

objective/kl: -7.511131763458252
ppo/returns/mean: 0.31934767961502075
ppo/policy/advantages_mean: -0.0006620287895202637
---------------------------------------------------------------------------------------------------


1150it [1:58:19,  3.14s/it]

objective/kl: -6.65412712097168
ppo/returns/mean: 0.2367502897977829
ppo/policy/advantages_mean: 1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


1151it [1:58:22,  3.14s/it]

objective/kl: -6.876062393188477
ppo/returns/mean: 0.33967626094818115
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1152it [1:58:25,  3.14s/it]

objective/kl: -6.330843925476074
ppo/returns/mean: 0.2492164671421051
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1153it [1:58:28,  3.14s/it]

objective/kl: -6.281839847564697
ppo/returns/mean: 0.24066676199436188
ppo/policy/advantages_mean: 4.470348358154297e-08
---------------------------------------------------------------------------------------------------


1154it [1:58:31,  3.18s/it]

objective/kl: -7.010235786437988
ppo/returns/mean: 0.24576741456985474
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


1155it [1:58:34,  3.17s/it]

objective/kl: -6.071329116821289
ppo/returns/mean: 0.25213441252708435
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1156it [1:58:38,  3.15s/it]

objective/kl: -6.602087497711182
ppo/returns/mean: 0.2698614001274109
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1157it [1:58:41,  3.17s/it]

objective/kl: -6.089290618896484
ppo/returns/mean: 0.23465654253959656
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1158it [1:58:44,  3.16s/it]

objective/kl: -6.794550895690918
ppo/returns/mean: 0.24072593450546265
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1159it [1:58:47,  3.15s/it]

objective/kl: -6.529975891113281
ppo/returns/mean: 0.22344756126403809
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1160it [1:58:50,  3.14s/it]

objective/kl: -5.604111671447754
ppo/returns/mean: 0.19075357913970947
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1161it [1:58:53,  3.15s/it]

objective/kl: -6.217017650604248
ppo/returns/mean: 0.23804044723510742
ppo/policy/advantages_mean: 1.3969838619232178e-08
---------------------------------------------------------------------------------------------------


1162it [1:58:56,  3.14s/it]

objective/kl: -6.6757330894470215
ppo/returns/mean: 0.2568989396095276
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1163it [1:59:00,  3.13s/it]

objective/kl: -7.721059799194336
ppo/returns/mean: 0.3172456622123718
ppo/policy/advantages_mean: -3.91155481338501e-08
---------------------------------------------------------------------------------------------------


1164it [1:59:03,  3.13s/it]

objective/kl: -7.54085636138916
ppo/returns/mean: 0.35350167751312256
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1165it [1:59:06,  3.15s/it]

objective/kl: -7.5415825843811035
ppo/returns/mean: 0.3584703207015991
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1166it [1:59:09,  3.21s/it]

objective/kl: -7.279088020324707
ppo/returns/mean: 0.40147727727890015
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1167it [1:59:12,  3.19s/it]

objective/kl: -7.107199668884277
ppo/returns/mean: 0.3779488801956177
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


1168it [1:59:16,  3.17s/it]

objective/kl: -7.119690895080566
ppo/returns/mean: 0.34631580114364624
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1169it [1:59:19,  3.18s/it]

objective/kl: -7.495724678039551
ppo/returns/mean: 0.4026452302932739
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1170it [1:59:22,  3.16s/it]

objective/kl: -7.129814624786377
ppo/returns/mean: 0.3504040241241455
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1171it [1:59:25,  3.14s/it]

objective/kl: -6.473169803619385
ppo/returns/mean: 0.3732137978076935
ppo/policy/advantages_mean: -0.00021325796842575073
---------------------------------------------------------------------------------------------------


1172it [1:59:28,  3.14s/it]

objective/kl: -6.409024238586426
ppo/returns/mean: 0.29838263988494873
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1173it [1:59:31,  3.13s/it]

objective/kl: -7.467837333679199
ppo/returns/mean: 0.3357357382774353
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1174it [1:59:34,  3.14s/it]

objective/kl: -7.205341815948486
ppo/returns/mean: 0.3640071153640747
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1175it [1:59:38,  3.16s/it]

objective/kl: -5.753705978393555
ppo/returns/mean: 0.2461254894733429
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1176it [1:59:41,  3.15s/it]

objective/kl: -6.593170642852783
ppo/returns/mean: 0.26849648356437683
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1177it [1:59:44,  3.14s/it]

objective/kl: -5.841115474700928
ppo/returns/mean: 0.3098193109035492
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


1178it [1:59:47,  3.15s/it]

objective/kl: -6.9299516677856445
ppo/returns/mean: 0.3322422504425049
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1179it [1:59:50,  3.14s/it]

objective/kl: -8.1391019821167
ppo/returns/mean: 0.3505346179008484
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1180it [1:59:53,  3.15s/it]

objective/kl: -8.80675983428955
ppo/returns/mean: 0.3914095461368561
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1181it [1:59:56,  3.15s/it]

objective/kl: -8.624086380004883
ppo/returns/mean: 0.37317466735839844
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1182it [2:00:00,  3.15s/it]

objective/kl: -8.386832237243652
ppo/returns/mean: 0.38221365213394165
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1183it [2:00:03,  3.15s/it]

objective/kl: -6.944940090179443
ppo/returns/mean: 0.3020642399787903
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1184it [2:00:06,  3.14s/it]

objective/kl: -6.194875240325928
ppo/returns/mean: 0.31729161739349365
ppo/policy/advantages_mean: 0.001170506700873375
---------------------------------------------------------------------------------------------------


1185it [2:00:09,  3.13s/it]

objective/kl: -6.9705915451049805
ppo/returns/mean: 0.3466559648513794
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


1186it [2:00:12,  3.13s/it]

objective/kl: -7.936718940734863
ppo/returns/mean: 0.3877567648887634
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1187it [2:00:15,  3.14s/it]

objective/kl: -7.18955135345459
ppo/returns/mean: 0.3574409484863281
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1188it [2:00:18,  3.14s/it]

objective/kl: -9.111682891845703
ppo/returns/mean: 0.3505696654319763
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1189it [2:00:21,  3.13s/it]

objective/kl: -9.565793991088867
ppo/returns/mean: 0.40877753496170044
ppo/policy/advantages_mean: 4.470348358154297e-08
---------------------------------------------------------------------------------------------------


1190it [2:00:25,  3.12s/it]

objective/kl: -8.448200225830078
ppo/returns/mean: 0.3516944944858551
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1191it [2:00:28,  3.12s/it]

objective/kl: -8.827143669128418
ppo/returns/mean: 0.36942383646965027
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


1192it [2:00:31,  3.12s/it]

objective/kl: -8.925689697265625
ppo/returns/mean: 0.39771658182144165
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1193it [2:00:34,  3.12s/it]

objective/kl: -7.528855323791504
ppo/returns/mean: 0.3346823453903198
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1194it [2:00:37,  3.12s/it]

objective/kl: -7.420729160308838
ppo/returns/mean: 0.37376391887664795
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1195it [2:00:40,  3.12s/it]

objective/kl: -6.951963424682617
ppo/returns/mean: 0.3485206365585327
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1196it [2:00:43,  3.12s/it]

objective/kl: -6.875290870666504
ppo/returns/mean: 0.31800246238708496
ppo/policy/advantages_mean: 2.514570951461792e-08
---------------------------------------------------------------------------------------------------


1197it [2:00:46,  3.12s/it]

objective/kl: -7.227011680603027
ppo/returns/mean: 0.3094278573989868
ppo/policy/advantages_mean: 2.8870999813079834e-08
---------------------------------------------------------------------------------------------------


1198it [2:00:50,  3.12s/it]

objective/kl: -7.114382266998291
ppo/returns/mean: 0.2703818082809448
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1199it [2:00:53,  3.12s/it]

objective/kl: -7.166221618652344
ppo/returns/mean: 0.22936835885047913
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


1200it [2:00:56,  3.12s/it]

objective/kl: -6.914773941040039
ppo/returns/mean: 0.17917653918266296
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1201it [2:00:59,  3.13s/it]

objective/kl: -6.461122512817383
ppo/returns/mean: 0.20010022819042206
ppo/policy/advantages_mean: -1.4435499906539917e-08
---------------------------------------------------------------------------------------------------


1202it [2:01:02,  3.15s/it]

objective/kl: -6.149588584899902
ppo/returns/mean: 0.18902575969696045
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1203it [2:01:05,  3.15s/it]

objective/kl: -6.318330764770508
ppo/returns/mean: 0.20018576085567474
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1204it [2:01:08,  3.14s/it]

objective/kl: -7.414196014404297
ppo/returns/mean: 0.20610566437244415
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1205it [2:01:11,  3.13s/it]

objective/kl: -6.9733171463012695
ppo/returns/mean: 0.25897911190986633
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


1206it [2:01:15,  3.14s/it]

objective/kl: -7.400364398956299
ppo/returns/mean: 0.32119202613830566
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1207it [2:01:18,  3.13s/it]

objective/kl: -8.241509437561035
ppo/returns/mean: 0.3274584114551544
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


1208it [2:01:21,  3.18s/it]

objective/kl: -8.602628707885742
ppo/returns/mean: 0.3710660934448242
ppo/policy/advantages_mean: -4.470348358154297e-08
---------------------------------------------------------------------------------------------------


1209it [2:01:24,  3.19s/it]

objective/kl: -8.15699291229248
ppo/returns/mean: 0.32103267312049866
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1210it [2:01:27,  3.17s/it]

objective/kl: -8.092711448669434
ppo/returns/mean: 0.34551525115966797
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


1211it [2:01:31,  3.23s/it]

objective/kl: -7.890056610107422
ppo/returns/mean: 0.36713695526123047
ppo/policy/advantages_mean: 2.1420419216156006e-08
---------------------------------------------------------------------------------------------------


1212it [2:01:34,  3.20s/it]

objective/kl: -6.520223617553711
ppo/returns/mean: 0.28157347440719604
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1213it [2:01:37,  3.18s/it]

objective/kl: -6.212212562561035
ppo/returns/mean: 0.3008006811141968
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1214it [2:01:40,  3.17s/it]

objective/kl: -5.84432315826416
ppo/returns/mean: 0.24454212188720703
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


1215it [2:01:43,  3.16s/it]

objective/kl: -6.539284706115723
ppo/returns/mean: 0.24992597103118896
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1216it [2:01:46,  3.15s/it]

objective/kl: -7.209900856018066
ppo/returns/mean: 0.27653568983078003
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1217it [2:01:50,  3.16s/it]

objective/kl: -7.254378318786621
ppo/returns/mean: 0.22215193510055542
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1218it [2:01:53,  3.17s/it]

objective/kl: -6.9669342041015625
ppo/returns/mean: 0.2445356845855713
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1219it [2:01:56,  3.15s/it]

objective/kl: -8.225967407226562
ppo/returns/mean: 0.26780110597610474
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1220it [2:02:00,  3.50s/it]

objective/kl: -8.198127746582031
ppo/returns/mean: 0.28100547194480896
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1221it [2:02:03,  3.41s/it]

objective/kl: -8.296531677246094
ppo/returns/mean: 0.3063827455043793
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1222it [2:02:07,  3.35s/it]

objective/kl: -7.993087291717529
ppo/returns/mean: 0.2833949327468872
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


1223it [2:02:10,  3.32s/it]

objective/kl: -6.997572898864746
ppo/returns/mean: 0.2834230661392212
ppo/policy/advantages_mean: -1.210719347000122e-08
---------------------------------------------------------------------------------------------------


1224it [2:02:13,  3.29s/it]

objective/kl: -6.747565746307373
ppo/returns/mean: 0.2571702003479004
ppo/policy/advantages_mean: -1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


1225it [2:02:17,  3.33s/it]

objective/kl: -6.235769271850586
ppo/returns/mean: 0.19402261078357697
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1226it [2:02:20,  3.27s/it]

objective/kl: -8.027694702148438
ppo/returns/mean: 0.2659958004951477
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


1227it [2:02:23,  3.25s/it]

objective/kl: -6.520002365112305
ppo/returns/mean: 0.211496502161026
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1228it [2:02:26,  3.36s/it]

objective/kl: -8.011297225952148
ppo/returns/mean: 0.2949642539024353
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1229it [2:02:30,  3.34s/it]

objective/kl: -7.978342056274414
ppo/returns/mean: 0.24649542570114136
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


1230it [2:02:33,  3.30s/it]

objective/kl: -7.74068546295166
ppo/returns/mean: 0.29543817043304443
ppo/policy/advantages_mean: 0.0004146359860897064
---------------------------------------------------------------------------------------------------


1231it [2:02:36,  3.28s/it]

objective/kl: -7.768123626708984
ppo/returns/mean: 0.34437981247901917
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1232it [2:02:39,  3.25s/it]

objective/kl: -8.236928939819336
ppo/returns/mean: 0.3076660633087158
ppo/policy/advantages_mean: -0.002468101680278778
---------------------------------------------------------------------------------------------------


1233it [2:02:42,  3.20s/it]

objective/kl: -8.780235290527344
ppo/returns/mean: 0.3172972798347473
ppo/policy/advantages_mean: 0.002572484314441681
---------------------------------------------------------------------------------------------------


1234it [2:02:46,  3.23s/it]

objective/kl: -9.574121475219727
ppo/returns/mean: 0.4320859909057617
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1235it [2:02:49,  3.21s/it]

objective/kl: -10.007854461669922
ppo/returns/mean: 0.39080920815467834
ppo/policy/advantages_mean: 0.0018764734268188477
---------------------------------------------------------------------------------------------------


1236it [2:02:52,  3.20s/it]

objective/kl: -9.245677947998047
ppo/returns/mean: 0.408592164516449
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1237it [2:02:55,  3.17s/it]

objective/kl: -8.174930572509766
ppo/returns/mean: 0.2500719428062439
ppo/policy/advantages_mean: 0.0042506977915763855
---------------------------------------------------------------------------------------------------


1238it [2:02:58,  3.15s/it]

objective/kl: -8.355024337768555
ppo/returns/mean: 0.31009188294410706
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1239it [2:03:01,  3.14s/it]

objective/kl: -8.1650972366333
ppo/returns/mean: 0.3873300850391388
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1240it [2:03:05,  3.19s/it]

objective/kl: -8.198965072631836
ppo/returns/mean: 0.42528632283210754
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1241it [2:03:08,  3.16s/it]

objective/kl: -7.708392143249512
ppo/returns/mean: 0.3065282702445984
ppo/policy/advantages_mean: -0.0012243986129760742
---------------------------------------------------------------------------------------------------


1242it [2:03:11,  3.20s/it]

objective/kl: -9.420738220214844
ppo/returns/mean: 0.43983861804008484
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1243it [2:03:14,  3.18s/it]

objective/kl: -7.584300518035889
ppo/returns/mean: 0.3156158924102783
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1244it [2:03:17,  3.16s/it]

objective/kl: -8.428890228271484
ppo/returns/mean: 0.4032655358314514
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1245it [2:03:20,  3.13s/it]

objective/kl: -8.510436058044434
ppo/returns/mean: 0.3331981301307678
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


1246it [2:03:24,  3.13s/it]

objective/kl: -9.643827438354492
ppo/returns/mean: 0.428425669670105
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1247it [2:03:27,  3.12s/it]

objective/kl: -9.027399063110352
ppo/returns/mean: 0.4213772118091583
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1248it [2:03:30,  3.13s/it]

objective/kl: -9.48281478881836
ppo/returns/mean: 0.40477603673934937
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1249it [2:03:33,  3.11s/it]

objective/kl: -9.489705085754395
ppo/returns/mean: 0.42919015884399414
ppo/policy/advantages_mean: -0.003645189106464386
---------------------------------------------------------------------------------------------------


1250it [2:03:36,  3.09s/it]

objective/kl: -8.877458572387695
ppo/returns/mean: 0.3621579706668854
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1251it [2:03:39,  3.09s/it]

objective/kl: -10.008872985839844
ppo/returns/mean: 0.43741723895072937
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1252it [2:03:42,  3.09s/it]

objective/kl: -10.813335418701172
ppo/returns/mean: 0.4870453178882599
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1253it [2:03:45,  3.10s/it]

objective/kl: -9.313389778137207
ppo/returns/mean: 0.5077385902404785
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1254it [2:03:48,  3.11s/it]

objective/kl: -9.87362289428711
ppo/returns/mean: 0.4491336941719055
ppo/policy/advantages_mean: 0.0007293112576007843
---------------------------------------------------------------------------------------------------


1255it [2:03:52,  3.16s/it]

objective/kl: -8.82783317565918
ppo/returns/mean: 0.44054877758026123
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1256it [2:03:55,  3.14s/it]

objective/kl: -8.590110778808594
ppo/returns/mean: 0.4380311667919159
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1257it [2:03:58,  3.17s/it]

objective/kl: -7.666410446166992
ppo/returns/mean: 0.36130577325820923
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1258it [2:04:01,  3.14s/it]

objective/kl: -7.92626428604126
ppo/returns/mean: 0.35295748710632324
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


1259it [2:04:04,  3.13s/it]

objective/kl: -8.526834487915039
ppo/returns/mean: 0.37055617570877075
ppo/policy/advantages_mean: -0.00016908347606658936
---------------------------------------------------------------------------------------------------


1260it [2:04:07,  3.12s/it]

objective/kl: -8.798067092895508
ppo/returns/mean: 0.41999566555023193
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1261it [2:04:10,  3.11s/it]

objective/kl: -9.193252563476562
ppo/returns/mean: 0.4212540090084076
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1262it [2:04:14,  3.15s/it]

objective/kl: -9.176360130310059
ppo/returns/mean: 0.41400063037872314
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1263it [2:04:17,  3.13s/it]

objective/kl: -9.471379280090332
ppo/returns/mean: 0.3638109564781189
ppo/policy/advantages_mean: 2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


1264it [2:04:20,  3.29s/it]

objective/kl: -9.130983352661133
ppo/returns/mean: 0.40537410974502563
ppo/policy/advantages_mean: 2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


1265it [2:04:24,  3.26s/it]

objective/kl: -9.19322681427002
ppo/returns/mean: 0.3951183557510376
ppo/policy/advantages_mean: -0.0016719885170459747
---------------------------------------------------------------------------------------------------


1266it [2:04:27,  3.21s/it]

objective/kl: -8.840190887451172
ppo/returns/mean: 0.37450799345970154
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


1267it [2:04:30,  3.17s/it]

objective/kl: -9.478798866271973
ppo/returns/mean: 0.36550986766815186
ppo/policy/advantages_mean: -0.0010470710694789886
---------------------------------------------------------------------------------------------------


1268it [2:04:33,  3.15s/it]

objective/kl: -9.290594100952148
ppo/returns/mean: 0.4303404688835144
ppo/policy/advantages_mean: 0.00033923983573913574
---------------------------------------------------------------------------------------------------


1269it [2:04:36,  3.15s/it]

objective/kl: -10.292156219482422
ppo/returns/mean: 0.43201595544815063
ppo/policy/advantages_mean: -0.0033211316913366318
---------------------------------------------------------------------------------------------------


1270it [2:04:39,  3.25s/it]

objective/kl: -10.76370906829834
ppo/returns/mean: 0.41617730259895325
ppo/policy/advantages_mean: 0.003886580467224121
---------------------------------------------------------------------------------------------------


1271it [2:04:43,  3.23s/it]

objective/kl: -11.486513137817383
ppo/returns/mean: 0.4917755722999573
ppo/policy/advantages_mean: 0.0006827861070632935
---------------------------------------------------------------------------------------------------


1272it [2:04:46,  3.22s/it]

objective/kl: -11.521553039550781
ppo/returns/mean: 0.48462188243865967
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1273it [2:04:49,  3.20s/it]

objective/kl: -10.62995719909668
ppo/returns/mean: 0.4718000292778015
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1274it [2:04:52,  3.18s/it]

objective/kl: -9.35555648803711
ppo/returns/mean: 0.4800185561180115
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1275it [2:04:55,  3.18s/it]

objective/kl: -10.139066696166992
ppo/returns/mean: 0.4610068202018738
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1276it [2:04:58,  3.19s/it]

objective/kl: -10.410198211669922
ppo/returns/mean: 0.492413729429245
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1277it [2:05:02,  3.18s/it]

objective/kl: -8.73513412475586
ppo/returns/mean: 0.33564382791519165
ppo/policy/advantages_mean: -0.0028908848762512207
---------------------------------------------------------------------------------------------------


1278it [2:05:05,  3.15s/it]

objective/kl: -9.42643928527832
ppo/returns/mean: 0.453162282705307
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


1279it [2:05:08,  3.15s/it]

objective/kl: -8.773998260498047
ppo/returns/mean: 0.43626320362091064
ppo/policy/advantages_mean: -0.0005761571228504181
---------------------------------------------------------------------------------------------------


1280it [2:05:11,  3.18s/it]

objective/kl: -9.02359676361084
ppo/returns/mean: 0.42357420921325684
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1281it [2:05:14,  3.21s/it]

objective/kl: -10.865192413330078
ppo/returns/mean: 0.47354570031166077
ppo/policy/advantages_mean: -0.0020615682005882263
---------------------------------------------------------------------------------------------------


1282it [2:05:18,  3.36s/it]

objective/kl: -10.470949172973633
ppo/returns/mean: 0.451347291469574
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1283it [2:05:21,  3.28s/it]

objective/kl: -10.55030345916748
ppo/returns/mean: 0.4838351011276245
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1284it [2:05:24,  3.23s/it]

objective/kl: -12.086286544799805
ppo/returns/mean: 0.6098566055297852
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1285it [2:05:27,  3.19s/it]

objective/kl: -12.177955627441406
ppo/returns/mean: 0.547370195388794
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


1286it [2:05:31,  3.17s/it]

objective/kl: -11.819160461425781
ppo/returns/mean: 0.5461718440055847
ppo/policy/advantages_mean: 1.0244548320770264e-08
---------------------------------------------------------------------------------------------------


1287it [2:05:34,  3.16s/it]

objective/kl: -10.402311325073242
ppo/returns/mean: 0.46530458331108093
ppo/policy/advantages_mean: 0.0022631660103797913
---------------------------------------------------------------------------------------------------


1288it [2:05:37,  3.23s/it]

objective/kl: -8.621496200561523
ppo/returns/mean: 0.5299780368804932
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1289it [2:05:40,  3.18s/it]

objective/kl: -9.15224838256836
ppo/returns/mean: 0.5027316212654114
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1290it [2:05:43,  3.16s/it]

objective/kl: -10.884038925170898
ppo/returns/mean: 0.5401499271392822
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1291it [2:05:46,  3.14s/it]

objective/kl: -10.762984275817871
ppo/returns/mean: 0.5279656648635864
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1292it [2:05:49,  3.13s/it]

objective/kl: -10.245569229125977
ppo/returns/mean: 0.4789928197860718
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1293it [2:05:53,  3.14s/it]

objective/kl: -9.532651901245117
ppo/returns/mean: 0.49713134765625
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1294it [2:05:56,  3.13s/it]

objective/kl: -9.564698219299316
ppo/returns/mean: 0.4561593532562256
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1295it [2:05:59,  3.12s/it]

objective/kl: -10.085095405578613
ppo/returns/mean: 0.5081425905227661
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1296it [2:06:02,  3.20s/it]

objective/kl: -10.035372734069824
ppo/returns/mean: 0.4894712567329407
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1297it [2:06:05,  3.19s/it]

objective/kl: -11.532094955444336
ppo/returns/mean: 0.5110368728637695
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1298it [2:06:09,  3.22s/it]

objective/kl: -11.32926082611084
ppo/returns/mean: 0.5651459097862244
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1299it [2:06:12,  3.21s/it]

objective/kl: -9.864798545837402
ppo/returns/mean: 0.47796252369880676
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


1300it [2:06:15,  3.18s/it]

objective/kl: -11.19520378112793
ppo/returns/mean: 0.544940173625946
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


1301it [2:06:18,  3.16s/it]

objective/kl: -10.379894256591797
ppo/returns/mean: 0.5086196660995483
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1302it [2:06:21,  3.23s/it]

objective/kl: -9.86783218383789
ppo/returns/mean: 0.4871402382850647
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1303it [2:06:25,  3.22s/it]

objective/kl: -9.55475902557373
ppo/returns/mean: 0.5092240571975708
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1304it [2:06:28,  3.24s/it]

objective/kl: -10.371600151062012
ppo/returns/mean: 0.47892868518829346
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1305it [2:06:31,  3.21s/it]

objective/kl: -10.404141426086426
ppo/returns/mean: 0.4931865632534027
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1306it [2:06:34,  3.19s/it]

objective/kl: -9.538602828979492
ppo/returns/mean: 0.380418986082077
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1307it [2:06:37,  3.17s/it]

objective/kl: -9.381217956542969
ppo/returns/mean: 0.3456372320652008
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


1308it [2:06:40,  3.16s/it]

objective/kl: -9.649129867553711
ppo/returns/mean: 0.3850681781768799
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1309it [2:06:44,  3.19s/it]

objective/kl: -9.63364315032959
ppo/returns/mean: 0.421173095703125
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


1310it [2:06:47,  3.18s/it]

objective/kl: -10.030637741088867
ppo/returns/mean: 0.42058542370796204
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1311it [2:06:50,  3.16s/it]

objective/kl: -10.035377502441406
ppo/returns/mean: 0.39326536655426025
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1312it [2:06:53,  3.23s/it]

objective/kl: -10.755061149597168
ppo/returns/mean: 0.3756224811077118
ppo/policy/advantages_mean: -0.001122400164604187
---------------------------------------------------------------------------------------------------


1313it [2:06:56,  3.19s/it]

objective/kl: -9.7354154586792
ppo/returns/mean: 0.4461720585823059
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1314it [2:07:00,  3.19s/it]

objective/kl: -9.39157772064209
ppo/returns/mean: 0.38092440366744995
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


1315it [2:07:03,  3.22s/it]

objective/kl: -10.464807510375977
ppo/returns/mean: 0.3821612000465393
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1316it [2:07:06,  3.18s/it]

objective/kl: -12.476770401000977
ppo/returns/mean: 0.47728854417800903
ppo/policy/advantages_mean: 0.0008527301251888275
---------------------------------------------------------------------------------------------------


1317it [2:07:09,  3.14s/it]

objective/kl: -11.001497268676758
ppo/returns/mean: 0.46498167514801025
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1318it [2:07:12,  3.13s/it]

objective/kl: -11.358296394348145
ppo/returns/mean: 0.37539127469062805
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1319it [2:07:15,  3.13s/it]

objective/kl: -11.138177871704102
ppo/returns/mean: 0.554871141910553
ppo/policy/advantages_mean: -0.0011908598244190216
---------------------------------------------------------------------------------------------------


1320it [2:07:18,  3.12s/it]

objective/kl: -10.657364845275879
ppo/returns/mean: 0.4273516535758972
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1321it [2:07:22,  3.12s/it]

objective/kl: -11.30569839477539
ppo/returns/mean: 0.4695759117603302
ppo/policy/advantages_mean: 0.002814657986164093
---------------------------------------------------------------------------------------------------


1322it [2:07:25,  3.10s/it]

objective/kl: -9.014892578125
ppo/returns/mean: 0.3961145281791687
ppo/policy/advantages_mean: -0.0009460644796490669
---------------------------------------------------------------------------------------------------


1323it [2:07:28,  3.10s/it]

objective/kl: -9.308712005615234
ppo/returns/mean: 0.4624423682689667
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1324it [2:07:31,  3.09s/it]

objective/kl: -8.511345863342285
ppo/returns/mean: 0.41909259557724
ppo/policy/advantages_mean: 6.949063390493393e-06
---------------------------------------------------------------------------------------------------


1325it [2:07:34,  3.09s/it]

objective/kl: -8.858854293823242
ppo/returns/mean: 0.35305988788604736
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1326it [2:07:37,  3.08s/it]

objective/kl: -10.363670349121094
ppo/returns/mean: 0.4660801291465759
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1327it [2:07:40,  3.08s/it]

objective/kl: -8.888442039489746
ppo/returns/mean: 0.26255857944488525
ppo/policy/advantages_mean: 0.0024867504835128784
---------------------------------------------------------------------------------------------------


1328it [2:07:43,  3.09s/it]

objective/kl: -10.069202423095703
ppo/returns/mean: 0.38770514726638794
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1329it [2:07:46,  3.16s/it]

objective/kl: -10.802339553833008
ppo/returns/mean: 0.41510242223739624
ppo/policy/advantages_mean: 0.0001989305019378662
---------------------------------------------------------------------------------------------------


1330it [2:07:50,  3.16s/it]

objective/kl: -10.782477378845215
ppo/returns/mean: 0.41478124260902405
ppo/policy/advantages_mean: -0.0021996498107910156
---------------------------------------------------------------------------------------------------


1331it [2:07:53,  3.31s/it]

objective/kl: -10.119722366333008
ppo/returns/mean: 0.39772120118141174
ppo/policy/advantages_mean: 0.0015030205249786377
---------------------------------------------------------------------------------------------------


1332it [2:07:56,  3.26s/it]

objective/kl: -11.28643798828125
ppo/returns/mean: 0.47844672203063965
ppo/policy/advantages_mean: -0.005764313042163849
---------------------------------------------------------------------------------------------------


1333it [2:07:59,  3.22s/it]

objective/kl: -10.972103118896484
ppo/returns/mean: 0.4565933048725128
ppo/policy/advantages_mean: 0.0009084045886993408
---------------------------------------------------------------------------------------------------


1334it [2:08:03,  3.21s/it]

objective/kl: -9.222524642944336
ppo/returns/mean: 0.39777255058288574
ppo/policy/advantages_mean: -0.002609163522720337
---------------------------------------------------------------------------------------------------


1335it [2:08:06,  3.20s/it]

objective/kl: -11.86410903930664
ppo/returns/mean: 0.53727126121521
ppo/policy/advantages_mean: -0.008492842316627502
---------------------------------------------------------------------------------------------------


1336it [2:08:10,  3.45s/it]

objective/kl: -12.240711212158203
ppo/returns/mean: 0.5763674974441528
ppo/policy/advantages_mean: -0.0006411820650100708
---------------------------------------------------------------------------------------------------


1337it [2:08:13,  3.46s/it]

objective/kl: -10.777332305908203
ppo/returns/mean: 0.5349776148796082
ppo/policy/advantages_mean: -0.013165168464183807
---------------------------------------------------------------------------------------------------


1338it [2:08:17,  3.38s/it]

objective/kl: -13.9045991897583
ppo/returns/mean: 0.6887844800949097
ppo/policy/advantages_mean: -0.013005804270505905
---------------------------------------------------------------------------------------------------


1339it [2:08:20,  3.33s/it]

objective/kl: -12.15369987487793
ppo/returns/mean: 0.6035614609718323
ppo/policy/advantages_mean: -0.0035550426691770554
---------------------------------------------------------------------------------------------------


1340it [2:08:23,  3.29s/it]

objective/kl: -13.130030632019043
ppo/returns/mean: 0.689666211605072
ppo/policy/advantages_mean: -0.014599800109863281
---------------------------------------------------------------------------------------------------


1341it [2:08:26,  3.28s/it]

objective/kl: -12.428175926208496
ppo/returns/mean: 0.6684952974319458
ppo/policy/advantages_mean: -0.005514789372682571
---------------------------------------------------------------------------------------------------


1342it [2:08:30,  3.29s/it]

objective/kl: -11.94424819946289
ppo/returns/mean: 0.5681250691413879
ppo/policy/advantages_mean: -0.0008353665471076965
---------------------------------------------------------------------------------------------------


1343it [2:08:33,  3.28s/it]

objective/kl: -12.491029739379883
ppo/returns/mean: 0.684819221496582
ppo/policy/advantages_mean: 0.010454490780830383
---------------------------------------------------------------------------------------------------


1344it [2:08:36,  3.28s/it]

objective/kl: -11.1383056640625
ppo/returns/mean: 0.5469980239868164
ppo/policy/advantages_mean: -0.004578061401844025
---------------------------------------------------------------------------------------------------


1345it [2:08:39,  3.29s/it]

objective/kl: -10.419581413269043
ppo/returns/mean: 0.5524032115936279
ppo/policy/advantages_mean: -0.002205364406108856
---------------------------------------------------------------------------------------------------


1346it [2:08:43,  3.28s/it]

objective/kl: -11.035730361938477
ppo/returns/mean: 0.6550748348236084
ppo/policy/advantages_mean: -0.0013488009572029114
---------------------------------------------------------------------------------------------------


1347it [2:08:46,  3.25s/it]

objective/kl: -12.271568298339844
ppo/returns/mean: 0.639507532119751
ppo/policy/advantages_mean: 0.0025694463402032852
---------------------------------------------------------------------------------------------------


1348it [2:08:49,  3.25s/it]

objective/kl: -12.940170288085938
ppo/returns/mean: 0.6806389093399048
ppo/policy/advantages_mean: -0.002341482788324356
---------------------------------------------------------------------------------------------------


1349it [2:08:53,  3.31s/it]

objective/kl: -13.011177062988281
ppo/returns/mean: 0.6854432225227356
ppo/policy/advantages_mean: -0.019780218601226807
---------------------------------------------------------------------------------------------------


1350it [2:08:56,  3.36s/it]

objective/kl: -12.85845947265625
ppo/returns/mean: 0.7067304849624634
ppo/policy/advantages_mean: -0.0027298703789711
---------------------------------------------------------------------------------------------------


1351it [2:08:59,  3.33s/it]

objective/kl: -12.757402420043945
ppo/returns/mean: 0.6650973558425903
ppo/policy/advantages_mean: -0.0016395077109336853
---------------------------------------------------------------------------------------------------


1352it [2:09:03,  3.31s/it]

objective/kl: -13.1605863571167
ppo/returns/mean: 0.6306875348091125
ppo/policy/advantages_mean: -0.0032386034727096558
---------------------------------------------------------------------------------------------------


1353it [2:09:06,  3.30s/it]

objective/kl: -12.25068187713623
ppo/returns/mean: 0.5965244770050049
ppo/policy/advantages_mean: -0.00487834308296442
---------------------------------------------------------------------------------------------------


1354it [2:09:09,  3.28s/it]

objective/kl: -13.242218971252441
ppo/returns/mean: 0.6520310044288635
ppo/policy/advantages_mean: 0.001108899712562561
---------------------------------------------------------------------------------------------------


1355it [2:09:12,  3.27s/it]

objective/kl: -13.568244934082031
ppo/returns/mean: 0.664806604385376
ppo/policy/advantages_mean: -0.002821369096636772
---------------------------------------------------------------------------------------------------


1356it [2:09:16,  3.27s/it]

objective/kl: -14.108945846557617
ppo/returns/mean: 0.6813074350357056
ppo/policy/advantages_mean: -0.0021360814571380615
---------------------------------------------------------------------------------------------------


1357it [2:09:19,  3.27s/it]

objective/kl: -13.633844375610352
ppo/returns/mean: 0.6831918954849243
ppo/policy/advantages_mean: -0.0019866228103637695
---------------------------------------------------------------------------------------------------


1358it [2:09:22,  3.25s/it]

objective/kl: -14.368522644042969
ppo/returns/mean: 0.7659076452255249
ppo/policy/advantages_mean: -0.005316726863384247
---------------------------------------------------------------------------------------------------


1359it [2:09:25,  3.27s/it]

objective/kl: -13.908677101135254
ppo/returns/mean: 0.6899735927581787
ppo/policy/advantages_mean: 0.0003061331808567047
---------------------------------------------------------------------------------------------------


1360it [2:09:29,  3.26s/it]

objective/kl: -13.349309921264648
ppo/returns/mean: 0.6569930911064148
ppo/policy/advantages_mean: -0.005543924868106842
---------------------------------------------------------------------------------------------------


1361it [2:09:32,  3.27s/it]

objective/kl: -13.354547500610352
ppo/returns/mean: 0.6730968952178955
ppo/policy/advantages_mean: 0.0019496474415063858
---------------------------------------------------------------------------------------------------


1362it [2:09:35,  3.28s/it]

objective/kl: -13.722013473510742
ppo/returns/mean: 0.6877329349517822
ppo/policy/advantages_mean: -0.0036461390554904938
---------------------------------------------------------------------------------------------------


1363it [2:09:38,  3.28s/it]

objective/kl: -14.001419067382812
ppo/returns/mean: 0.7159208059310913
ppo/policy/advantages_mean: -0.003360806033015251
---------------------------------------------------------------------------------------------------


1364it [2:09:42,  3.27s/it]

objective/kl: -12.90487289428711
ppo/returns/mean: 0.6623963117599487
ppo/policy/advantages_mean: 0.0009102709591388702
---------------------------------------------------------------------------------------------------


1365it [2:09:45,  3.27s/it]

objective/kl: -12.928169250488281
ppo/returns/mean: 0.6291813850402832
ppo/policy/advantages_mean: 0.007975950837135315
---------------------------------------------------------------------------------------------------


1366it [2:09:48,  3.27s/it]

objective/kl: -13.423035621643066
ppo/returns/mean: 0.6677569150924683
ppo/policy/advantages_mean: -0.00383966788649559
---------------------------------------------------------------------------------------------------


1367it [2:09:51,  3.26s/it]

objective/kl: -14.418912887573242
ppo/returns/mean: 0.729889452457428
ppo/policy/advantages_mean: -0.004188220947980881
---------------------------------------------------------------------------------------------------


1368it [2:09:55,  3.28s/it]

objective/kl: -13.283210754394531
ppo/returns/mean: 0.6934348344802856
ppo/policy/advantages_mean: -0.004264157265424728
---------------------------------------------------------------------------------------------------


1369it [2:09:58,  3.34s/it]

objective/kl: -13.910017013549805
ppo/returns/mean: 0.6671788692474365
ppo/policy/advantages_mean: -0.006900127977132797
---------------------------------------------------------------------------------------------------


1370it [2:10:02,  3.32s/it]

objective/kl: -14.526744842529297
ppo/returns/mean: 0.6988433599472046
ppo/policy/advantages_mean: 0.0012439284473657608
---------------------------------------------------------------------------------------------------


1371it [2:10:05,  3.32s/it]

objective/kl: -13.228985786437988
ppo/returns/mean: 0.6411958336830139
ppo/policy/advantages_mean: -0.009276151657104492
---------------------------------------------------------------------------------------------------


1372it [2:10:08,  3.31s/it]

objective/kl: -13.134175300598145
ppo/returns/mean: 0.5873478651046753
ppo/policy/advantages_mean: -0.0037882085889577866
---------------------------------------------------------------------------------------------------


1373it [2:10:11,  3.29s/it]

objective/kl: -13.180274963378906
ppo/returns/mean: 0.648023247718811
ppo/policy/advantages_mean: -0.0036476105451583862
---------------------------------------------------------------------------------------------------


1374it [2:10:15,  3.29s/it]

objective/kl: -12.355501174926758
ppo/returns/mean: 0.620452344417572
ppo/policy/advantages_mean: -0.001839982345700264
---------------------------------------------------------------------------------------------------


1375it [2:10:18,  3.28s/it]

objective/kl: -14.213820457458496
ppo/returns/mean: 0.7404124140739441
ppo/policy/advantages_mean: -0.005916088819503784
---------------------------------------------------------------------------------------------------


1376it [2:10:21,  3.25s/it]

objective/kl: -15.572824478149414
ppo/returns/mean: 0.8210180401802063
ppo/policy/advantages_mean: 0.0020184405148029327
---------------------------------------------------------------------------------------------------


1377it [2:10:24,  3.25s/it]

objective/kl: -15.007569313049316
ppo/returns/mean: 0.7744516134262085
ppo/policy/advantages_mean: -0.0016876459121704102
---------------------------------------------------------------------------------------------------


1378it [2:10:28,  3.24s/it]

objective/kl: -15.460996627807617
ppo/returns/mean: 0.7912584543228149
ppo/policy/advantages_mean: -0.005914982408285141
---------------------------------------------------------------------------------------------------


1379it [2:10:31,  3.23s/it]

objective/kl: -15.130240440368652
ppo/returns/mean: 0.7914238572120667
ppo/policy/advantages_mean: 0.0029966384172439575
---------------------------------------------------------------------------------------------------


1380it [2:10:34,  3.22s/it]

objective/kl: -16.823335647583008
ppo/returns/mean: 0.8786018490791321
ppo/policy/advantages_mean: -0.0004606172442436218
---------------------------------------------------------------------------------------------------


1381it [2:10:37,  3.22s/it]

objective/kl: -14.229534149169922
ppo/returns/mean: 0.7701402902603149
ppo/policy/advantages_mean: 0.0017483159899711609
---------------------------------------------------------------------------------------------------


1382it [2:10:40,  3.21s/it]

objective/kl: -15.144020080566406
ppo/returns/mean: 0.8491437435150146
ppo/policy/advantages_mean: 0.0020111985504627228
---------------------------------------------------------------------------------------------------


1383it [2:10:44,  3.21s/it]

objective/kl: -14.73042106628418
ppo/returns/mean: 0.8006796836853027
ppo/policy/advantages_mean: 0.0053633879870176315
---------------------------------------------------------------------------------------------------


1384it [2:10:47,  3.21s/it]

objective/kl: -15.155105590820312
ppo/returns/mean: 0.800796627998352
ppo/policy/advantages_mean: -0.001688830554485321
---------------------------------------------------------------------------------------------------


1385it [2:10:50,  3.20s/it]

objective/kl: -14.523805618286133
ppo/returns/mean: 0.7847204208374023
ppo/policy/advantages_mean: 4.682689905166626e-05
---------------------------------------------------------------------------------------------------


1386it [2:10:53,  3.22s/it]

objective/kl: -15.750741004943848
ppo/returns/mean: 0.7990919947624207
ppo/policy/advantages_mean: -0.005783852189779282
---------------------------------------------------------------------------------------------------


1387it [2:10:56,  3.22s/it]

objective/kl: -16.46875
ppo/returns/mean: 0.89597088098526
ppo/policy/advantages_mean: 0.004875063896179199
---------------------------------------------------------------------------------------------------


1388it [2:11:00,  3.22s/it]

objective/kl: -17.309860229492188
ppo/returns/mean: 0.899122953414917
ppo/policy/advantages_mean: -0.0018718093633651733
---------------------------------------------------------------------------------------------------


1389it [2:11:03,  3.24s/it]

objective/kl: -16.12969970703125
ppo/returns/mean: 0.8425053358078003
ppo/policy/advantages_mean: -0.005305450409650803
---------------------------------------------------------------------------------------------------


1390it [2:11:06,  3.24s/it]

objective/kl: -15.46456527709961
ppo/returns/mean: 0.769616961479187
ppo/policy/advantages_mean: 0.004576370120048523
---------------------------------------------------------------------------------------------------


1391it [2:11:09,  3.24s/it]

objective/kl: -15.179805755615234
ppo/returns/mean: 0.8265502452850342
ppo/policy/advantages_mean: -0.0022026225924491882
---------------------------------------------------------------------------------------------------


1392it [2:11:13,  3.24s/it]

objective/kl: -16.894943237304688
ppo/returns/mean: 0.8819934129714966
ppo/policy/advantages_mean: -0.00360078364610672
---------------------------------------------------------------------------------------------------


1393it [2:11:16,  3.24s/it]

objective/kl: -15.326080322265625
ppo/returns/mean: 0.8509126901626587
ppo/policy/advantages_mean: -0.00038546323776245117
---------------------------------------------------------------------------------------------------


1394it [2:11:19,  3.25s/it]

objective/kl: -15.073144912719727
ppo/returns/mean: 0.7893323302268982
ppo/policy/advantages_mean: -0.0010976269841194153
---------------------------------------------------------------------------------------------------


1395it [2:11:22,  3.25s/it]

objective/kl: -15.919265747070312
ppo/returns/mean: 0.8984434604644775
ppo/policy/advantages_mean: -0.0023896396160125732
---------------------------------------------------------------------------------------------------


1396it [2:11:26,  3.23s/it]

objective/kl: -15.934341430664062
ppo/returns/mean: 0.8377775549888611
ppo/policy/advantages_mean: -0.0026762746274471283
---------------------------------------------------------------------------------------------------


1397it [2:11:29,  3.24s/it]

objective/kl: -17.303112030029297
ppo/returns/mean: 0.8909953832626343
ppo/policy/advantages_mean: 0.0035760775208473206
---------------------------------------------------------------------------------------------------


1398it [2:11:32,  3.23s/it]

objective/kl: -15.852108001708984
ppo/returns/mean: 0.8285470604896545
ppo/policy/advantages_mean: -0.005513839423656464
---------------------------------------------------------------------------------------------------


1399it [2:11:36,  3.28s/it]

objective/kl: -15.359949111938477
ppo/returns/mean: 0.8181273937225342
ppo/policy/advantages_mean: -0.002397622913122177
---------------------------------------------------------------------------------------------------


1400it [2:11:39,  3.26s/it]

objective/kl: -15.275508880615234
ppo/returns/mean: 0.7958698272705078
ppo/policy/advantages_mean: 0.00010873004794120789
---------------------------------------------------------------------------------------------------


1401it [2:11:42,  3.32s/it]

objective/kl: -15.778399467468262
ppo/returns/mean: 0.8701772689819336
ppo/policy/advantages_mean: -0.0005665533244609833
---------------------------------------------------------------------------------------------------


1402it [2:11:45,  3.30s/it]

objective/kl: -16.161476135253906
ppo/returns/mean: 0.8423223495483398
ppo/policy/advantages_mean: 0.0022260695695877075
---------------------------------------------------------------------------------------------------


1403it [2:11:49,  3.27s/it]

objective/kl: -16.827442169189453
ppo/returns/mean: 0.8353812098503113
ppo/policy/advantages_mean: -0.0005749091506004333
---------------------------------------------------------------------------------------------------


1404it [2:11:52,  3.25s/it]

objective/kl: -16.121002197265625
ppo/returns/mean: 0.8108344078063965
ppo/policy/advantages_mean: -0.005814678966999054
---------------------------------------------------------------------------------------------------


1405it [2:11:55,  3.25s/it]

objective/kl: -16.328001022338867
ppo/returns/mean: 0.8645943403244019
ppo/policy/advantages_mean: -0.0025322996079921722
---------------------------------------------------------------------------------------------------


1406it [2:11:58,  3.26s/it]

objective/kl: -16.790863037109375
ppo/returns/mean: 0.8912691473960876
ppo/policy/advantages_mean: 0.004729382693767548
---------------------------------------------------------------------------------------------------


1407it [2:12:02,  3.25s/it]

objective/kl: -16.09906768798828
ppo/returns/mean: 0.920887291431427
ppo/policy/advantages_mean: 0.0011864937841892242
---------------------------------------------------------------------------------------------------


1408it [2:12:05,  3.24s/it]

objective/kl: -15.680832862854004
ppo/returns/mean: 0.8463866710662842
ppo/policy/advantages_mean: -0.0026017576456069946
---------------------------------------------------------------------------------------------------


1409it [2:12:08,  3.22s/it]

objective/kl: -15.901348114013672
ppo/returns/mean: 0.8885965347290039
ppo/policy/advantages_mean: -0.0025902874767780304
---------------------------------------------------------------------------------------------------


1410it [2:12:11,  3.21s/it]

objective/kl: -15.467132568359375
ppo/returns/mean: 0.8278083801269531
ppo/policy/advantages_mean: -0.014285050332546234
---------------------------------------------------------------------------------------------------


1411it [2:12:15,  3.28s/it]

objective/kl: -15.114958763122559
ppo/returns/mean: 0.8248240947723389
ppo/policy/advantages_mean: 0.0004253312945365906
---------------------------------------------------------------------------------------------------


1412it [2:12:18,  3.26s/it]

objective/kl: -15.823192596435547
ppo/returns/mean: 0.8065167665481567
ppo/policy/advantages_mean: -0.01786525547504425
---------------------------------------------------------------------------------------------------


1413it [2:12:21,  3.25s/it]

objective/kl: -16.509687423706055
ppo/returns/mean: 0.8929781913757324
ppo/policy/advantages_mean: -0.0016672145575284958
---------------------------------------------------------------------------------------------------


1414it [2:12:24,  3.25s/it]

objective/kl: -17.612430572509766
ppo/returns/mean: 0.9666378498077393
ppo/policy/advantages_mean: -0.0129631906747818
---------------------------------------------------------------------------------------------------


1415it [2:12:28,  3.24s/it]

objective/kl: -18.062061309814453
ppo/returns/mean: 0.9283688068389893
ppo/policy/advantages_mean: -0.004762791097164154
---------------------------------------------------------------------------------------------------


1416it [2:12:31,  3.26s/it]

objective/kl: -17.587772369384766
ppo/returns/mean: 0.9249473214149475
ppo/policy/advantages_mean: 0.00046607106924057007
---------------------------------------------------------------------------------------------------


1417it [2:12:34,  3.24s/it]

objective/kl: -16.376081466674805
ppo/returns/mean: 0.8716115355491638
ppo/policy/advantages_mean: 0.0004719570279121399
---------------------------------------------------------------------------------------------------


1418it [2:12:37,  3.28s/it]

objective/kl: -16.174882888793945
ppo/returns/mean: 0.8645504713058472
ppo/policy/advantages_mean: -0.0023015886545181274
---------------------------------------------------------------------------------------------------


1419it [2:12:41,  3.28s/it]

objective/kl: -15.937945365905762
ppo/returns/mean: 0.8778898119926453
ppo/policy/advantages_mean: 0.0003791525959968567
---------------------------------------------------------------------------------------------------


1420it [2:12:44,  3.26s/it]

objective/kl: -14.515453338623047
ppo/returns/mean: 0.7777193784713745
ppo/policy/advantages_mean: 0.0012148022651672363
---------------------------------------------------------------------------------------------------


1421it [2:12:47,  3.26s/it]

objective/kl: -16.00175666809082
ppo/returns/mean: 0.860142707824707
ppo/policy/advantages_mean: 0.0024077221751213074
---------------------------------------------------------------------------------------------------


1422it [2:12:50,  3.26s/it]

objective/kl: -16.089954376220703
ppo/returns/mean: 0.8810153007507324
ppo/policy/advantages_mean: 0.002915114164352417
---------------------------------------------------------------------------------------------------


1423it [2:12:54,  3.25s/it]

objective/kl: -16.08731460571289
ppo/returns/mean: 0.8427139520645142
ppo/policy/advantages_mean: -0.015376150608062744
---------------------------------------------------------------------------------------------------


1424it [2:12:57,  3.24s/it]

objective/kl: -16.06719970703125
ppo/returns/mean: 0.7977218627929688
ppo/policy/advantages_mean: 0.0022395923733711243
---------------------------------------------------------------------------------------------------


1425it [2:13:00,  3.23s/it]

objective/kl: -16.50849151611328
ppo/returns/mean: 0.8613162636756897
ppo/policy/advantages_mean: -0.007647007703781128
---------------------------------------------------------------------------------------------------


1426it [2:13:03,  3.25s/it]

objective/kl: -18.7849063873291
ppo/returns/mean: 0.850775957107544
ppo/policy/advantages_mean: 0.0019405409693717957
---------------------------------------------------------------------------------------------------


1427it [2:13:07,  3.26s/it]

objective/kl: -17.16123390197754
ppo/returns/mean: 0.8251738548278809
ppo/policy/advantages_mean: 0.0031044110655784607
---------------------------------------------------------------------------------------------------


1428it [2:13:10,  3.30s/it]

objective/kl: -16.97614860534668
ppo/returns/mean: 0.8369730710983276
ppo/policy/advantages_mean: -0.008236929774284363
---------------------------------------------------------------------------------------------------


1429it [2:13:13,  3.28s/it]

objective/kl: -17.361156463623047
ppo/returns/mean: 0.8672220706939697
ppo/policy/advantages_mean: -0.0006737038493156433
---------------------------------------------------------------------------------------------------


1430it [2:13:17,  3.27s/it]

objective/kl: -18.259292602539062
ppo/returns/mean: 0.8874747157096863
ppo/policy/advantages_mean: 0.002367839217185974
---------------------------------------------------------------------------------------------------


1431it [2:13:20,  3.30s/it]

objective/kl: -16.38660430908203
ppo/returns/mean: 0.8254737854003906
ppo/policy/advantages_mean: -0.0007719509303569794
---------------------------------------------------------------------------------------------------


1432it [2:13:23,  3.27s/it]

objective/kl: -16.103849411010742
ppo/returns/mean: 0.8365550637245178
ppo/policy/advantages_mean: 0.001403607428073883
---------------------------------------------------------------------------------------------------


1433it [2:13:26,  3.26s/it]

objective/kl: -16.85553550720215
ppo/returns/mean: 0.8625971078872681
ppo/policy/advantages_mean: -0.011352397501468658
---------------------------------------------------------------------------------------------------


1434it [2:13:30,  3.26s/it]

objective/kl: -16.865571975708008
ppo/returns/mean: 0.9302892684936523
ppo/policy/advantages_mean: -0.00012066960334777832
---------------------------------------------------------------------------------------------------


1435it [2:13:33,  3.26s/it]

objective/kl: -17.924415588378906
ppo/returns/mean: 0.8867762088775635
ppo/policy/advantages_mean: 0.009226653724908829
---------------------------------------------------------------------------------------------------


1436it [2:13:36,  3.25s/it]

objective/kl: -17.14643096923828
ppo/returns/mean: 0.8296877145767212
ppo/policy/advantages_mean: 0.0020330846309661865
---------------------------------------------------------------------------------------------------


1437it [2:13:39,  3.25s/it]

objective/kl: -19.303569793701172
ppo/returns/mean: 0.979949951171875
ppo/policy/advantages_mean: -8.457107469439507e-05
---------------------------------------------------------------------------------------------------


1438it [2:13:42,  3.23s/it]

objective/kl: -18.01823616027832
ppo/returns/mean: 0.9507120251655579
ppo/policy/advantages_mean: 0.003630921244621277
---------------------------------------------------------------------------------------------------


1439it [2:13:46,  3.23s/it]

objective/kl: -19.78485870361328
ppo/returns/mean: 1.0675866603851318
ppo/policy/advantages_mean: 0.008368317037820816
---------------------------------------------------------------------------------------------------


1440it [2:13:49,  3.23s/it]

objective/kl: -20.100116729736328
ppo/returns/mean: 1.1197636127471924
ppo/policy/advantages_mean: 0.004557453095912933
---------------------------------------------------------------------------------------------------


1441it [2:13:52,  3.22s/it]

objective/kl: -18.175613403320312
ppo/returns/mean: 1.0656120777130127
ppo/policy/advantages_mean: -0.005649484694004059
---------------------------------------------------------------------------------------------------


1442it [2:13:55,  3.21s/it]

objective/kl: -17.364917755126953
ppo/returns/mean: 0.9943246841430664
ppo/policy/advantages_mean: -0.00044861435890197754
---------------------------------------------------------------------------------------------------


1443it [2:13:59,  3.23s/it]

objective/kl: -18.29079818725586
ppo/returns/mean: 1.0032060146331787
ppo/policy/advantages_mean: -0.003740273416042328
---------------------------------------------------------------------------------------------------


1444it [2:14:02,  3.24s/it]

objective/kl: -18.167659759521484
ppo/returns/mean: 0.9416124224662781
ppo/policy/advantages_mean: 0.004492403939366341
---------------------------------------------------------------------------------------------------


1445it [2:14:05,  3.24s/it]

objective/kl: -18.95597267150879
ppo/returns/mean: 1.0289826393127441
ppo/policy/advantages_mean: -0.0031387805938720703
---------------------------------------------------------------------------------------------------


1446it [2:14:08,  3.25s/it]

objective/kl: -19.211688995361328
ppo/returns/mean: 1.0273339748382568
ppo/policy/advantages_mean: -0.0008163135498762131
---------------------------------------------------------------------------------------------------


1447it [2:14:12,  3.25s/it]

objective/kl: -18.84786605834961
ppo/returns/mean: 1.0425761938095093
ppo/policy/advantages_mean: -0.004256565123796463
---------------------------------------------------------------------------------------------------


1448it [2:14:15,  3.28s/it]

objective/kl: -19.40617561340332
ppo/returns/mean: 1.0599807500839233
ppo/policy/advantages_mean: -0.002693489193916321
---------------------------------------------------------------------------------------------------


1449it [2:14:18,  3.28s/it]

objective/kl: -22.7073974609375
ppo/returns/mean: 1.2138118743896484
ppo/policy/advantages_mean: -0.006008036434650421
---------------------------------------------------------------------------------------------------


1450it [2:14:22,  3.29s/it]

objective/kl: -28.426841735839844
ppo/returns/mean: 1.5285799503326416
ppo/policy/advantages_mean: -0.00026895105838775635
---------------------------------------------------------------------------------------------------


1451it [2:14:25,  3.25s/it]

objective/kl: -28.657470703125
ppo/returns/mean: 1.5082261562347412
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1452it [2:14:28,  3.21s/it]

objective/kl: -29.037220001220703
ppo/returns/mean: 1.5955132246017456
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1453it [2:14:31,  3.17s/it]

objective/kl: -31.15896224975586
ppo/returns/mean: 1.634711742401123
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


1454it [2:14:34,  3.15s/it]

objective/kl: -29.033416748046875
ppo/returns/mean: 1.5924595594406128
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1455it [2:14:37,  3.12s/it]

objective/kl: -29.160558700561523
ppo/returns/mean: 1.60995352268219
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1456it [2:14:40,  3.11s/it]

objective/kl: -28.391462326049805
ppo/returns/mean: 1.5700836181640625
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


1457it [2:14:43,  3.10s/it]

objective/kl: -30.62026596069336
ppo/returns/mean: 1.6570141315460205
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1458it [2:14:46,  3.10s/it]

objective/kl: -29.945194244384766
ppo/returns/mean: 1.6215484142303467
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1459it [2:14:50,  3.11s/it]

objective/kl: -28.930265426635742
ppo/returns/mean: 1.5894362926483154
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1460it [2:14:53,  3.10s/it]

objective/kl: -29.792461395263672
ppo/returns/mean: 1.6229740381240845
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1461it [2:14:56,  3.10s/it]

objective/kl: -30.729379653930664
ppo/returns/mean: 1.6740249395370483
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1462it [2:14:59,  3.11s/it]

objective/kl: -32.39191436767578
ppo/returns/mean: 1.7369506359100342
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1463it [2:15:02,  3.11s/it]

objective/kl: -34.49616622924805
ppo/returns/mean: 1.801318645477295
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1464it [2:15:05,  3.11s/it]

objective/kl: -35.284446716308594
ppo/returns/mean: 1.8201185464859009
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1465it [2:15:08,  3.10s/it]

objective/kl: -30.93798828125
ppo/returns/mean: 1.6631958484649658
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1466it [2:15:11,  3.10s/it]

objective/kl: -31.301055908203125
ppo/returns/mean: 1.6678767204284668
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1467it [2:15:14,  3.09s/it]

objective/kl: -33.546607971191406
ppo/returns/mean: 1.7535513639450073
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


1468it [2:15:17,  3.10s/it]

objective/kl: -32.724952697753906
ppo/returns/mean: 1.703934907913208
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1469it [2:15:21,  3.13s/it]

objective/kl: -33.581787109375
ppo/returns/mean: 1.772316575050354
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1470it [2:15:24,  3.16s/it]

objective/kl: -33.88665008544922
ppo/returns/mean: 1.770219326019287
ppo/policy/advantages_mean: -3.026798367500305e-08
---------------------------------------------------------------------------------------------------


1471it [2:15:27,  3.14s/it]

objective/kl: -37.2680778503418
ppo/returns/mean: 1.9158331155776978
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1472it [2:15:30,  3.11s/it]

objective/kl: -49.763519287109375
ppo/returns/mean: 2.4725852012634277
ppo/policy/advantages_mean: -8.381903171539307e-09
---------------------------------------------------------------------------------------------------


1473it [2:15:33,  3.09s/it]

objective/kl: -44.77021789550781
ppo/returns/mean: 2.187397003173828
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1474it [2:15:36,  3.09s/it]

objective/kl: -47.489784240722656
ppo/returns/mean: 2.300112247467041
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1475it [2:15:39,  3.08s/it]

objective/kl: -52.815162658691406
ppo/returns/mean: 2.629312038421631
ppo/policy/advantages_mean: 6.332993507385254e-08
---------------------------------------------------------------------------------------------------


1476it [2:15:42,  3.06s/it]

objective/kl: -54.43264389038086
ppo/returns/mean: 2.689748764038086
ppo/policy/advantages_mean: -2.7939677238464355e-08
---------------------------------------------------------------------------------------------------


1477it [2:15:45,  3.08s/it]

objective/kl: -54.69139099121094
ppo/returns/mean: 2.708439588546753
ppo/policy/advantages_mean: 4.284083843231201e-08
---------------------------------------------------------------------------------------------------


1478it [2:15:48,  3.08s/it]

objective/kl: -52.30509948730469
ppo/returns/mean: 2.680182695388794
ppo/policy/advantages_mean: -1.30385160446167e-08
---------------------------------------------------------------------------------------------------


1479it [2:15:51,  3.08s/it]

objective/kl: -53.37805938720703
ppo/returns/mean: 2.7472474575042725
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1480it [2:15:55,  3.08s/it]

objective/kl: -57.548095703125
ppo/returns/mean: 2.9092488288879395
ppo/policy/advantages_mean: -1.30385160446167e-08
---------------------------------------------------------------------------------------------------


1481it [2:15:58,  3.08s/it]

objective/kl: -57.86970520019531
ppo/returns/mean: 2.939206600189209
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1482it [2:16:01,  3.07s/it]

objective/kl: -56.251033782958984
ppo/returns/mean: 2.815155506134033
ppo/policy/advantages_mean: 1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


1483it [2:16:04,  3.08s/it]

objective/kl: -56.49403762817383
ppo/returns/mean: 2.8746209144592285
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1484it [2:16:07,  3.08s/it]

objective/kl: -58.166114807128906
ppo/returns/mean: 2.9281959533691406
ppo/policy/advantages_mean: 3.5390257835388184e-08
---------------------------------------------------------------------------------------------------


1485it [2:16:10,  3.08s/it]

objective/kl: -58.311580657958984
ppo/returns/mean: 2.904963970184326
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


1486it [2:16:13,  3.07s/it]

objective/kl: -56.535831451416016
ppo/returns/mean: 2.8326401710510254
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1487it [2:16:16,  3.13s/it]

objective/kl: -63.946720123291016
ppo/returns/mean: 3.1738157272338867
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


1488it [2:16:19,  3.11s/it]

objective/kl: -62.889286041259766
ppo/returns/mean: 3.092076539993286
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1489it [2:16:22,  3.10s/it]

objective/kl: -66.21435546875
ppo/returns/mean: 3.2979068756103516
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1490it [2:16:25,  3.10s/it]

objective/kl: -60.68827438354492
ppo/returns/mean: 3.056250810623169
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1491it [2:16:29,  3.12s/it]

objective/kl: -65.16506958007812
ppo/returns/mean: 3.266735076904297
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1492it [2:16:32,  3.11s/it]

objective/kl: -64.04598236083984
ppo/returns/mean: 3.2259435653686523
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1493it [2:16:35,  3.11s/it]

objective/kl: -66.70299530029297
ppo/returns/mean: 3.359174966812134
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1494it [2:16:38,  3.12s/it]

objective/kl: -65.17535400390625
ppo/returns/mean: 3.2987313270568848
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1495it [2:16:41,  3.13s/it]

objective/kl: -63.863243103027344
ppo/returns/mean: 3.209789991378784
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


1496it [2:16:44,  3.14s/it]

objective/kl: -52.919677734375
ppo/returns/mean: 2.6116085052490234
ppo/policy/advantages_mean: -2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


1497it [2:16:47,  3.15s/it]

objective/kl: -46.61672592163086
ppo/returns/mean: 2.0218679904937744
ppo/policy/advantages_mean: -3.91155481338501e-08
---------------------------------------------------------------------------------------------------


1498it [2:16:51,  3.15s/it]

objective/kl: -51.26451110839844
ppo/returns/mean: 1.8751776218414307
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1499it [2:16:54,  3.14s/it]

objective/kl: -52.14146423339844
ppo/returns/mean: 1.5651469230651855
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1500it [2:16:57,  3.15s/it]

objective/kl: -50.35826873779297
ppo/returns/mean: 1.3379011154174805
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1501it [2:17:00,  3.17s/it]

objective/kl: -47.57250213623047
ppo/returns/mean: 1.1590956449508667
ppo/policy/advantages_mean: 0.0028025880455970764
---------------------------------------------------------------------------------------------------


1502it [2:17:03,  3.19s/it]

objective/kl: -49.104736328125
ppo/returns/mean: 1.3080112934112549
ppo/policy/advantages_mean: -0.00038702040910720825
---------------------------------------------------------------------------------------------------


1503it [2:17:07,  3.17s/it]

objective/kl: -45.93927764892578
ppo/returns/mean: 1.1026480197906494
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1504it [2:17:10,  3.16s/it]

objective/kl: -54.417152404785156
ppo/returns/mean: 1.54007887840271
ppo/policy/advantages_mean: 0.0016664713621139526
---------------------------------------------------------------------------------------------------


1505it [2:17:13,  3.15s/it]

objective/kl: -48.89209747314453
ppo/returns/mean: 1.5960676670074463
ppo/policy/advantages_mean: -2.421438694000244e-08
---------------------------------------------------------------------------------------------------


1506it [2:17:16,  3.19s/it]

objective/kl: -53.0252571105957
ppo/returns/mean: 1.720015287399292
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


1507it [2:17:19,  3.16s/it]

objective/kl: -48.42845153808594
ppo/returns/mean: 1.6417791843414307
ppo/policy/advantages_mean: 6.332993507385254e-08
---------------------------------------------------------------------------------------------------


1508it [2:17:22,  3.15s/it]

objective/kl: -46.92729568481445
ppo/returns/mean: 1.621293544769287
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


1509it [2:17:25,  3.14s/it]

objective/kl: -42.82422637939453
ppo/returns/mean: 1.3501076698303223
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1510it [2:17:29,  3.15s/it]

objective/kl: -41.37529754638672
ppo/returns/mean: 1.2724820375442505
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1511it [2:17:32,  3.14s/it]

objective/kl: -32.14595031738281
ppo/returns/mean: 1.182501196861267
ppo/policy/advantages_mean: 8.381903171539307e-09
---------------------------------------------------------------------------------------------------


1512it [2:17:35,  3.15s/it]

objective/kl: -11.70935344696045
ppo/returns/mean: 0.02984827756881714
ppo/policy/advantages_mean: -0.0010981932282447815
---------------------------------------------------------------------------------------------------


1513it [2:17:38,  3.18s/it]

objective/kl: -12.113424301147461
ppo/returns/mean: -0.40231239795684814
ppo/policy/advantages_mean: -0.00038477592170238495
---------------------------------------------------------------------------------------------------


1514it [2:17:41,  3.23s/it]

objective/kl: -17.743244171142578
ppo/returns/mean: -0.27165353298187256
ppo/policy/advantages_mean: 0.000562736764550209
---------------------------------------------------------------------------------------------------


1515it [2:17:45,  3.26s/it]

objective/kl: -20.478477478027344
ppo/returns/mean: -0.40825605392456055
ppo/policy/advantages_mean: -0.0008011478930711746
---------------------------------------------------------------------------------------------------


1516it [2:17:48,  3.28s/it]

objective/kl: -27.35437774658203
ppo/returns/mean: -0.1959766298532486
ppo/policy/advantages_mean: 0.0016776211559772491
---------------------------------------------------------------------------------------------------


1517it [2:17:51,  3.30s/it]

objective/kl: -32.10356903076172
ppo/returns/mean: -0.022542426362633705
ppo/policy/advantages_mean: -0.0003928821533918381
---------------------------------------------------------------------------------------------------


1518it [2:17:55,  3.36s/it]

objective/kl: -40.462554931640625
ppo/returns/mean: 0.40346038341522217
ppo/policy/advantages_mean: -0.0005017966032028198
---------------------------------------------------------------------------------------------------


1519it [2:17:58,  3.28s/it]

objective/kl: -56.76486587524414
ppo/returns/mean: 1.2797889709472656
ppo/policy/advantages_mean: -2.1420419216156006e-08
---------------------------------------------------------------------------------------------------


1520it [2:18:01,  3.25s/it]

objective/kl: -67.28959655761719
ppo/returns/mean: 1.8152772188186646
ppo/policy/advantages_mean: -4.284083843231201e-08
---------------------------------------------------------------------------------------------------


1521it [2:18:04,  3.21s/it]

objective/kl: -67.23858642578125
ppo/returns/mean: 2.012195587158203
ppo/policy/advantages_mean: 2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


1522it [2:18:07,  3.18s/it]

objective/kl: -63.34698486328125
ppo/returns/mean: 1.9843316078186035
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1523it [2:18:11,  3.19s/it]

objective/kl: -53.80509948730469
ppo/returns/mean: 1.7250159978866577
ppo/policy/advantages_mean: 6.705522537231445e-08
---------------------------------------------------------------------------------------------------


1524it [2:18:14,  3.18s/it]

objective/kl: -51.46006393432617
ppo/returns/mean: 1.6633368730545044
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1525it [2:18:17,  3.15s/it]

objective/kl: -51.071868896484375
ppo/returns/mean: 1.584212303161621
ppo/policy/advantages_mean: 4.377216100692749e-08
---------------------------------------------------------------------------------------------------


1526it [2:18:20,  3.14s/it]

objective/kl: -48.75996398925781
ppo/returns/mean: 1.4245784282684326
ppo/policy/advantages_mean: -1.3504177331924438e-08
---------------------------------------------------------------------------------------------------


1527it [2:18:23,  3.13s/it]

objective/kl: -46.661312103271484
ppo/returns/mean: 1.3077316284179688
ppo/policy/advantages_mean: -3.166496753692627e-08
---------------------------------------------------------------------------------------------------


1528it [2:18:26,  3.13s/it]

objective/kl: -44.29175567626953
ppo/returns/mean: 1.061521053314209
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1529it [2:18:29,  3.13s/it]

objective/kl: -43.52627182006836
ppo/returns/mean: 0.968300461769104
ppo/policy/advantages_mean: 6.51925802230835e-08
---------------------------------------------------------------------------------------------------


1530it [2:18:32,  3.12s/it]

objective/kl: -42.71281814575195
ppo/returns/mean: 0.8907115459442139
ppo/policy/advantages_mean: 4.470348358154297e-08
---------------------------------------------------------------------------------------------------


1531it [2:18:36,  3.14s/it]

objective/kl: -40.782833099365234
ppo/returns/mean: 0.7740880846977234
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1532it [2:18:39,  3.13s/it]

objective/kl: -42.36040115356445
ppo/returns/mean: 0.8093830347061157
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1533it [2:18:42,  3.12s/it]

objective/kl: -44.27268981933594
ppo/returns/mean: 0.9315111041069031
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1534it [2:18:45,  3.11s/it]

objective/kl: -43.211212158203125
ppo/returns/mean: 0.9077126979827881
ppo/policy/advantages_mean: -2.60770320892334e-08
---------------------------------------------------------------------------------------------------


1535it [2:18:48,  3.12s/it]

objective/kl: -43.387847900390625
ppo/returns/mean: 0.9100273847579956
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1536it [2:18:51,  3.14s/it]

objective/kl: -43.317657470703125
ppo/returns/mean: 0.9265244007110596
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1537it [2:18:54,  3.13s/it]

objective/kl: -40.80315399169922
ppo/returns/mean: 0.8465863466262817
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1538it [2:18:58,  3.15s/it]

objective/kl: -41.68939971923828
ppo/returns/mean: 0.8625145554542542
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1539it [2:19:01,  3.15s/it]

objective/kl: -41.589210510253906
ppo/returns/mean: 0.8651888966560364
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1540it [2:19:04,  3.14s/it]

objective/kl: -39.00463104248047
ppo/returns/mean: 0.7677949666976929
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


1541it [2:19:07,  3.16s/it]

objective/kl: -38.57225799560547
ppo/returns/mean: 0.731191873550415
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1542it [2:19:10,  3.15s/it]

objective/kl: -40.4798469543457
ppo/returns/mean: 0.7329093813896179
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1543it [2:19:13,  3.14s/it]

objective/kl: -41.28241729736328
ppo/returns/mean: 0.7170047163963318
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1544it [2:19:17,  3.15s/it]

objective/kl: -40.70598602294922
ppo/returns/mean: 0.7034438252449036
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1545it [2:19:20,  3.15s/it]

objective/kl: -39.98645782470703
ppo/returns/mean: 0.7060570120811462
ppo/policy/advantages_mean: -1.210719347000122e-08
---------------------------------------------------------------------------------------------------


1546it [2:19:23,  3.15s/it]

objective/kl: -29.76544189453125
ppo/returns/mean: 0.37836992740631104
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1547it [2:19:26,  3.15s/it]

objective/kl: -14.411685943603516
ppo/returns/mean: 0.06710109859704971
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1548it [2:19:29,  3.16s/it]

objective/kl: -15.823627471923828
ppo/returns/mean: 0.07300419360399246
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1549it [2:19:32,  3.17s/it]

objective/kl: -15.064067840576172
ppo/returns/mean: 0.05012970417737961
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1550it [2:19:35,  3.17s/it]

objective/kl: -16.363126754760742
ppo/returns/mean: 0.1543939709663391
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


1551it [2:19:39,  3.20s/it]

objective/kl: -15.13981819152832
ppo/returns/mean: 0.11914670467376709
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1552it [2:19:42,  3.19s/it]

objective/kl: -17.92160987854004
ppo/returns/mean: 0.17516790330410004
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


1553it [2:19:45,  3.18s/it]

objective/kl: -16.764629364013672
ppo/returns/mean: 0.14723503589630127
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1554it [2:19:48,  3.18s/it]

objective/kl: -13.733583450317383
ppo/returns/mean: 0.0424841046333313
ppo/policy/advantages_mean: -6.031990051269531e-05
---------------------------------------------------------------------------------------------------


1555it [2:19:51,  3.18s/it]

objective/kl: -12.645928382873535
ppo/returns/mean: 0.0023971148766577244
ppo/policy/advantages_mean: -0.0003394549712538719
---------------------------------------------------------------------------------------------------


1556it [2:19:55,  3.17s/it]

objective/kl: -16.682727813720703
ppo/returns/mean: 0.10523151606321335
ppo/policy/advantages_mean: -0.0009972676634788513
---------------------------------------------------------------------------------------------------


1557it [2:19:58,  3.18s/it]

objective/kl: -20.330524444580078
ppo/returns/mean: 0.23868674039840698
ppo/policy/advantages_mean: -0.0011240653693675995
---------------------------------------------------------------------------------------------------


1558it [2:20:01,  3.19s/it]

objective/kl: -19.54499626159668
ppo/returns/mean: 0.18722006678581238
ppo/policy/advantages_mean: -0.0009754970669746399
---------------------------------------------------------------------------------------------------


1559it [2:20:04,  3.18s/it]

objective/kl: -15.12183952331543
ppo/returns/mean: 0.10787586867809296
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1560it [2:20:08,  3.25s/it]

objective/kl: -23.081619262695312
ppo/returns/mean: 0.3242432475090027
ppo/policy/advantages_mean: -0.0010392526164650917
---------------------------------------------------------------------------------------------------


1561it [2:20:11,  3.23s/it]

objective/kl: -30.096494674682617
ppo/returns/mean: 0.5087460875511169
ppo/policy/advantages_mean: -0.00039728544652462006
---------------------------------------------------------------------------------------------------


1562it [2:20:14,  3.22s/it]

objective/kl: -38.24968719482422
ppo/returns/mean: 0.6206158399581909
ppo/policy/advantages_mean: 0.0022730156779289246
---------------------------------------------------------------------------------------------------


1563it [2:20:17,  3.22s/it]

objective/kl: -27.580398559570312
ppo/returns/mean: 0.5945343971252441
ppo/policy/advantages_mean: -0.0017039868980646133
---------------------------------------------------------------------------------------------------


1564it [2:20:20,  3.22s/it]

objective/kl: -33.6951904296875
ppo/returns/mean: 0.7452539205551147
ppo/policy/advantages_mean: 0.00024211686104536057
---------------------------------------------------------------------------------------------------


1565it [2:20:24,  3.22s/it]

objective/kl: -33.47261047363281
ppo/returns/mean: 0.7681666612625122
ppo/policy/advantages_mean: -0.001628875732421875
---------------------------------------------------------------------------------------------------


1566it [2:20:27,  3.22s/it]

objective/kl: -33.81932830810547
ppo/returns/mean: 0.8694127798080444
ppo/policy/advantages_mean: 0.0008844472467899323
---------------------------------------------------------------------------------------------------


1567it [2:20:30,  3.20s/it]

objective/kl: -39.240570068359375
ppo/returns/mean: 1.0084856748580933
ppo/policy/advantages_mean: -0.0026629017665982246
---------------------------------------------------------------------------------------------------


1568it [2:20:33,  3.21s/it]

objective/kl: -37.47526931762695
ppo/returns/mean: 1.0599524974822998
ppo/policy/advantages_mean: -0.0006640162318944931
---------------------------------------------------------------------------------------------------


1569it [2:20:36,  3.20s/it]

objective/kl: -51.587379455566406
ppo/returns/mean: 1.5184767246246338
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1570it [2:20:40,  3.22s/it]

objective/kl: -60.78437042236328
ppo/returns/mean: 1.8421026468276978
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1571it [2:20:43,  3.20s/it]

objective/kl: -53.59333038330078
ppo/returns/mean: 1.7173503637313843
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1572it [2:20:46,  3.25s/it]

objective/kl: -62.730712890625
ppo/returns/mean: 2.1317739486694336
ppo/policy/advantages_mean: -4.6566128730773926e-09
---------------------------------------------------------------------------------------------------


1573it [2:20:49,  3.21s/it]

objective/kl: -66.71923065185547
ppo/returns/mean: 2.4376344680786133
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1574it [2:20:53,  3.28s/it]

objective/kl: -26.998361587524414
ppo/returns/mean: 1.042457103729248
ppo/policy/advantages_mean: 3.5390257835388184e-08
---------------------------------------------------------------------------------------------------


1575it [2:20:56,  3.24s/it]

objective/kl: -79.21768188476562
ppo/returns/mean: 2.963768482208252
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


1576it [2:20:59,  3.21s/it]

objective/kl: -84.60416412353516
ppo/returns/mean: 2.9608421325683594
ppo/policy/advantages_mean: -5.4016709327697754e-08
---------------------------------------------------------------------------------------------------


1577it [2:21:02,  3.19s/it]

objective/kl: -72.35580444335938
ppo/returns/mean: 2.497575521469116
ppo/policy/advantages_mean: -1.0244548320770264e-08
---------------------------------------------------------------------------------------------------


1578it [2:21:05,  3.18s/it]

objective/kl: -71.17710876464844
ppo/returns/mean: 2.5866613388061523
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1579it [2:21:09,  3.21s/it]

objective/kl: -71.36320495605469
ppo/returns/mean: 2.7373945713043213
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1580it [2:21:12,  3.18s/it]

objective/kl: -72.31687927246094
ppo/returns/mean: 2.9350991249084473
ppo/policy/advantages_mean: -2.514570951461792e-08
---------------------------------------------------------------------------------------------------


1581it [2:21:15,  3.17s/it]

objective/kl: -62.1235466003418
ppo/returns/mean: 2.6747939586639404
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1582it [2:21:18,  3.20s/it]

objective/kl: -68.9490737915039
ppo/returns/mean: 2.9162087440490723
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1583it [2:21:21,  3.17s/it]

objective/kl: -64.05865478515625
ppo/returns/mean: 2.722168445587158
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1584it [2:21:25,  3.28s/it]

objective/kl: -54.612022399902344
ppo/returns/mean: 2.119551658630371
ppo/policy/advantages_mean: 4.190951585769653e-09
---------------------------------------------------------------------------------------------------


1585it [2:21:28,  3.32s/it]

objective/kl: -40.65877914428711
ppo/returns/mean: 1.0868597030639648
ppo/policy/advantages_mean: -2.8870999813079834e-08
---------------------------------------------------------------------------------------------------


1586it [2:21:31,  3.28s/it]

objective/kl: -15.410835266113281
ppo/returns/mean: -0.0781356617808342
ppo/policy/advantages_mean: 8.288770914077759e-08
---------------------------------------------------------------------------------------------------


1587it [2:21:35,  3.30s/it]

objective/kl: -13.320865631103516
ppo/returns/mean: -0.292887806892395
ppo/policy/advantages_mean: 6.332993507385254e-08
---------------------------------------------------------------------------------------------------


1588it [2:21:38,  3.29s/it]

objective/kl: -6.595368385314941
ppo/returns/mean: -0.3658207654953003
ppo/policy/advantages_mean: -2.514570951461792e-08
---------------------------------------------------------------------------------------------------


1589it [2:21:41,  3.26s/it]

objective/kl: -16.087556838989258
ppo/returns/mean: -0.33715105056762695
ppo/policy/advantages_mean: -0.0003361552953720093
---------------------------------------------------------------------------------------------------


1590it [2:21:45,  3.29s/it]

objective/kl: -20.153400421142578
ppo/returns/mean: -0.40319979190826416
ppo/policy/advantages_mean: -0.0009060497395694256
---------------------------------------------------------------------------------------------------


1591it [2:21:48,  3.26s/it]

objective/kl: -21.362079620361328
ppo/returns/mean: -0.34975117444992065
ppo/policy/advantages_mean: 0.00040525011718273163
---------------------------------------------------------------------------------------------------


1592it [2:21:51,  3.30s/it]

objective/kl: -24.107555389404297
ppo/returns/mean: -0.2846408486366272
ppo/policy/advantages_mean: 0.0008921176195144653
---------------------------------------------------------------------------------------------------


1593it [2:21:55,  3.39s/it]

objective/kl: -30.97623062133789
ppo/returns/mean: -0.17176946997642517
ppo/policy/advantages_mean: -0.0017458386719226837
---------------------------------------------------------------------------------------------------


1594it [2:21:58,  3.35s/it]

objective/kl: -29.170772552490234
ppo/returns/mean: -0.23716577887535095
ppo/policy/advantages_mean: -0.0022348538041114807
---------------------------------------------------------------------------------------------------


1595it [2:22:01,  3.37s/it]

objective/kl: -33.68965148925781
ppo/returns/mean: -0.1371403932571411
ppo/policy/advantages_mean: 0.0025292672216892242
---------------------------------------------------------------------------------------------------


1596it [2:22:05,  3.36s/it]

objective/kl: -34.18962860107422
ppo/returns/mean: -0.024154847487807274
ppo/policy/advantages_mean: -0.003920229151844978
---------------------------------------------------------------------------------------------------


1597it [2:22:08,  3.39s/it]

objective/kl: -36.23439025878906
ppo/returns/mean: -0.03311196714639664
ppo/policy/advantages_mean: -0.0015160441398620605
---------------------------------------------------------------------------------------------------


1598it [2:22:12,  3.55s/it]

objective/kl: -38.608245849609375
ppo/returns/mean: 0.20640996098518372
ppo/policy/advantages_mean: 0.0025893375277519226
---------------------------------------------------------------------------------------------------


1599it [2:22:16,  3.55s/it]

objective/kl: -39.60028076171875
ppo/returns/mean: 0.26609912514686584
ppo/policy/advantages_mean: 0.0027536898851394653
---------------------------------------------------------------------------------------------------


1600it [2:22:19,  3.45s/it]

objective/kl: -38.00267028808594
ppo/returns/mean: 0.2760794460773468
ppo/policy/advantages_mean: -0.0012221112847328186
---------------------------------------------------------------------------------------------------


1601it [2:22:22,  3.35s/it]

objective/kl: -38.78815460205078
ppo/returns/mean: 0.4487484097480774
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


1602it [2:22:25,  3.27s/it]

objective/kl: -38.704200744628906
ppo/returns/mean: 0.5273977518081665
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1603it [2:22:29,  3.36s/it]

objective/kl: -32.756038665771484
ppo/returns/mean: 0.4472363591194153
ppo/policy/advantages_mean: -2.8870999813079834e-08
---------------------------------------------------------------------------------------------------


1604it [2:22:32,  3.29s/it]

objective/kl: -23.378456115722656
ppo/returns/mean: 0.25760960578918457
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1605it [2:22:35,  3.24s/it]

objective/kl: 1.0422316789627075
ppo/returns/mean: -0.2743600010871887
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


1606it [2:22:38,  3.21s/it]

objective/kl: -1.1168190240859985
ppo/returns/mean: -0.3120045065879822
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1607it [2:22:41,  3.19s/it]

objective/kl: -5.4656572341918945
ppo/returns/mean: -0.40200573205947876
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


1608it [2:22:44,  3.17s/it]

objective/kl: -21.239404678344727
ppo/returns/mean: -0.06799374520778656
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1609it [2:22:47,  3.15s/it]

objective/kl: -18.60190200805664
ppo/returns/mean: -0.09868164360523224
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1610it [2:22:51,  3.15s/it]

objective/kl: -21.754478454589844
ppo/returns/mean: -0.0794956237077713
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1611it [2:22:54,  3.14s/it]

objective/kl: -29.908763885498047
ppo/returns/mean: 0.10077515244483948
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1612it [2:22:57,  3.13s/it]

objective/kl: -30.620819091796875
ppo/returns/mean: 0.08196327090263367
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1613it [2:23:00,  3.15s/it]

objective/kl: -32.171875
ppo/returns/mean: 0.15402363240718842
ppo/policy/advantages_mean: -0.0012523457407951355
---------------------------------------------------------------------------------------------------


1614it [2:23:03,  3.16s/it]

objective/kl: -34.50944900512695
ppo/returns/mean: 0.2141476720571518
ppo/policy/advantages_mean: -0.0018585920333862305
---------------------------------------------------------------------------------------------------


1615it [2:23:06,  3.17s/it]

objective/kl: -40.14794158935547
ppo/returns/mean: 0.3960041403770447
ppo/policy/advantages_mean: 0.005683008581399918
---------------------------------------------------------------------------------------------------


1616it [2:23:10,  3.17s/it]

objective/kl: -47.517181396484375
ppo/returns/mean: 0.7598921656608582
ppo/policy/advantages_mean: 0.00046217814087867737
---------------------------------------------------------------------------------------------------


1617it [2:23:13,  3.16s/it]

objective/kl: -55.84574508666992
ppo/returns/mean: 1.233915090560913
ppo/policy/advantages_mean: 0.000561591237783432
---------------------------------------------------------------------------------------------------


1618it [2:23:16,  3.16s/it]

objective/kl: -52.69047927856445
ppo/returns/mean: 1.1707748174667358
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1619it [2:23:19,  3.17s/it]

objective/kl: -53.45733642578125
ppo/returns/mean: 1.277207612991333
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


1620it [2:23:22,  3.15s/it]

objective/kl: -53.94355010986328
ppo/returns/mean: 1.3590936660766602
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1621it [2:23:25,  3.15s/it]

objective/kl: -53.94989776611328
ppo/returns/mean: 1.3602871894836426
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1622it [2:23:29,  3.19s/it]

objective/kl: -56.589515686035156
ppo/returns/mean: 1.5352084636688232
ppo/policy/advantages_mean: 2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


1623it [2:23:32,  3.16s/it]

objective/kl: -61.00651550292969
ppo/returns/mean: 1.792391300201416
ppo/policy/advantages_mean: 3.725290298461914e-08
---------------------------------------------------------------------------------------------------


1624it [2:23:35,  3.16s/it]

objective/kl: -62.13309860229492
ppo/returns/mean: 1.8641209602355957
ppo/policy/advantages_mean: -5.21540641784668e-08
---------------------------------------------------------------------------------------------------


1625it [2:23:38,  3.15s/it]

objective/kl: -61.787376403808594
ppo/returns/mean: 1.882733941078186
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


1626it [2:23:41,  3.14s/it]

objective/kl: -64.88552856445312
ppo/returns/mean: 2.041706085205078
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1627it [2:23:44,  3.13s/it]

objective/kl: -62.20486831665039
ppo/returns/mean: 1.926041603088379
ppo/policy/advantages_mean: -0.0019240230321884155
---------------------------------------------------------------------------------------------------


1628it [2:23:47,  3.15s/it]

objective/kl: -59.326168060302734
ppo/returns/mean: 1.7680141925811768
ppo/policy/advantages_mean: -0.0013531185686588287
---------------------------------------------------------------------------------------------------


1629it [2:23:51,  3.27s/it]

objective/kl: -58.38175582885742
ppo/returns/mean: 1.5024287700653076
ppo/policy/advantages_mean: 0.0026830174028873444
---------------------------------------------------------------------------------------------------


1630it [2:23:54,  3.28s/it]

objective/kl: -55.73265838623047
ppo/returns/mean: 1.3669449090957642
ppo/policy/advantages_mean: 0.003376916516572237
---------------------------------------------------------------------------------------------------


1631it [2:23:58,  3.33s/it]

objective/kl: -55.20672607421875
ppo/returns/mean: 1.3401098251342773
ppo/policy/advantages_mean: 0.01081453263759613
---------------------------------------------------------------------------------------------------


1632it [2:24:01,  3.33s/it]

objective/kl: -53.525482177734375
ppo/returns/mean: 1.170741319656372
ppo/policy/advantages_mean: -0.0009440183639526367
---------------------------------------------------------------------------------------------------


1633it [2:24:04,  3.34s/it]

objective/kl: -54.41419982910156
ppo/returns/mean: 1.171047329902649
ppo/policy/advantages_mean: 0.0033739618957042694
---------------------------------------------------------------------------------------------------


1634it [2:24:08,  3.30s/it]

objective/kl: -53.36029815673828
ppo/returns/mean: 1.1343419551849365
ppo/policy/advantages_mean: 0.0025836871936917305
---------------------------------------------------------------------------------------------------


1635it [2:24:11,  3.31s/it]

objective/kl: -48.23694610595703
ppo/returns/mean: 0.7787163257598877
ppo/policy/advantages_mean: -0.0005897488445043564
---------------------------------------------------------------------------------------------------


1636it [2:24:15,  3.41s/it]

objective/kl: -45.69112014770508
ppo/returns/mean: 0.5740246772766113
ppo/policy/advantages_mean: -0.0006099883466959
---------------------------------------------------------------------------------------------------


1637it [2:24:18,  3.44s/it]

objective/kl: -41.82146072387695
ppo/returns/mean: 0.37006470561027527
ppo/policy/advantages_mean: -9.592622518539429e-08
---------------------------------------------------------------------------------------------------


1638it [2:24:22,  3.55s/it]

objective/kl: -41.36775207519531
ppo/returns/mean: 0.29778236150741577
ppo/policy/advantages_mean: 5.960464477539063e-08
---------------------------------------------------------------------------------------------------


1639it [2:24:25,  3.50s/it]

objective/kl: -40.86800765991211
ppo/returns/mean: 0.21100449562072754
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


1640it [2:24:29,  3.46s/it]

objective/kl: -43.19813919067383
ppo/returns/mean: 0.2398798018693924
ppo/policy/advantages_mean: -0.0008212653920054436
---------------------------------------------------------------------------------------------------


1641it [2:24:32,  3.42s/it]

objective/kl: -43.977237701416016
ppo/returns/mean: 0.2577558159828186
ppo/policy/advantages_mean: -0.0014695152640342712
---------------------------------------------------------------------------------------------------


1642it [2:24:36,  3.51s/it]

objective/kl: -45.17770004272461
ppo/returns/mean: 0.281166672706604
ppo/policy/advantages_mean: -0.0011582933366298676
---------------------------------------------------------------------------------------------------


1643it [2:24:39,  3.50s/it]

objective/kl: -48.662841796875
ppo/returns/mean: 0.5202611088752747
ppo/policy/advantages_mean: 0.0008316487073898315
---------------------------------------------------------------------------------------------------


1644it [2:24:42,  3.43s/it]

objective/kl: -51.93681335449219
ppo/returns/mean: 0.5475363731384277
ppo/policy/advantages_mean: 5.733966827392578e-05
---------------------------------------------------------------------------------------------------


1645it [2:24:46,  3.39s/it]

objective/kl: -51.36436462402344
ppo/returns/mean: 0.5338516235351562
ppo/policy/advantages_mean: 0.008024774491786957
---------------------------------------------------------------------------------------------------


1646it [2:24:49,  3.37s/it]

objective/kl: -49.042869567871094
ppo/returns/mean: 0.5402039289474487
ppo/policy/advantages_mean: 0.003555912524461746
---------------------------------------------------------------------------------------------------


1647it [2:24:52,  3.31s/it]

objective/kl: -50.65324401855469
ppo/returns/mean: 0.9143297672271729
ppo/policy/advantages_mean: 0.006846088916063309
---------------------------------------------------------------------------------------------------


1648it [2:24:56,  3.31s/it]

objective/kl: -40.83389663696289
ppo/returns/mean: 0.6077673435211182
ppo/policy/advantages_mean: 0.0014615431427955627
---------------------------------------------------------------------------------------------------


1649it [2:24:59,  3.27s/it]

objective/kl: -47.149169921875
ppo/returns/mean: 0.749152660369873
ppo/policy/advantages_mean: 0.0018322537653148174
---------------------------------------------------------------------------------------------------


1650it [2:25:02,  3.23s/it]

objective/kl: -37.68617248535156
ppo/returns/mean: 0.5857176184654236
ppo/policy/advantages_mean: 0.00016017258167266846
---------------------------------------------------------------------------------------------------


1651it [2:25:05,  3.30s/it]

objective/kl: -38.18391418457031
ppo/returns/mean: 0.5485768914222717
ppo/policy/advantages_mean: -0.001626415178179741
---------------------------------------------------------------------------------------------------


1652it [2:25:08,  3.26s/it]

objective/kl: -32.90456771850586
ppo/returns/mean: 0.30395254492759705
ppo/policy/advantages_mean: -8.874572813510895e-05
---------------------------------------------------------------------------------------------------


1653it [2:25:12,  3.28s/it]

objective/kl: -35.10451889038086
ppo/returns/mean: 0.4268549978733063
ppo/policy/advantages_mean: -0.003777381032705307
---------------------------------------------------------------------------------------------------


1654it [2:25:15,  3.23s/it]

objective/kl: -37.27134323120117
ppo/returns/mean: 0.4051131010055542
ppo/policy/advantages_mean: 0.0036770738661289215
---------------------------------------------------------------------------------------------------


1655it [2:25:18,  3.19s/it]

objective/kl: -40.28718185424805
ppo/returns/mean: 0.54023277759552
ppo/policy/advantages_mean: 0.005104199051856995
---------------------------------------------------------------------------------------------------


1656it [2:25:21,  3.19s/it]

objective/kl: -43.623085021972656
ppo/returns/mean: 0.5904067754745483
ppo/policy/advantages_mean: -0.0008169673383235931
---------------------------------------------------------------------------------------------------


1657it [2:25:24,  3.19s/it]

objective/kl: -43.64491271972656
ppo/returns/mean: 0.6628235578536987
ppo/policy/advantages_mean: -0.002796567976474762
---------------------------------------------------------------------------------------------------


1658it [2:25:27,  3.17s/it]

objective/kl: -43.56740951538086
ppo/returns/mean: 0.7060760259628296
ppo/policy/advantages_mean: 0.0009088926017284393
---------------------------------------------------------------------------------------------------


1659it [2:25:31,  3.17s/it]

objective/kl: -42.84722137451172
ppo/returns/mean: 0.6653537154197693
ppo/policy/advantages_mean: -0.0007518753409385681
---------------------------------------------------------------------------------------------------


1660it [2:25:34,  3.34s/it]

objective/kl: -22.923946380615234
ppo/returns/mean: 0.29563212394714355
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1661it [2:25:38,  3.29s/it]

objective/kl: -5.944746971130371
ppo/returns/mean: 0.03259658068418503
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


1662it [2:25:41,  3.25s/it]

objective/kl: 12.484875679016113
ppo/returns/mean: -0.43692874908447266
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


1663it [2:25:44,  3.21s/it]

objective/kl: 26.80443000793457
ppo/returns/mean: -1.0326069593429565
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1664it [2:25:47,  3.19s/it]

objective/kl: 27.14657211303711
ppo/returns/mean: -1.059468150138855
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


1665it [2:25:50,  3.21s/it]

objective/kl: 25.911869049072266
ppo/returns/mean: -1.0434796810150146
ppo/policy/advantages_mean: 1.909211277961731e-08
---------------------------------------------------------------------------------------------------


1666it [2:25:54,  3.24s/it]

objective/kl: 14.741482734680176
ppo/returns/mean: -0.8232057094573975
ppo/policy/advantages_mean: 0.0008806847035884857
---------------------------------------------------------------------------------------------------


1667it [2:25:57,  3.29s/it]

objective/kl: 8.729227066040039
ppo/returns/mean: -0.7361437082290649
ppo/policy/advantages_mean: -0.0009078215807676315
---------------------------------------------------------------------------------------------------


1668it [2:26:00,  3.31s/it]

objective/kl: 7.208895683288574
ppo/returns/mean: -0.6927446722984314
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1669it [2:26:04,  3.32s/it]

objective/kl: 3.9788994789123535
ppo/returns/mean: -0.5808842182159424
ppo/policy/advantages_mean: -0.0017060358077287674
---------------------------------------------------------------------------------------------------


1670it [2:26:07,  3.38s/it]

objective/kl: -1.5640223026275635
ppo/returns/mean: -0.40603065490722656
ppo/policy/advantages_mean: -4.5705586671829224e-05
---------------------------------------------------------------------------------------------------


1671it [2:26:11,  3.38s/it]

objective/kl: -1.7335013151168823
ppo/returns/mean: -0.36649447679519653
ppo/policy/advantages_mean: 0.0004087276756763458
---------------------------------------------------------------------------------------------------


1672it [2:26:14,  3.35s/it]

objective/kl: -4.3729963302612305
ppo/returns/mean: -0.18315482139587402
ppo/policy/advantages_mean: 0.004388602450489998
---------------------------------------------------------------------------------------------------


1673it [2:26:17,  3.40s/it]

objective/kl: -4.52114200592041
ppo/returns/mean: -0.13569819927215576
ppo/policy/advantages_mean: 0.005019068717956543
---------------------------------------------------------------------------------------------------


1674it [2:26:21,  3.38s/it]

objective/kl: -4.042242050170898
ppo/returns/mean: -0.08961234241724014
ppo/policy/advantages_mean: 0.0006048157811164856
---------------------------------------------------------------------------------------------------


1675it [2:26:24,  3.34s/it]

objective/kl: -2.3403334617614746
ppo/returns/mean: -0.11523321270942688
ppo/policy/advantages_mean: 0.004752937704324722
---------------------------------------------------------------------------------------------------


1676it [2:26:27,  3.31s/it]

objective/kl: 1.175240397453308
ppo/returns/mean: -0.21241772174835205
ppo/policy/advantages_mean: 0.004191134124994278
---------------------------------------------------------------------------------------------------


1677it [2:26:31,  3.31s/it]

objective/kl: 4.775269508361816
ppo/returns/mean: -0.34050899744033813
ppo/policy/advantages_mean: 0.006522778421640396
---------------------------------------------------------------------------------------------------


1678it [2:26:34,  3.33s/it]

objective/kl: 5.685379505157471
ppo/returns/mean: -0.34218981862068176
ppo/policy/advantages_mean: 0.000981360673904419
---------------------------------------------------------------------------------------------------


1679it [2:26:37,  3.29s/it]

objective/kl: 6.691580772399902
ppo/returns/mean: -0.37016141414642334
ppo/policy/advantages_mean: 0.002190973609685898
---------------------------------------------------------------------------------------------------


1680it [2:26:40,  3.27s/it]

objective/kl: 5.3019633293151855
ppo/returns/mean: -0.34855276346206665
ppo/policy/advantages_mean: -0.000822344096377492
---------------------------------------------------------------------------------------------------


1681it [2:26:44,  3.27s/it]

objective/kl: 2.2930057048797607
ppo/returns/mean: -0.2339085042476654
ppo/policy/advantages_mean: 0.0028842538595199585
---------------------------------------------------------------------------------------------------


1682it [2:26:47,  3.22s/it]

objective/kl: -0.8932487964630127
ppo/returns/mean: -0.06815291196107864
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


1683it [2:26:50,  3.27s/it]

objective/kl: -4.464820861816406
ppo/returns/mean: 0.07973163574934006
ppo/policy/advantages_mean: 3.166496753692627e-08
---------------------------------------------------------------------------------------------------


1684it [2:26:53,  3.21s/it]

objective/kl: -6.776263236999512
ppo/returns/mean: 0.21079480648040771
ppo/policy/advantages_mean: 5.960464477539063e-08
---------------------------------------------------------------------------------------------------


1685it [2:26:56,  3.19s/it]

objective/kl: -6.540729999542236
ppo/returns/mean: 0.2065751552581787
ppo/policy/advantages_mean: -0.0008519375696778297
---------------------------------------------------------------------------------------------------


1686it [2:26:59,  3.19s/it]

objective/kl: -7.656264781951904
ppo/returns/mean: 0.23514720797538757
ppo/policy/advantages_mean: -0.0022850846871733665
---------------------------------------------------------------------------------------------------


1687it [2:27:03,  3.19s/it]

objective/kl: -7.045220375061035
ppo/returns/mean: 0.24678966403007507
ppo/policy/advantages_mean: -0.0008087996393442154
---------------------------------------------------------------------------------------------------


1688it [2:27:06,  3.18s/it]

objective/kl: -5.637107849121094
ppo/returns/mean: 0.20209938287734985
ppo/policy/advantages_mean: -0.004856660962104797
---------------------------------------------------------------------------------------------------


1689it [2:27:09,  3.18s/it]

objective/kl: -4.497949600219727
ppo/returns/mean: 0.12333028763532639
ppo/policy/advantages_mean: -0.0008455216884613037
---------------------------------------------------------------------------------------------------


1690it [2:27:12,  3.16s/it]

objective/kl: -4.5214128494262695
ppo/returns/mean: 0.1619364321231842
ppo/policy/advantages_mean: 0.0009337924420833588
---------------------------------------------------------------------------------------------------


1691it [2:27:15,  3.15s/it]

objective/kl: -3.503023624420166
ppo/returns/mean: 0.12343573570251465
ppo/policy/advantages_mean: -0.00027861446142196655
---------------------------------------------------------------------------------------------------


1692it [2:27:18,  3.15s/it]

objective/kl: -4.793270111083984
ppo/returns/mean: 0.1538238227367401
ppo/policy/advantages_mean: -0.0008287802338600159
---------------------------------------------------------------------------------------------------


1693it [2:27:22,  3.15s/it]

objective/kl: -1.817002773284912
ppo/returns/mean: 0.03732422739267349
ppo/policy/advantages_mean: -0.0036093592643737793
---------------------------------------------------------------------------------------------------


1694it [2:27:25,  3.14s/it]

objective/kl: -2.2700438499450684
ppo/returns/mean: 0.0534973181784153
ppo/policy/advantages_mean: -0.0015777312219142914
---------------------------------------------------------------------------------------------------


1695it [2:27:28,  3.13s/it]

objective/kl: -5.2748308181762695
ppo/returns/mean: 0.1795385181903839
ppo/policy/advantages_mean: -3.725290298461914e-08
---------------------------------------------------------------------------------------------------


1696it [2:27:31,  3.13s/it]

objective/kl: -5.744859218597412
ppo/returns/mean: 0.1795622855424881
ppo/policy/advantages_mean: -0.0007579885423183441
---------------------------------------------------------------------------------------------------


1697it [2:27:34,  3.12s/it]

objective/kl: -4.179457187652588
ppo/returns/mean: 0.1713504195213318
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1698it [2:27:37,  3.11s/it]

objective/kl: -4.505353927612305
ppo/returns/mean: 0.17551003396511078
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1699it [2:27:40,  3.11s/it]

objective/kl: -4.214125156402588
ppo/returns/mean: 0.15257033705711365
ppo/policy/advantages_mean: 0.0013689622282981873
---------------------------------------------------------------------------------------------------


1700it [2:27:43,  3.13s/it]

objective/kl: -1.4750473499298096
ppo/returns/mean: 0.0414818599820137
ppo/policy/advantages_mean: 2.7939677238464355e-08
---------------------------------------------------------------------------------------------------


1701it [2:27:46,  3.12s/it]

objective/kl: 0.3283083140850067
ppo/returns/mean: -0.043819695711135864
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1702it [2:27:50,  3.13s/it]

objective/kl: -1.331545114517212
ppo/returns/mean: -0.016338612884283066
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1703it [2:27:53,  3.31s/it]

objective/kl: -0.31299567222595215
ppo/returns/mean: -0.042615678161382675
ppo/policy/advantages_mean: 4.470348358154297e-08
---------------------------------------------------------------------------------------------------


1704it [2:27:56,  3.26s/it]

objective/kl: 1.2797833681106567
ppo/returns/mean: -0.14789918065071106
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1705it [2:28:00,  3.21s/it]

objective/kl: -1.0541000366210938
ppo/returns/mean: -0.06488238275051117
ppo/policy/advantages_mean: -3.725290298461914e-08
---------------------------------------------------------------------------------------------------


1706it [2:28:03,  3.25s/it]

objective/kl: -0.7501241564750671
ppo/returns/mean: -0.07718874514102936
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1707it [2:28:06,  3.21s/it]

objective/kl: -1.1791255474090576
ppo/returns/mean: -0.07768435031175613
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1708it [2:28:09,  3.21s/it]

objective/kl: -1.5317591428756714
ppo/returns/mean: -0.06788592040538788
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1709it [2:28:12,  3.17s/it]

objective/kl: -1.9633660316467285
ppo/returns/mean: -0.0658770352602005
ppo/policy/advantages_mean: 4.6566128730773926e-09
---------------------------------------------------------------------------------------------------


1710it [2:28:15,  3.15s/it]

objective/kl: -2.1051182746887207
ppo/returns/mean: -0.052089255303144455
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


1711it [2:28:19,  3.18s/it]

objective/kl: -2.4576637744903564
ppo/returns/mean: -0.0350244902074337
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


1712it [2:28:22,  3.19s/it]

objective/kl: -0.9480772614479065
ppo/returns/mean: -0.09512326866388321
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1713it [2:28:25,  3.18s/it]

objective/kl: -1.888199806213379
ppo/returns/mean: -0.07400336861610413
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1714it [2:28:28,  3.23s/it]

objective/kl: -4.249460220336914
ppo/returns/mean: 0.04589666798710823
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1715it [2:28:32,  3.20s/it]

objective/kl: -3.851673126220703
ppo/returns/mean: 0.023442674428224564
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1716it [2:28:35,  3.19s/it]

objective/kl: -6.471861839294434
ppo/returns/mean: 0.08801500499248505
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1717it [2:28:38,  3.17s/it]

objective/kl: -6.925580024719238
ppo/returns/mean: 0.10153263807296753
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1718it [2:28:41,  3.16s/it]

objective/kl: -6.279716491699219
ppo/returns/mean: 0.1194605752825737
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1719it [2:28:44,  3.14s/it]

objective/kl: -5.423986434936523
ppo/returns/mean: 0.1010383814573288
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


1720it [2:28:47,  3.14s/it]

objective/kl: -5.9308180809021
ppo/returns/mean: 0.12211664766073227
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1721it [2:28:50,  3.13s/it]

objective/kl: -4.9834675788879395
ppo/returns/mean: 0.1358269900083542
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


1722it [2:28:53,  3.14s/it]

objective/kl: -6.436747074127197
ppo/returns/mean: 0.1686885952949524
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


1723it [2:28:57,  3.18s/it]

objective/kl: -4.548828125
ppo/returns/mean: 0.07945156842470169
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1724it [2:29:00,  3.17s/it]

objective/kl: -11.62644100189209
ppo/returns/mean: 0.22319337725639343
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1725it [2:29:03,  3.14s/it]

objective/kl: -15.214540481567383
ppo/returns/mean: 0.23213252425193787
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1726it [2:29:06,  3.12s/it]

objective/kl: -23.11793327331543
ppo/returns/mean: 0.35912010073661804
ppo/policy/advantages_mean: 1.0244548320770264e-08
---------------------------------------------------------------------------------------------------


1727it [2:29:09,  3.12s/it]

objective/kl: -25.574539184570312
ppo/returns/mean: 0.357232928276062
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1728it [2:29:12,  3.12s/it]

objective/kl: -21.24850082397461
ppo/returns/mean: 0.2791063189506531
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1729it [2:29:15,  3.13s/it]

objective/kl: -17.962059020996094
ppo/returns/mean: 0.23610252141952515
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


1730it [2:29:19,  3.13s/it]

objective/kl: -9.340261459350586
ppo/returns/mean: 0.08843778818845749
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1731it [2:29:22,  3.12s/it]

objective/kl: -9.211389541625977
ppo/returns/mean: 0.1445547640323639
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


1732it [2:29:25,  3.11s/it]

objective/kl: -11.373184204101562
ppo/returns/mean: 0.2600042521953583
ppo/policy/advantages_mean: -6.332993507385254e-08
---------------------------------------------------------------------------------------------------


1733it [2:29:28,  3.23s/it]

objective/kl: -11.572694778442383
ppo/returns/mean: 0.31386667490005493
ppo/policy/advantages_mean: -7.078051567077637e-08
---------------------------------------------------------------------------------------------------


1734it [2:29:31,  3.19s/it]

objective/kl: -13.440021514892578
ppo/returns/mean: 0.39405643939971924
ppo/policy/advantages_mean: -4.6566128730773926e-08
---------------------------------------------------------------------------------------------------


1735it [2:29:34,  3.17s/it]

objective/kl: -14.730449676513672
ppo/returns/mean: 0.44946491718292236
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


1736it [2:29:38,  3.14s/it]

objective/kl: -14.561721801757812
ppo/returns/mean: 0.46475544571876526
ppo/policy/advantages_mean: -2.60770320892334e-08
---------------------------------------------------------------------------------------------------


1737it [2:29:41,  3.12s/it]

objective/kl: -13.22398567199707
ppo/returns/mean: 0.43741971254348755
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1738it [2:29:44,  3.11s/it]

objective/kl: -11.109236717224121
ppo/returns/mean: 0.39559003710746765
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


1739it [2:29:47,  3.10s/it]

objective/kl: -8.297080993652344
ppo/returns/mean: 0.2823644280433655
ppo/policy/advantages_mean: 2.3283064365386963e-08
---------------------------------------------------------------------------------------------------


1740it [2:29:50,  3.16s/it]

objective/kl: -4.738811492919922
ppo/returns/mean: 0.15489783883094788
ppo/policy/advantages_mean: -1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


1741it [2:29:53,  3.15s/it]

objective/kl: -0.444278359413147
ppo/returns/mean: -0.003001755103468895
ppo/policy/advantages_mean: -3.91155481338501e-08
---------------------------------------------------------------------------------------------------


1742it [2:29:56,  3.14s/it]

objective/kl: 0.7145928740501404
ppo/returns/mean: -0.046217430382966995
ppo/policy/advantages_mean: -0.00024506449699401855
---------------------------------------------------------------------------------------------------


1743it [2:30:00,  3.20s/it]

objective/kl: 4.9646806716918945
ppo/returns/mean: -0.2597377300262451
ppo/policy/advantages_mean: -0.0002122335135936737
---------------------------------------------------------------------------------------------------


1744it [2:30:03,  3.19s/it]

objective/kl: 7.622106552124023
ppo/returns/mean: -0.3830615282058716
ppo/policy/advantages_mean: -7.483805529773235e-05
---------------------------------------------------------------------------------------------------


1745it [2:30:06,  3.18s/it]

objective/kl: 8.563566207885742
ppo/returns/mean: -0.4561408460140228
ppo/policy/advantages_mean: 0.0022593140602111816
---------------------------------------------------------------------------------------------------


1746it [2:30:09,  3.18s/it]

objective/kl: 9.387399673461914
ppo/returns/mean: -0.48778533935546875
ppo/policy/advantages_mean: 0.0013724453747272491
---------------------------------------------------------------------------------------------------


1747it [2:30:12,  3.17s/it]

objective/kl: 9.29728889465332
ppo/returns/mean: -0.48774224519729614
ppo/policy/advantages_mean: -0.0015171170234680176
---------------------------------------------------------------------------------------------------


1748it [2:30:15,  3.16s/it]

objective/kl: 9.172760963439941
ppo/returns/mean: -0.5119300484657288
ppo/policy/advantages_mean: -3.166496753692627e-08
---------------------------------------------------------------------------------------------------


1749it [2:30:19,  3.18s/it]

objective/kl: 9.65654182434082
ppo/returns/mean: -0.4986726939678192
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1750it [2:30:22,  3.20s/it]

objective/kl: 9.00685977935791
ppo/returns/mean: -0.49845173954963684
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1751it [2:30:25,  3.18s/it]

objective/kl: 8.077932357788086
ppo/returns/mean: -0.45842546224594116
ppo/policy/advantages_mean: 0.0007139928638935089
---------------------------------------------------------------------------------------------------


1752it [2:30:28,  3.18s/it]

objective/kl: 6.939146041870117
ppo/returns/mean: -0.4119340479373932
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1753it [2:30:31,  3.16s/it]

objective/kl: 5.341117858886719
ppo/returns/mean: -0.34428274631500244
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1754it [2:30:34,  3.14s/it]

objective/kl: 4.554649353027344
ppo/returns/mean: -0.31712180376052856
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


1755it [2:30:38,  3.15s/it]

objective/kl: 1.5839767456054688
ppo/returns/mean: -0.21401432156562805
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1756it [2:30:41,  3.14s/it]

objective/kl: 0.43047478795051575
ppo/returns/mean: -0.16452208161354065
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


1757it [2:30:44,  3.14s/it]

objective/kl: -1.3718574047088623
ppo/returns/mean: -0.11555713415145874
ppo/policy/advantages_mean: -1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


1758it [2:30:47,  3.12s/it]

objective/kl: -1.7372303009033203
ppo/returns/mean: -0.08232098817825317
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1759it [2:30:50,  3.13s/it]

objective/kl: -1.4195213317871094
ppo/returns/mean: -0.08926191926002502
ppo/policy/advantages_mean: -0.0027207881212234497
---------------------------------------------------------------------------------------------------


1760it [2:30:53,  3.12s/it]

objective/kl: -1.6024867296218872
ppo/returns/mean: -0.04217952489852905
ppo/policy/advantages_mean: -1.3969838619232178e-08
---------------------------------------------------------------------------------------------------


1761it [2:30:56,  3.14s/it]

objective/kl: -2.322725296020508
ppo/returns/mean: -0.051452022045850754
ppo/policy/advantages_mean: -2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


1762it [2:31:00,  3.14s/it]

objective/kl: -1.3042737245559692
ppo/returns/mean: -0.07265813648700714
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1763it [2:31:03,  3.13s/it]

objective/kl: -0.2291499674320221
ppo/returns/mean: -0.09183911234140396
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1764it [2:31:06,  3.13s/it]

objective/kl: -0.6830554008483887
ppo/returns/mean: -0.09269796311855316
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1765it [2:31:09,  3.14s/it]

objective/kl: -4.2588348388671875
ppo/returns/mean: 0.022975334897637367
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


1766it [2:31:12,  3.14s/it]

objective/kl: -5.736237525939941
ppo/returns/mean: 0.05523104965686798
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


1767it [2:31:15,  3.13s/it]

objective/kl: -5.528321743011475
ppo/returns/mean: 0.09093280881643295
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


1768it [2:31:18,  3.13s/it]

objective/kl: -6.355032920837402
ppo/returns/mean: 0.1243499368429184
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1769it [2:31:21,  3.12s/it]

objective/kl: -7.020524024963379
ppo/returns/mean: 0.15393301844596863
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1770it [2:31:25,  3.16s/it]

objective/kl: -6.877813816070557
ppo/returns/mean: 0.16642826795578003
ppo/policy/advantages_mean: -3.725290298461914e-08
---------------------------------------------------------------------------------------------------


1771it [2:31:28,  3.15s/it]

objective/kl: -6.720401763916016
ppo/returns/mean: 0.17120254039764404
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1772it [2:31:31,  3.15s/it]

objective/kl: -7.966195583343506
ppo/returns/mean: 0.21721473336219788
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1773it [2:31:34,  3.15s/it]

objective/kl: -7.910305500030518
ppo/returns/mean: 0.2113482654094696
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1774it [2:31:37,  3.21s/it]

objective/kl: -8.13563060760498
ppo/returns/mean: 0.24582865834236145
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


1775it [2:31:41,  3.19s/it]

objective/kl: -8.651390075683594
ppo/returns/mean: 0.24987518787384033
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1776it [2:31:44,  3.19s/it]

objective/kl: -9.343769073486328
ppo/returns/mean: 0.28653740882873535
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


1777it [2:31:47,  3.17s/it]

objective/kl: -9.270485877990723
ppo/returns/mean: 0.30423644185066223
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1778it [2:31:50,  3.15s/it]

objective/kl: -9.96255111694336
ppo/returns/mean: 0.2802743911743164
ppo/policy/advantages_mean: 3.725290298461914e-08
---------------------------------------------------------------------------------------------------


1779it [2:31:54,  3.40s/it]

objective/kl: -8.466962814331055
ppo/returns/mean: 0.27183687686920166
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1780it [2:31:57,  3.32s/it]

objective/kl: -7.120198726654053
ppo/returns/mean: 0.22196143865585327
ppo/policy/advantages_mean: 3.725290298461914e-08
---------------------------------------------------------------------------------------------------


1781it [2:32:00,  3.25s/it]

objective/kl: -6.548513889312744
ppo/returns/mean: 0.221237450838089
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1782it [2:32:03,  3.23s/it]

objective/kl: -6.466238975524902
ppo/returns/mean: 0.20168253779411316
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


1783it [2:32:07,  3.20s/it]

objective/kl: -6.125014305114746
ppo/returns/mean: 0.1764632761478424
ppo/policy/advantages_mean: -4.6566128730773926e-09
---------------------------------------------------------------------------------------------------


1784it [2:32:10,  3.16s/it]

objective/kl: -4.685215473175049
ppo/returns/mean: 0.11614494025707245
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1785it [2:32:13,  3.14s/it]

objective/kl: -3.9940786361694336
ppo/returns/mean: 0.07835795730352402
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


1786it [2:32:16,  3.16s/it]

objective/kl: -4.025615692138672
ppo/returns/mean: 0.03273884579539299
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1787it [2:32:19,  3.14s/it]

objective/kl: -4.000962257385254
ppo/returns/mean: 0.026367468759417534
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1788it [2:32:22,  3.13s/it]

objective/kl: -4.47259521484375
ppo/returns/mean: 0.01789778470993042
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1789it [2:32:25,  3.12s/it]

objective/kl: -4.728016376495361
ppo/returns/mean: 0.03974822163581848
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1790it [2:32:28,  3.11s/it]

objective/kl: -4.987439155578613
ppo/returns/mean: 0.059526748955249786
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1791it [2:32:31,  3.11s/it]

objective/kl: -4.236954212188721
ppo/returns/mean: 0.004262142349034548
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1792it [2:32:35,  3.12s/it]

objective/kl: -4.917292594909668
ppo/returns/mean: 0.03331952169537544
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1793it [2:32:38,  3.12s/it]

objective/kl: -4.854362487792969
ppo/returns/mean: 0.004384330473840237
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


1794it [2:32:41,  3.13s/it]

objective/kl: -6.1044158935546875
ppo/returns/mean: 0.05799061805009842
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


1795it [2:32:44,  3.15s/it]

objective/kl: -6.6466569900512695
ppo/returns/mean: 0.07032695412635803
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1796it [2:32:47,  3.18s/it]

objective/kl: -7.282840251922607
ppo/returns/mean: 0.11732462048530579
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


1797it [2:32:50,  3.16s/it]

objective/kl: -8.650508880615234
ppo/returns/mean: 0.16746002435684204
ppo/policy/advantages_mean: 1.3969838619232178e-08
---------------------------------------------------------------------------------------------------


1798it [2:32:53,  3.15s/it]

objective/kl: -7.986833572387695
ppo/returns/mean: 0.16899511218070984
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


1799it [2:32:57,  3.14s/it]

objective/kl: -8.727479934692383
ppo/returns/mean: 0.19292956590652466
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1800it [2:33:00,  3.13s/it]

objective/kl: -9.737979888916016
ppo/returns/mean: 0.2368502914905548
ppo/policy/advantages_mean: 3.3527612686157227e-08
---------------------------------------------------------------------------------------------------


1801it [2:33:03,  3.13s/it]

objective/kl: -10.653989791870117
ppo/returns/mean: 0.25522327423095703
ppo/policy/advantages_mean: -2.7939677238464355e-08
---------------------------------------------------------------------------------------------------


1802it [2:33:06,  3.13s/it]

objective/kl: -12.267395973205566
ppo/returns/mean: 0.3114490807056427
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


1803it [2:33:09,  3.13s/it]

objective/kl: -13.638896942138672
ppo/returns/mean: 0.3913939595222473
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1804it [2:33:12,  3.13s/it]

objective/kl: -18.783079147338867
ppo/returns/mean: 0.5804255604743958
ppo/policy/advantages_mean: -3.3527612686157227e-08
---------------------------------------------------------------------------------------------------


1805it [2:33:15,  3.13s/it]

objective/kl: -22.241867065429688
ppo/returns/mean: 0.7115992307662964
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1806it [2:33:19,  3.16s/it]

objective/kl: -21.876750946044922
ppo/returns/mean: 0.6730203628540039
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1807it [2:33:22,  3.14s/it]

objective/kl: -31.34325408935547
ppo/returns/mean: 1.0444037914276123
ppo/policy/advantages_mean: 1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


1808it [2:33:25,  3.13s/it]

objective/kl: -34.08580017089844
ppo/returns/mean: 1.1861026287078857
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


1809it [2:33:28,  3.12s/it]

objective/kl: -36.6546630859375
ppo/returns/mean: 1.4553358554840088
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


1810it [2:33:31,  3.12s/it]

objective/kl: -42.81889724731445
ppo/returns/mean: 1.7039958238601685
ppo/policy/advantages_mean: -0.00013288483023643494
---------------------------------------------------------------------------------------------------


1811it [2:33:34,  3.18s/it]

objective/kl: -41.25740051269531
ppo/returns/mean: 1.7740063667297363
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1812it [2:33:37,  3.15s/it]

objective/kl: -36.29547119140625
ppo/returns/mean: 1.5251891613006592
ppo/policy/advantages_mean: 1.3969838619232178e-08
---------------------------------------------------------------------------------------------------


1813it [2:33:40,  3.13s/it]

objective/kl: -36.77285385131836
ppo/returns/mean: 1.508903980255127
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


1814it [2:33:44,  3.13s/it]

objective/kl: -38.03144836425781
ppo/returns/mean: 1.6006224155426025
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


1815it [2:33:47,  3.13s/it]

objective/kl: -32.696372985839844
ppo/returns/mean: 1.3099746704101562
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


1816it [2:33:50,  3.17s/it]

objective/kl: -26.877660751342773
ppo/returns/mean: 1.0557507276535034
ppo/policy/advantages_mean: 0.0054505616426467896
---------------------------------------------------------------------------------------------------


1817it [2:33:53,  3.17s/it]

objective/kl: -28.377859115600586
ppo/returns/mean: 1.1595463752746582
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


1818it [2:33:56,  3.16s/it]

objective/kl: -2.3096084594726562
ppo/returns/mean: 0.016290739178657532
ppo/policy/advantages_mean: 0.0011687744408845901
---------------------------------------------------------------------------------------------------


1819it [2:33:59,  3.16s/it]

objective/kl: 5.884228229522705
ppo/returns/mean: -0.268716037273407
ppo/policy/advantages_mean: 3.725290298461914e-08
---------------------------------------------------------------------------------------------------


1820it [2:34:03,  3.14s/it]

objective/kl: 16.481788635253906
ppo/returns/mean: -0.6853467226028442
ppo/policy/advantages_mean: 9.313225746154785e-10
---------------------------------------------------------------------------------------------------


1821it [2:34:06,  3.14s/it]

objective/kl: 23.625242233276367
ppo/returns/mean: -0.977680504322052
ppo/policy/advantages_mean: -4.467461258172989e-05
---------------------------------------------------------------------------------------------------


1822it [2:34:09,  3.25s/it]

objective/kl: 29.32025146484375
ppo/returns/mean: -1.181199073791504
ppo/policy/advantages_mean: -0.0001341458410024643
---------------------------------------------------------------------------------------------------


1823it [2:34:12,  3.23s/it]

objective/kl: 31.58901023864746
ppo/returns/mean: -1.2821334600448608
ppo/policy/advantages_mean: -0.00044465879909694195
---------------------------------------------------------------------------------------------------


1824it [2:34:16,  3.22s/it]

objective/kl: 30.60456085205078
ppo/returns/mean: -1.2648289203643799
ppo/policy/advantages_mean: 3.9956532418727875e-05
---------------------------------------------------------------------------------------------------


1825it [2:34:19,  3.18s/it]

objective/kl: 31.280000686645508
ppo/returns/mean: -1.294884443283081
ppo/policy/advantages_mean: 0.00011007674038410187
---------------------------------------------------------------------------------------------------


1826it [2:34:22,  3.16s/it]

objective/kl: 30.803152084350586
ppo/returns/mean: -1.2919275760650635
ppo/policy/advantages_mean: -1.487787812948227e-06
---------------------------------------------------------------------------------------------------


1827it [2:34:25,  3.22s/it]

objective/kl: 30.771995544433594
ppo/returns/mean: -1.3093938827514648
ppo/policy/advantages_mean: -0.0014866814017295837
---------------------------------------------------------------------------------------------------


1828it [2:34:28,  3.19s/it]

objective/kl: 30.428823471069336
ppo/returns/mean: -1.3114885091781616
ppo/policy/advantages_mean: 3.259629011154175e-09
---------------------------------------------------------------------------------------------------


1829it [2:34:31,  3.19s/it]

objective/kl: 30.511869430541992
ppo/returns/mean: -1.2993485927581787
ppo/policy/advantages_mean: -2.7939677238464355e-08
---------------------------------------------------------------------------------------------------


1830it [2:34:35,  3.19s/it]

objective/kl: 29.70409393310547
ppo/returns/mean: -1.263035774230957
ppo/policy/advantages_mean: -8.989009074866772e-07
---------------------------------------------------------------------------------------------------


1831it [2:34:38,  3.18s/it]

objective/kl: 29.062780380249023
ppo/returns/mean: -1.2768372297286987
ppo/policy/advantages_mean: 3.405846655368805e-05
---------------------------------------------------------------------------------------------------


1832it [2:34:41,  3.16s/it]

objective/kl: 29.2415771484375
ppo/returns/mean: -1.270991563796997
ppo/policy/advantages_mean: -6.183981895446777e-07
---------------------------------------------------------------------------------------------------


1833it [2:34:44,  3.15s/it]

objective/kl: 29.944446563720703
ppo/returns/mean: -1.2651512622833252
ppo/policy/advantages_mean: -0.0012779468670487404
---------------------------------------------------------------------------------------------------


1834it [2:34:47,  3.16s/it]

objective/kl: 26.791017532348633
ppo/returns/mean: -1.2073255777359009
ppo/policy/advantages_mean: 4.937313497066498e-05
---------------------------------------------------------------------------------------------------


1835it [2:34:50,  3.16s/it]

objective/kl: 26.33414077758789
ppo/returns/mean: -1.2088907957077026
ppo/policy/advantages_mean: -0.000791783444583416
---------------------------------------------------------------------------------------------------


1836it [2:34:54,  3.34s/it]

objective/kl: 26.21073341369629
ppo/returns/mean: -1.1983163356781006
ppo/policy/advantages_mean: -0.0016150930896401405
---------------------------------------------------------------------------------------------------


1837it [2:34:57,  3.28s/it]

objective/kl: 22.55499267578125
ppo/returns/mean: -1.0659668445587158
ppo/policy/advantages_mean: 0.0006948411464691162
---------------------------------------------------------------------------------------------------


1838it [2:35:01,  3.26s/it]

objective/kl: 19.151596069335938
ppo/returns/mean: -0.9465123414993286
ppo/policy/advantages_mean: -0.0016969535499811172
---------------------------------------------------------------------------------------------------


1839it [2:35:04,  3.29s/it]

objective/kl: 8.723583221435547
ppo/returns/mean: -0.598500669002533
ppo/policy/advantages_mean: 0.006041951477527618
---------------------------------------------------------------------------------------------------


1840it [2:35:07,  3.25s/it]

objective/kl: -7.191879749298096
ppo/returns/mean: 0.17752179503440857
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1841it [2:35:10,  3.21s/it]

objective/kl: -4.890722274780273
ppo/returns/mean: 0.14813238382339478
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1842it [2:35:13,  3.19s/it]

objective/kl: -20.499847412109375
ppo/returns/mean: 0.7664819955825806
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1843it [2:35:16,  3.17s/it]

objective/kl: -38.120609283447266
ppo/returns/mean: 1.5235941410064697
ppo/policy/advantages_mean: 4.470348358154297e-08
---------------------------------------------------------------------------------------------------


1844it [2:35:19,  3.14s/it]

objective/kl: -36.23030471801758
ppo/returns/mean: 1.4656896591186523
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1845it [2:35:23,  3.12s/it]

objective/kl: -35.62126922607422
ppo/returns/mean: 1.4562530517578125
ppo/policy/advantages_mean: -3.073364496231079e-08
---------------------------------------------------------------------------------------------------


1846it [2:35:26,  3.17s/it]

objective/kl: -35.004364013671875
ppo/returns/mean: 1.4565147161483765
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


1847it [2:35:29,  3.15s/it]

objective/kl: -34.8023567199707
ppo/returns/mean: 1.4664256572723389
ppo/policy/advantages_mean: -2.3283064365386963e-08
---------------------------------------------------------------------------------------------------


1848it [2:35:32,  3.14s/it]

objective/kl: -32.83073425292969
ppo/returns/mean: 1.443530797958374
ppo/policy/advantages_mean: -2.60770320892334e-08
---------------------------------------------------------------------------------------------------


1849it [2:35:35,  3.16s/it]

objective/kl: -31.372474670410156
ppo/returns/mean: 1.422236442565918
ppo/policy/advantages_mean: -1.0244548320770264e-08
---------------------------------------------------------------------------------------------------


1850it [2:35:38,  3.14s/it]

objective/kl: -21.766185760498047
ppo/returns/mean: 1.0257885456085205
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1851it [2:35:41,  3.12s/it]

objective/kl: -0.7532048225402832
ppo/returns/mean: 0.0640089213848114
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


1852it [2:35:45,  3.11s/it]

objective/kl: 11.649491310119629
ppo/returns/mean: -0.47225868701934814
ppo/policy/advantages_mean: -0.0013581328094005585
---------------------------------------------------------------------------------------------------


1853it [2:35:48,  3.10s/it]

objective/kl: 13.367919921875
ppo/returns/mean: -0.5736408233642578
ppo/policy/advantages_mean: -0.00013193674385547638
---------------------------------------------------------------------------------------------------


1854it [2:35:51,  3.17s/it]

objective/kl: 16.115787506103516
ppo/returns/mean: -0.7867704629898071
ppo/policy/advantages_mean: 0.006766037084162235
---------------------------------------------------------------------------------------------------


1855it [2:35:54,  3.16s/it]

objective/kl: 10.966451644897461
ppo/returns/mean: -0.6088680028915405
ppo/policy/advantages_mean: 0.0013822559267282486
---------------------------------------------------------------------------------------------------


1856it [2:35:57,  3.18s/it]

objective/kl: 13.526665687561035
ppo/returns/mean: -0.6983464956283569
ppo/policy/advantages_mean: 0.0007553361356258392
---------------------------------------------------------------------------------------------------


1857it [2:36:00,  3.14s/it]

objective/kl: 19.314790725708008
ppo/returns/mean: -0.9032394886016846
ppo/policy/advantages_mean: 4.6566128730773926e-08
---------------------------------------------------------------------------------------------------


1858it [2:36:03,  3.14s/it]

objective/kl: 20.33282470703125
ppo/returns/mean: -0.9705728888511658
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1859it [2:36:07,  3.12s/it]

objective/kl: 20.889904022216797
ppo/returns/mean: -1.0414085388183594
ppo/policy/advantages_mean: 2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


1860it [2:36:10,  3.11s/it]

objective/kl: 21.31194305419922
ppo/returns/mean: -1.0706063508987427
ppo/policy/advantages_mean: -3.725290298461914e-08
---------------------------------------------------------------------------------------------------


1861it [2:36:13,  3.10s/it]

objective/kl: 21.593589782714844
ppo/returns/mean: -1.0740182399749756
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1862it [2:36:16,  3.10s/it]

objective/kl: 21.701629638671875
ppo/returns/mean: -1.087911605834961
ppo/policy/advantages_mean: -2.60770320892334e-08
---------------------------------------------------------------------------------------------------


1863it [2:36:19,  3.11s/it]

objective/kl: 22.132719039916992
ppo/returns/mean: -1.0837819576263428
ppo/policy/advantages_mean: -2.421438694000244e-08
---------------------------------------------------------------------------------------------------


1864it [2:36:22,  3.14s/it]

objective/kl: 21.933732986450195
ppo/returns/mean: -1.0899139642715454
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1865it [2:36:25,  3.12s/it]

objective/kl: 21.714027404785156
ppo/returns/mean: -1.0802149772644043
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1866it [2:36:28,  3.12s/it]

objective/kl: 21.02775764465332
ppo/returns/mean: -1.0644805431365967
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1867it [2:36:31,  3.11s/it]

objective/kl: 22.285123825073242
ppo/returns/mean: -1.0718306303024292
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1868it [2:36:35,  3.13s/it]

objective/kl: 21.662973403930664
ppo/returns/mean: -1.0535756349563599
ppo/policy/advantages_mean: -2.60770320892334e-08
---------------------------------------------------------------------------------------------------


1869it [2:36:38,  3.10s/it]

objective/kl: 31.11294174194336
ppo/returns/mean: -1.3161022663116455
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1870it [2:36:41,  3.10s/it]

objective/kl: 22.087932586669922
ppo/returns/mean: -1.0785555839538574
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1871it [2:36:44,  3.10s/it]

objective/kl: 21.068119049072266
ppo/returns/mean: -1.0253732204437256
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1872it [2:36:47,  3.13s/it]

objective/kl: 26.957801818847656
ppo/returns/mean: -1.2165241241455078
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


1873it [2:36:50,  3.12s/it]

objective/kl: 21.59824562072754
ppo/returns/mean: -1.0620453357696533
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1874it [2:36:53,  3.12s/it]

objective/kl: 21.512134552001953
ppo/returns/mean: -1.0545282363891602
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


1875it [2:36:56,  3.12s/it]

objective/kl: 21.811058044433594
ppo/returns/mean: -1.077392816543579
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1876it [2:37:00,  3.13s/it]

objective/kl: 21.329612731933594
ppo/returns/mean: -1.0584427118301392
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1877it [2:37:03,  3.19s/it]

objective/kl: 21.861621856689453
ppo/returns/mean: -1.0814392566680908
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1878it [2:37:06,  3.24s/it]

objective/kl: 27.451412200927734
ppo/returns/mean: -1.2535130977630615
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


1879it [2:37:09,  3.24s/it]

objective/kl: 22.177410125732422
ppo/returns/mean: -1.078681230545044
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


1880it [2:37:13,  3.21s/it]

objective/kl: 22.141468048095703
ppo/returns/mean: -1.0712621212005615
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1881it [2:37:16,  3.19s/it]

objective/kl: 24.525848388671875
ppo/returns/mean: -1.1706798076629639
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1882it [2:37:19,  3.18s/it]

objective/kl: 22.228761672973633
ppo/returns/mean: -1.0883835554122925
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


1883it [2:37:22,  3.17s/it]

objective/kl: 21.89775848388672
ppo/returns/mean: -1.0759501457214355
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1884it [2:37:25,  3.16s/it]

objective/kl: 22.64794158935547
ppo/returns/mean: -1.11598801612854
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


1885it [2:37:28,  3.14s/it]

objective/kl: 22.548969268798828
ppo/returns/mean: -1.0960421562194824
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1886it [2:37:32,  3.20s/it]

objective/kl: 22.089160919189453
ppo/returns/mean: -1.0924530029296875
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1887it [2:37:35,  3.18s/it]

objective/kl: 20.56920623779297
ppo/returns/mean: -1.0307201147079468
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1888it [2:37:38,  3.15s/it]

objective/kl: 25.549480438232422
ppo/returns/mean: -1.1762986183166504
ppo/policy/advantages_mean: 1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


1889it [2:37:41,  3.14s/it]

objective/kl: 21.468854904174805
ppo/returns/mean: -1.054767370223999
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1890it [2:37:44,  3.13s/it]

objective/kl: 21.831722259521484
ppo/returns/mean: -1.068877935409546
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1891it [2:37:47,  3.12s/it]

objective/kl: 21.990436553955078
ppo/returns/mean: -1.0728905200958252
ppo/policy/advantages_mean: -2.514570951461792e-08
---------------------------------------------------------------------------------------------------


1892it [2:37:50,  3.16s/it]

objective/kl: 22.016223907470703
ppo/returns/mean: -1.0874724388122559
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


1893it [2:37:54,  3.17s/it]

objective/kl: 21.260793685913086
ppo/returns/mean: -1.0728943347930908
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1894it [2:37:57,  3.15s/it]

objective/kl: 21.23238182067871
ppo/returns/mean: -1.040839672088623
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1895it [2:38:00,  3.14s/it]

objective/kl: 22.022890090942383
ppo/returns/mean: -1.057915210723877
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1896it [2:38:03,  3.14s/it]

objective/kl: 22.82796859741211
ppo/returns/mean: -1.108755350112915
ppo/policy/advantages_mean: -2.60770320892334e-08
---------------------------------------------------------------------------------------------------


1897it [2:38:06,  3.14s/it]

objective/kl: 22.725753784179688
ppo/returns/mean: -1.0971721410751343
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1898it [2:38:09,  3.13s/it]

objective/kl: 22.234107971191406
ppo/returns/mean: -1.0854252576828003
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1899it [2:38:12,  3.13s/it]

objective/kl: 22.159748077392578
ppo/returns/mean: -1.091116189956665
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1900it [2:38:15,  3.12s/it]

objective/kl: 21.96697235107422
ppo/returns/mean: -1.0771149396896362
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1901it [2:38:19,  3.12s/it]

objective/kl: 21.748554229736328
ppo/returns/mean: -1.081390142440796
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1902it [2:38:22,  3.10s/it]

objective/kl: 23.351886749267578
ppo/returns/mean: -1.1252670288085938
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


1903it [2:38:25,  3.21s/it]

objective/kl: 20.444581985473633
ppo/returns/mean: -1.0441501140594482
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1904it [2:38:28,  3.20s/it]

objective/kl: 20.774944305419922
ppo/returns/mean: -1.059598445892334
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1905it [2:38:31,  3.17s/it]

objective/kl: 19.734663009643555
ppo/returns/mean: -1.017482042312622
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1906it [2:38:34,  3.14s/it]

objective/kl: 15.126955032348633
ppo/returns/mean: -0.8272300958633423
ppo/policy/advantages_mean: -3.725290298461914e-08
---------------------------------------------------------------------------------------------------


1907it [2:38:38,  3.15s/it]

objective/kl: 11.51811408996582
ppo/returns/mean: -0.698326051235199
ppo/policy/advantages_mean: 3.5390257835388184e-08
---------------------------------------------------------------------------------------------------


1908it [2:38:41,  3.16s/it]

objective/kl: 8.383354187011719
ppo/returns/mean: -0.5356172323226929
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1909it [2:38:44,  3.15s/it]

objective/kl: 7.315945625305176
ppo/returns/mean: -0.491208553314209
ppo/policy/advantages_mean: 3.0586495995521545e-05
---------------------------------------------------------------------------------------------------


1910it [2:38:47,  3.15s/it]

objective/kl: 7.641757011413574
ppo/returns/mean: -0.4931414723396301
ppo/policy/advantages_mean: -0.0007965937256813049
---------------------------------------------------------------------------------------------------


1911it [2:38:50,  3.13s/it]

objective/kl: 5.608443737030029
ppo/returns/mean: -0.38851049542427063
ppo/policy/advantages_mean: 0.0010440610349178314
---------------------------------------------------------------------------------------------------


1912it [2:38:54,  3.25s/it]

objective/kl: 6.798885345458984
ppo/returns/mean: -0.42882490158081055
ppo/policy/advantages_mean: -0.0007825475186109543
---------------------------------------------------------------------------------------------------


1913it [2:38:57,  3.23s/it]

objective/kl: 5.049469947814941
ppo/returns/mean: -0.30849242210388184
ppo/policy/advantages_mean: -0.0013546422123908997
---------------------------------------------------------------------------------------------------


1914it [2:39:00,  3.23s/it]

objective/kl: 3.108231544494629
ppo/returns/mean: -0.24120470881462097
ppo/policy/advantages_mean: 0.0072846487164497375
---------------------------------------------------------------------------------------------------


1915it [2:39:03,  3.20s/it]

objective/kl: 0.4286428987979889
ppo/returns/mean: -0.06806425750255585
ppo/policy/advantages_mean: -1.1573545634746552e-05
---------------------------------------------------------------------------------------------------


1916it [2:39:06,  3.21s/it]

objective/kl: 1.6724531650543213
ppo/returns/mean: -0.07240985333919525
ppo/policy/advantages_mean: 0.0003720466047525406
---------------------------------------------------------------------------------------------------


1917it [2:39:10,  3.17s/it]

objective/kl: 1.7245185375213623
ppo/returns/mean: -0.06527495384216309
ppo/policy/advantages_mean: 8.568167686462402e-08
---------------------------------------------------------------------------------------------------


1918it [2:39:13,  3.19s/it]

objective/kl: 2.29941463470459
ppo/returns/mean: -0.07664081454277039
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


1919it [2:39:16,  3.20s/it]

objective/kl: 2.1556854248046875
ppo/returns/mean: -0.09969420731067657
ppo/policy/advantages_mean: 6.705522537231445e-08
---------------------------------------------------------------------------------------------------


1920it [2:39:19,  3.19s/it]

objective/kl: 3.1324596405029297
ppo/returns/mean: -0.13359194993972778
ppo/policy/advantages_mean: -4.470348358154297e-08
---------------------------------------------------------------------------------------------------


1921it [2:39:22,  3.16s/it]

objective/kl: 2.1983633041381836
ppo/returns/mean: -0.1297273337841034
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1922it [2:39:25,  3.15s/it]

objective/kl: 3.1542277336120605
ppo/returns/mean: -0.14257729053497314
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1923it [2:39:28,  3.14s/it]

objective/kl: 4.104352951049805
ppo/returns/mean: -0.20947760343551636
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


1924it [2:39:32,  3.14s/it]

objective/kl: 5.153512001037598
ppo/returns/mean: -0.24958667159080505
ppo/policy/advantages_mean: 6.51925802230835e-08
---------------------------------------------------------------------------------------------------


1925it [2:39:35,  3.12s/it]

objective/kl: 7.0082292556762695
ppo/returns/mean: -0.3256921172142029
ppo/policy/advantages_mean: 4.0978193283081055e-08
---------------------------------------------------------------------------------------------------


1926it [2:39:38,  3.13s/it]

objective/kl: 7.6434149742126465
ppo/returns/mean: -0.3953269422054291
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


1927it [2:39:41,  3.15s/it]

objective/kl: 9.38365364074707
ppo/returns/mean: -0.4649757742881775
ppo/policy/advantages_mean: 3.91155481338501e-08
---------------------------------------------------------------------------------------------------


1928it [2:39:44,  3.15s/it]

objective/kl: 12.865248680114746
ppo/returns/mean: -0.6448339819908142
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


1929it [2:39:48,  3.22s/it]

objective/kl: 15.75551986694336
ppo/returns/mean: -0.8110060691833496
ppo/policy/advantages_mean: -4.377216100692749e-08
---------------------------------------------------------------------------------------------------


1930it [2:39:51,  3.26s/it]

objective/kl: 14.44711685180664
ppo/returns/mean: -0.7715907096862793
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1931it [2:39:54,  3.23s/it]

objective/kl: 13.320024490356445
ppo/returns/mean: -0.7864603400230408
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1932it [2:39:57,  3.19s/it]

objective/kl: 13.162826538085938
ppo/returns/mean: -0.770573616027832
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


1933it [2:40:00,  3.17s/it]

objective/kl: 12.483312606811523
ppo/returns/mean: -0.6779387593269348
ppo/policy/advantages_mean: -0.00047312863171100616
---------------------------------------------------------------------------------------------------


1934it [2:40:04,  3.22s/it]

objective/kl: 14.75185775756836
ppo/returns/mean: -0.6565374732017517
ppo/policy/advantages_mean: 0.0007611773908138275
---------------------------------------------------------------------------------------------------


1935it [2:40:07,  3.21s/it]

objective/kl: 10.061053276062012
ppo/returns/mean: -0.7019240856170654
ppo/policy/advantages_mean: 0.0005048587918281555
---------------------------------------------------------------------------------------------------


1936it [2:40:10,  3.18s/it]

objective/kl: 10.314906120300293
ppo/returns/mean: -0.7267434000968933
ppo/policy/advantages_mean: 0.00041696615517139435
---------------------------------------------------------------------------------------------------


1937it [2:40:13,  3.16s/it]

objective/kl: 11.483442306518555
ppo/returns/mean: -0.7831683158874512
ppo/policy/advantages_mean: 0.0008323527872562408
---------------------------------------------------------------------------------------------------


1938it [2:40:16,  3.15s/it]

objective/kl: 10.46006965637207
ppo/returns/mean: -0.7515017986297607
ppo/policy/advantages_mean: 0.002511207014322281
---------------------------------------------------------------------------------------------------


1939it [2:40:19,  3.14s/it]

objective/kl: 11.77633285522461
ppo/returns/mean: -0.7677944898605347
ppo/policy/advantages_mean: -0.0008707307279109955
---------------------------------------------------------------------------------------------------


1940it [2:40:22,  3.14s/it]

objective/kl: 9.499444961547852
ppo/returns/mean: -0.7046059966087341
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1941it [2:40:26,  3.16s/it]

objective/kl: 8.768624305725098
ppo/returns/mean: -0.6621819138526917
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1942it [2:40:29,  3.23s/it]

objective/kl: 7.881013870239258
ppo/returns/mean: -0.6263713240623474
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1943it [2:40:32,  3.21s/it]

objective/kl: 8.224529266357422
ppo/returns/mean: -0.6071006655693054
ppo/policy/advantages_mean: -2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


1944it [2:40:35,  3.17s/it]

objective/kl: 6.552437782287598
ppo/returns/mean: -0.5466177463531494
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1945it [2:40:39,  3.34s/it]

objective/kl: 5.245218276977539
ppo/returns/mean: -0.4959060251712799
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1946it [2:40:42,  3.27s/it]

objective/kl: 5.651398658752441
ppo/returns/mean: -0.48490363359451294
ppo/policy/advantages_mean: 3.725290298461914e-08
---------------------------------------------------------------------------------------------------


1947it [2:40:45,  3.22s/it]

objective/kl: 5.890538215637207
ppo/returns/mean: -0.48496952652931213
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1948it [2:40:48,  3.18s/it]

objective/kl: 5.438094139099121
ppo/returns/mean: -0.4469466209411621
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1949it [2:40:52,  3.24s/it]

objective/kl: 6.623432636260986
ppo/returns/mean: -0.48830047249794006
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1950it [2:40:55,  3.24s/it]

objective/kl: 5.356442451477051
ppo/returns/mean: -0.4479111135005951
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1951it [2:40:58,  3.25s/it]

objective/kl: 6.64202356338501
ppo/returns/mean: -0.48609471321105957
ppo/policy/advantages_mean: -1.3969838619232178e-08
---------------------------------------------------------------------------------------------------


1952it [2:41:01,  3.21s/it]

objective/kl: 7.279361724853516
ppo/returns/mean: -0.5159111022949219
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1953it [2:41:05,  3.21s/it]

objective/kl: 7.96549129486084
ppo/returns/mean: -0.5468118786811829
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1954it [2:41:08,  3.18s/it]

objective/kl: 7.444019317626953
ppo/returns/mean: -0.5341589450836182
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1955it [2:41:11,  3.15s/it]

objective/kl: 7.7888360023498535
ppo/returns/mean: -0.5531545877456665
ppo/policy/advantages_mean: -0.0007394663989543915
---------------------------------------------------------------------------------------------------


1956it [2:41:14,  3.14s/it]

objective/kl: 6.288135528564453
ppo/returns/mean: -0.5410285592079163
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1957it [2:41:17,  3.16s/it]

objective/kl: 7.467062473297119
ppo/returns/mean: -0.5884745717048645
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1958it [2:41:20,  3.21s/it]

objective/kl: 7.4691481590271
ppo/returns/mean: -0.6137533187866211
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


1959it [2:41:23,  3.17s/it]

objective/kl: 5.487739562988281
ppo/returns/mean: -0.5335094332695007
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1960it [2:41:27,  3.15s/it]

objective/kl: 5.155644416809082
ppo/returns/mean: -0.514968752861023
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


1961it [2:41:30,  3.13s/it]

objective/kl: 5.823892593383789
ppo/returns/mean: -0.5459873080253601
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


1962it [2:41:33,  3.18s/it]

objective/kl: 5.312537670135498
ppo/returns/mean: -0.5456032752990723
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1963it [2:41:36,  3.16s/it]

objective/kl: 2.819202423095703
ppo/returns/mean: -0.4165441393852234
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1964it [2:41:39,  3.15s/it]

objective/kl: 2.3929500579833984
ppo/returns/mean: -0.3690129816532135
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


1965it [2:41:42,  3.14s/it]

objective/kl: 2.663335084915161
ppo/returns/mean: -0.3745729625225067
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1966it [2:41:45,  3.14s/it]

objective/kl: 3.040966033935547
ppo/returns/mean: -0.36581695079803467
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


1967it [2:41:49,  3.19s/it]

objective/kl: 2.795311450958252
ppo/returns/mean: -0.33173927664756775
ppo/policy/advantages_mean: -0.003687456250190735
---------------------------------------------------------------------------------------------------


1968it [2:41:52,  3.21s/it]

objective/kl: 3.7492949962615967
ppo/returns/mean: -0.3445887565612793
ppo/policy/advantages_mean: -0.00032158009707927704
---------------------------------------------------------------------------------------------------


1969it [2:41:55,  3.22s/it]

objective/kl: 3.761953353881836
ppo/returns/mean: -0.3412054181098938
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1970it [2:41:58,  3.19s/it]

objective/kl: 4.170958518981934
ppo/returns/mean: -0.33673372864723206
ppo/policy/advantages_mean: 0.001830562949180603
---------------------------------------------------------------------------------------------------


1971it [2:42:02,  3.25s/it]

objective/kl: 5.040467262268066
ppo/returns/mean: -0.352276474237442
ppo/policy/advantages_mean: -2.60770320892334e-08
---------------------------------------------------------------------------------------------------


1972it [2:42:05,  3.21s/it]

objective/kl: 4.279529571533203
ppo/returns/mean: -0.33164650201797485
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1973it [2:42:08,  3.17s/it]

objective/kl: 3.2986202239990234
ppo/returns/mean: -0.29544907808303833
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


1974it [2:42:11,  3.15s/it]

objective/kl: 4.121760368347168
ppo/returns/mean: -0.3364986181259155
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1975it [2:42:14,  3.13s/it]

objective/kl: 3.3263392448425293
ppo/returns/mean: -0.305417537689209
ppo/policy/advantages_mean: -3.073364496231079e-08
---------------------------------------------------------------------------------------------------


1976it [2:42:17,  3.11s/it]

objective/kl: 2.3289084434509277
ppo/returns/mean: -0.294259637594223
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1977it [2:42:20,  3.10s/it]

objective/kl: 2.3195436000823975
ppo/returns/mean: -0.3122238516807556
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1978it [2:42:23,  3.10s/it]

objective/kl: 2.1343538761138916
ppo/returns/mean: -0.3019459843635559
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1979it [2:42:26,  3.09s/it]

objective/kl: 2.1291956901550293
ppo/returns/mean: -0.30169954895973206
ppo/policy/advantages_mean: 0.00040232017636299133
---------------------------------------------------------------------------------------------------


1980it [2:42:30,  3.08s/it]

objective/kl: 2.4986586570739746
ppo/returns/mean: -0.3015451729297638
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1981it [2:42:33,  3.09s/it]

objective/kl: 2.9217729568481445
ppo/returns/mean: -0.3104458749294281
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


1982it [2:42:36,  3.10s/it]

objective/kl: 2.924274444580078
ppo/returns/mean: -0.32182037830352783
ppo/policy/advantages_mean: 0.00039440393447875977
---------------------------------------------------------------------------------------------------


1983it [2:42:39,  3.10s/it]

objective/kl: 2.848172187805176
ppo/returns/mean: -0.30083608627319336
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


1984it [2:42:42,  3.09s/it]

objective/kl: 3.5468740463256836
ppo/returns/mean: -0.3203742504119873
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1985it [2:42:45,  3.09s/it]

objective/kl: 4.187084674835205
ppo/returns/mean: -0.3456547260284424
ppo/policy/advantages_mean: -0.0010730456560850143
---------------------------------------------------------------------------------------------------


1986it [2:42:48,  3.10s/it]

objective/kl: 4.009284019470215
ppo/returns/mean: -0.3432249426841736
ppo/policy/advantages_mean: -0.000922422856092453
---------------------------------------------------------------------------------------------------


1987it [2:42:51,  3.10s/it]

objective/kl: 4.58676815032959
ppo/returns/mean: -0.36742308735847473
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1988it [2:42:54,  3.09s/it]

objective/kl: 4.304723739624023
ppo/returns/mean: -0.35484808683395386
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


1989it [2:42:57,  3.08s/it]

objective/kl: 4.397692680358887
ppo/returns/mean: -0.36705854535102844
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


1990it [2:43:00,  3.08s/it]

objective/kl: 2.9809131622314453
ppo/returns/mean: -0.30799371004104614
ppo/policy/advantages_mean: 0.00198262557387352
---------------------------------------------------------------------------------------------------


1991it [2:43:04,  3.10s/it]

objective/kl: 3.7503983974456787
ppo/returns/mean: -0.31834766268730164
ppo/policy/advantages_mean: -0.00014643371105194092
---------------------------------------------------------------------------------------------------


1992it [2:43:07,  3.09s/it]

objective/kl: 3.690762996673584
ppo/returns/mean: -0.3250107765197754
ppo/policy/advantages_mean: -0.0010102558881044388
---------------------------------------------------------------------------------------------------


1993it [2:43:10,  3.21s/it]

objective/kl: 2.9879579544067383
ppo/returns/mean: -0.27989885210990906
ppo/policy/advantages_mean: 3.748852759599686e-05
---------------------------------------------------------------------------------------------------


1994it [2:43:13,  3.19s/it]

objective/kl: 3.723238468170166
ppo/returns/mean: -0.31362172961235046
ppo/policy/advantages_mean: 0.0017258748412132263
---------------------------------------------------------------------------------------------------


1995it [2:43:16,  3.16s/it]

objective/kl: 2.032744884490967
ppo/returns/mean: -0.21566225588321686
ppo/policy/advantages_mean: 0.00182279571890831
---------------------------------------------------------------------------------------------------


1996it [2:43:20,  3.23s/it]

objective/kl: 2.180671215057373
ppo/returns/mean: -0.20492230355739594
ppo/policy/advantages_mean: 0.0011618360877037048
---------------------------------------------------------------------------------------------------


1997it [2:43:23,  3.23s/it]

objective/kl: 1.8677629232406616
ppo/returns/mean: -0.2028866708278656
ppo/policy/advantages_mean: -0.0014722030609846115
---------------------------------------------------------------------------------------------------


1998it [2:43:26,  3.25s/it]

objective/kl: 3.175811767578125
ppo/returns/mean: -0.25553226470947266
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


1999it [2:43:29,  3.19s/it]

objective/kl: 2.845487117767334
ppo/returns/mean: -0.2462618350982666
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2000it [2:43:32,  3.16s/it]

objective/kl: 3.060204029083252
ppo/returns/mean: -0.2372225821018219
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2001it [2:43:35,  3.13s/it]

objective/kl: 3.7895936965942383
ppo/returns/mean: -0.30400463938713074
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


2002it [2:43:40,  3.50s/it]

objective/kl: 3.208151340484619
ppo/returns/mean: -0.27446985244750977
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2003it [2:43:43,  3.42s/it]

objective/kl: 4.70276403427124
ppo/returns/mean: -0.3319442570209503
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2004it [2:43:46,  3.31s/it]

objective/kl: 4.825984477996826
ppo/returns/mean: -0.3256855607032776
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


2005it [2:43:49,  3.32s/it]

objective/kl: 5.995180606842041
ppo/returns/mean: -0.35662490129470825
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


2006it [2:43:53,  3.34s/it]

objective/kl: 5.582304000854492
ppo/returns/mean: -0.37180525064468384
ppo/policy/advantages_mean: -2.60770320892334e-08
---------------------------------------------------------------------------------------------------


2007it [2:43:56,  3.27s/it]

objective/kl: 6.190482139587402
ppo/returns/mean: -0.4183399975299835
ppo/policy/advantages_mean: -3.91155481338501e-08
---------------------------------------------------------------------------------------------------


2008it [2:43:59,  3.25s/it]

objective/kl: 5.859898090362549
ppo/returns/mean: -0.394304096698761
ppo/policy/advantages_mean: -1.3969838619232178e-08
---------------------------------------------------------------------------------------------------


2009it [2:44:03,  3.36s/it]

objective/kl: 5.762749671936035
ppo/returns/mean: -0.41051092743873596
ppo/policy/advantages_mean: -0.0007906071841716766
---------------------------------------------------------------------------------------------------


2010it [2:44:06,  3.35s/it]

objective/kl: 4.9467668533325195
ppo/returns/mean: -0.3793124258518219
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2011it [2:44:09,  3.28s/it]

objective/kl: 5.685636043548584
ppo/returns/mean: -0.3871169686317444
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2012it [2:44:12,  3.23s/it]

objective/kl: 5.500959873199463
ppo/returns/mean: -0.40412619709968567
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


2013it [2:44:15,  3.19s/it]

objective/kl: 5.0154266357421875
ppo/returns/mean: -0.36723461747169495
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2014it [2:44:19,  3.17s/it]

objective/kl: 4.375666618347168
ppo/returns/mean: -0.3306739330291748
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2015it [2:44:22,  3.16s/it]

objective/kl: 4.071061611175537
ppo/returns/mean: -0.3314552903175354
ppo/policy/advantages_mean: -3.3527612686157227e-08
---------------------------------------------------------------------------------------------------


2016it [2:44:25,  3.15s/it]

objective/kl: 2.6961729526519775
ppo/returns/mean: -0.2376757562160492
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2017it [2:44:28,  3.14s/it]

objective/kl: 2.2225663661956787
ppo/returns/mean: -0.20765221118927002
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2018it [2:44:31,  3.13s/it]

objective/kl: 4.438751220703125
ppo/returns/mean: -0.3040899634361267
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2019it [2:44:34,  3.14s/it]

objective/kl: 3.2104616165161133
ppo/returns/mean: -0.2760188579559326
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2020it [2:44:37,  3.14s/it]

objective/kl: 2.9582831859588623
ppo/returns/mean: -0.21341249346733093
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2021it [2:44:40,  3.13s/it]

objective/kl: 1.705182671546936
ppo/returns/mean: -0.1711147278547287
ppo/policy/advantages_mean: 1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


2022it [2:44:44,  3.13s/it]

objective/kl: -0.431081622838974
ppo/returns/mean: -0.11339281499385834
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2023it [2:44:47,  3.12s/it]

objective/kl: -0.3150884509086609
ppo/returns/mean: -0.09572835266590118
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2024it [2:44:50,  3.12s/it]

objective/kl: -1.6989152431488037
ppo/returns/mean: -0.017642270773649216
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2025it [2:44:53,  3.11s/it]

objective/kl: 0.467004656791687
ppo/returns/mean: -0.06969913840293884
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2026it [2:44:56,  3.12s/it]

objective/kl: 2.0813822746276855
ppo/returns/mean: -0.10369247198104858
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2027it [2:44:59,  3.11s/it]

objective/kl: -0.03506952524185181
ppo/returns/mean: -0.03537416830658913
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2028it [2:45:02,  3.12s/it]

objective/kl: -0.7033387422561646
ppo/returns/mean: -0.023100169375538826
ppo/policy/advantages_mean: 0.0002770237624645233
---------------------------------------------------------------------------------------------------


2029it [2:45:05,  3.12s/it]

objective/kl: 1.3666472434997559
ppo/returns/mean: -0.08325187861919403
ppo/policy/advantages_mean: 2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


2030it [2:45:09,  3.12s/it]

objective/kl: 1.6491554975509644
ppo/returns/mean: -0.10135017335414886
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2031it [2:45:12,  3.11s/it]

objective/kl: 2.6261396408081055
ppo/returns/mean: -0.13493052124977112
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


2032it [2:45:15,  3.17s/it]

objective/kl: -0.3803732395172119
ppo/returns/mean: -0.014114147052168846
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2033it [2:45:18,  3.13s/it]

objective/kl: -0.028299987316131592
ppo/returns/mean: -0.030414149165153503
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2034it [2:45:21,  3.22s/it]

objective/kl: -1.5078390836715698
ppo/returns/mean: 0.0018966477364301682
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2035it [2:45:24,  3.16s/it]

objective/kl: -1.6223034858703613
ppo/returns/mean: 0.026435211300849915
ppo/policy/advantages_mean: 2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


2036it [2:45:27,  3.13s/it]

objective/kl: 0.04018115997314453
ppo/returns/mean: -0.052805230021476746
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2037it [2:45:31,  3.11s/it]

objective/kl: -0.19106464087963104
ppo/returns/mean: -0.0493704229593277
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2038it [2:45:34,  3.10s/it]

objective/kl: 0.2299463152885437
ppo/returns/mean: -0.07798497378826141
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2039it [2:45:37,  3.09s/it]

objective/kl: 0.6513906717300415
ppo/returns/mean: -0.11198422312736511
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2040it [2:45:40,  3.08s/it]

objective/kl: -0.6196339130401611
ppo/returns/mean: -0.06483536958694458
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2041it [2:45:43,  3.09s/it]

objective/kl: -9.265045166015625
ppo/returns/mean: 0.22797179222106934
ppo/policy/advantages_mean: -2.7939677238464355e-08
---------------------------------------------------------------------------------------------------


2042it [2:45:46,  3.11s/it]

objective/kl: -6.6307454109191895
ppo/returns/mean: 0.15455472469329834
ppo/policy/advantages_mean: 5.21540641784668e-08
---------------------------------------------------------------------------------------------------


2043it [2:45:49,  3.12s/it]

objective/kl: -3.840859889984131
ppo/returns/mean: 0.0970480740070343
ppo/policy/advantages_mean: 2.7939677238464355e-08
---------------------------------------------------------------------------------------------------


2044it [2:45:52,  3.12s/it]

objective/kl: -3.9253642559051514
ppo/returns/mean: 0.14461149275302887
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2045it [2:45:55,  3.13s/it]

objective/kl: -0.32001709938049316
ppo/returns/mean: 0.013714834116399288
ppo/policy/advantages_mean: 0.0012267827987670898
---------------------------------------------------------------------------------------------------


2046it [2:45:59,  3.13s/it]

objective/kl: -1.2282826900482178
ppo/returns/mean: 0.03401434049010277
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2047it [2:46:02,  3.11s/it]

objective/kl: -0.9547147750854492
ppo/returns/mean: 0.05977601930499077
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2048it [2:46:05,  3.17s/it]

objective/kl: -2.481700897216797
ppo/returns/mean: 0.13110938668251038
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2049it [2:46:08,  3.14s/it]

objective/kl: -3.116868019104004
ppo/returns/mean: 0.15984222292900085
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2050it [2:46:11,  3.13s/it]

objective/kl: -2.349403142929077
ppo/returns/mean: 0.1289651095867157
ppo/policy/advantages_mean: 1.955777406692505e-08
---------------------------------------------------------------------------------------------------


2051it [2:46:14,  3.11s/it]

objective/kl: -4.427666187286377
ppo/returns/mean: 0.19917580485343933
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2052it [2:46:17,  3.09s/it]

objective/kl: -1.4253928661346436
ppo/returns/mean: 0.09475193917751312
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2053it [2:46:20,  3.11s/it]

objective/kl: -1.8729755878448486
ppo/returns/mean: 0.10029385983943939
ppo/policy/advantages_mean: -9.313225746154785e-10
---------------------------------------------------------------------------------------------------


2054it [2:46:23,  3.10s/it]

objective/kl: -2.836895704269409
ppo/returns/mean: 0.13935904204845428
ppo/policy/advantages_mean: 9.092316031455994e-05
---------------------------------------------------------------------------------------------------


2055it [2:46:27,  3.09s/it]

objective/kl: -2.7128634452819824
ppo/returns/mean: 0.1444346010684967
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2056it [2:46:30,  3.10s/it]

objective/kl: -2.998476505279541
ppo/returns/mean: 0.1547817587852478
ppo/policy/advantages_mean: -0.0008222311735153198
---------------------------------------------------------------------------------------------------


2057it [2:46:33,  3.09s/it]

objective/kl: -4.3505964279174805
ppo/returns/mean: 0.2342267632484436
ppo/policy/advantages_mean: -0.0001416318118572235
---------------------------------------------------------------------------------------------------


2058it [2:46:36,  3.09s/it]

objective/kl: -6.232865333557129
ppo/returns/mean: 0.30750396847724915
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


2059it [2:46:39,  3.11s/it]

objective/kl: -6.358659744262695
ppo/returns/mean: 0.3251020312309265
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2060it [2:46:42,  3.10s/it]

objective/kl: -10.069771766662598
ppo/returns/mean: 0.5111430883407593
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2061it [2:46:45,  3.12s/it]

objective/kl: -14.175338745117188
ppo/returns/mean: 0.6710765361785889
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2062it [2:46:48,  3.12s/it]

objective/kl: -14.109721183776855
ppo/returns/mean: 0.7101094126701355
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2063it [2:46:51,  3.11s/it]

objective/kl: -15.537883758544922
ppo/returns/mean: 0.7627044916152954
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


2064it [2:46:55,  3.11s/it]

objective/kl: -15.222684860229492
ppo/returns/mean: 0.7422234416007996
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2065it [2:46:58,  3.10s/it]

objective/kl: -15.462362289428711
ppo/returns/mean: 0.776500940322876
ppo/policy/advantages_mean: 0.0018278881907463074
---------------------------------------------------------------------------------------------------


2066it [2:47:01,  3.12s/it]

objective/kl: -15.961454391479492
ppo/returns/mean: 0.7593035697937012
ppo/policy/advantages_mean: 0.0045552924275398254
---------------------------------------------------------------------------------------------------


2067it [2:47:04,  3.12s/it]

objective/kl: -16.59183120727539
ppo/returns/mean: 0.8312795162200928
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2068it [2:47:07,  3.12s/it]

objective/kl: -19.02914810180664
ppo/returns/mean: 0.9225989580154419
ppo/policy/advantages_mean: -4.6566128730773926e-09
---------------------------------------------------------------------------------------------------


2069it [2:47:10,  3.11s/it]

objective/kl: -17.744476318359375
ppo/returns/mean: 0.9171954393386841
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


2070it [2:47:13,  3.11s/it]

objective/kl: -19.908950805664062
ppo/returns/mean: 0.9805501103401184
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


2071it [2:47:16,  3.09s/it]

objective/kl: -18.07015037536621
ppo/returns/mean: 0.9216194748878479
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2072it [2:47:19,  3.13s/it]

objective/kl: -17.518104553222656
ppo/returns/mean: 0.9095414876937866
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


2073it [2:47:23,  3.16s/it]

objective/kl: -17.487789154052734
ppo/returns/mean: 0.8895868062973022
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2074it [2:47:26,  3.14s/it]

objective/kl: -17.11440658569336
ppo/returns/mean: 0.8904843330383301
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2075it [2:47:29,  3.13s/it]

objective/kl: -12.696981430053711
ppo/returns/mean: 0.6512854099273682
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2076it [2:47:32,  3.15s/it]

objective/kl: 2.2697157859802246
ppo/returns/mean: -0.07984380424022675
ppo/policy/advantages_mean: 6.891787052154541e-08
---------------------------------------------------------------------------------------------------


2077it [2:47:35,  3.16s/it]

objective/kl: 11.524946212768555
ppo/returns/mean: -0.4868185520172119
ppo/policy/advantages_mean: -0.0014292709529399872
---------------------------------------------------------------------------------------------------


2078it [2:47:38,  3.14s/it]

objective/kl: -0.03768599033355713
ppo/returns/mean: -0.02049238793551922
ppo/policy/advantages_mean: 0.0007541626691818237
---------------------------------------------------------------------------------------------------


2079it [2:47:42,  3.15s/it]

objective/kl: -9.511430740356445
ppo/returns/mean: 0.19201278686523438
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2080it [2:47:45,  3.15s/it]

objective/kl: -10.536452293395996
ppo/returns/mean: 0.2608884572982788
ppo/policy/advantages_mean: -2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


2081it [2:47:48,  3.18s/it]

objective/kl: -22.53191375732422
ppo/returns/mean: 0.610539436340332
ppo/policy/advantages_mean: 0.000332757830619812
---------------------------------------------------------------------------------------------------


2082it [2:47:51,  3.19s/it]

objective/kl: -37.30882263183594
ppo/returns/mean: 1.0714082717895508
ppo/policy/advantages_mean: 3.166496753692627e-08
---------------------------------------------------------------------------------------------------


2083it [2:47:54,  3.19s/it]

objective/kl: -45.71647262573242
ppo/returns/mean: 1.6035594940185547
ppo/policy/advantages_mean: -0.0018015727400779724
---------------------------------------------------------------------------------------------------


2084it [2:47:57,  3.17s/it]

objective/kl: -53.25819778442383
ppo/returns/mean: 2.0678012371063232
ppo/policy/advantages_mean: -0.0006602248176932335
---------------------------------------------------------------------------------------------------


2085it [2:48:01,  3.14s/it]

objective/kl: -51.52397155761719
ppo/returns/mean: 2.0314455032348633
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2086it [2:48:04,  3.20s/it]

objective/kl: -44.41905212402344
ppo/returns/mean: 1.688971757888794
ppo/policy/advantages_mean: -3.5390257835388184e-08
---------------------------------------------------------------------------------------------------


2087it [2:48:07,  3.17s/it]

objective/kl: -53.17595291137695
ppo/returns/mean: 2.0858638286590576
ppo/policy/advantages_mean: -4.6566128730773926e-09
---------------------------------------------------------------------------------------------------


2088it [2:48:10,  3.15s/it]

objective/kl: -55.70451354980469
ppo/returns/mean: 2.1880862712860107
ppo/policy/advantages_mean: 1.955777406692505e-08
---------------------------------------------------------------------------------------------------


2089it [2:48:13,  3.13s/it]

objective/kl: -59.04270935058594
ppo/returns/mean: 2.3178248405456543
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2090it [2:48:16,  3.13s/it]

objective/kl: -60.46438217163086
ppo/returns/mean: 2.4413015842437744
ppo/policy/advantages_mean: -9.313225746154785e-10
---------------------------------------------------------------------------------------------------


2091it [2:48:19,  3.13s/it]

objective/kl: -61.956138610839844
ppo/returns/mean: 2.49975323677063
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2092it [2:48:23,  3.16s/it]

objective/kl: -61.00640869140625
ppo/returns/mean: 2.469372510910034
ppo/policy/advantages_mean: -2.60770320892334e-08
---------------------------------------------------------------------------------------------------


2093it [2:48:26,  3.14s/it]

objective/kl: -61.00689697265625
ppo/returns/mean: 2.4583423137664795
ppo/policy/advantages_mean: -6.51925802230835e-09
---------------------------------------------------------------------------------------------------


2094it [2:48:29,  3.19s/it]

objective/kl: -62.417633056640625
ppo/returns/mean: 2.5262203216552734
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2095it [2:48:32,  3.18s/it]

objective/kl: -60.562286376953125
ppo/returns/mean: 2.473987102508545
ppo/policy/advantages_mean: 1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


2096it [2:48:35,  3.16s/it]

objective/kl: -60.481536865234375
ppo/returns/mean: 2.4227585792541504
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2097it [2:48:38,  3.16s/it]

objective/kl: -64.1324462890625
ppo/returns/mean: 2.483377456665039
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2098it [2:48:42,  3.13s/it]

objective/kl: -61.43359375
ppo/returns/mean: 2.4408130645751953
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2099it [2:48:45,  3.24s/it]

objective/kl: -56.257022857666016
ppo/returns/mean: 2.1564197540283203
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2100it [2:48:48,  3.24s/it]

objective/kl: -46.336387634277344
ppo/returns/mean: 1.5693638324737549
ppo/policy/advantages_mean: 4.0978193283081055e-08
---------------------------------------------------------------------------------------------------


2101it [2:48:51,  3.20s/it]

objective/kl: -18.40997886657715
ppo/returns/mean: 0.5313029289245605
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2102it [2:48:55,  3.18s/it]

objective/kl: 6.276607990264893
ppo/returns/mean: -0.3037354052066803
ppo/policy/advantages_mean: -2.60770320892334e-08
---------------------------------------------------------------------------------------------------


2103it [2:48:58,  3.17s/it]

objective/kl: 12.101853370666504
ppo/returns/mean: -0.5379081964492798
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2104it [2:49:01,  3.20s/it]

objective/kl: 19.57440948486328
ppo/returns/mean: -0.8117484450340271
ppo/policy/advantages_mean: -0.00020553776994347572
---------------------------------------------------------------------------------------------------


2105it [2:49:04,  3.18s/it]

objective/kl: 30.155261993408203
ppo/returns/mean: -1.208540678024292
ppo/policy/advantages_mean: -1.3969838619232178e-08
---------------------------------------------------------------------------------------------------


2106it [2:49:07,  3.22s/it]

objective/kl: 34.774505615234375
ppo/returns/mean: -1.3118808269500732
ppo/policy/advantages_mean: 0.0001945924013853073
---------------------------------------------------------------------------------------------------


2107it [2:49:11,  3.24s/it]

objective/kl: 37.338600158691406
ppo/returns/mean: -1.4358993768692017
ppo/policy/advantages_mean: -0.0020110206678509712
---------------------------------------------------------------------------------------------------


2108it [2:49:14,  3.24s/it]

objective/kl: 35.93256759643555
ppo/returns/mean: -1.3951449394226074
ppo/policy/advantages_mean: -0.0005324464291334152
---------------------------------------------------------------------------------------------------


2109it [2:49:17,  3.23s/it]

objective/kl: 33.68090057373047
ppo/returns/mean: -1.3300602436065674
ppo/policy/advantages_mean: 0.00022643711417913437
---------------------------------------------------------------------------------------------------


2110it [2:49:20,  3.23s/it]

objective/kl: 35.96691131591797
ppo/returns/mean: -1.439929723739624
ppo/policy/advantages_mean: 0.0003108931705355644
---------------------------------------------------------------------------------------------------


2111it [2:49:23,  3.21s/it]

objective/kl: 33.297183990478516
ppo/returns/mean: -1.3625379800796509
ppo/policy/advantages_mean: -0.0005757026374340057
---------------------------------------------------------------------------------------------------


2112it [2:49:27,  3.20s/it]

objective/kl: 29.53643226623535
ppo/returns/mean: -1.0455421209335327
ppo/policy/advantages_mean: 9.584822691977024e-06
---------------------------------------------------------------------------------------------------


2113it [2:49:30,  3.33s/it]

objective/kl: 29.686687469482422
ppo/returns/mean: -1.0995888710021973
ppo/policy/advantages_mean: 0.0026998184621334076
---------------------------------------------------------------------------------------------------


2114it [2:49:34,  3.37s/it]

objective/kl: 32.90824890136719
ppo/returns/mean: -1.273646593093872
ppo/policy/advantages_mean: -0.0010071750730276108
---------------------------------------------------------------------------------------------------


2115it [2:49:37,  3.34s/it]

objective/kl: 28.915470123291016
ppo/returns/mean: -1.1227684020996094
ppo/policy/advantages_mean: 0.004870560020208359
---------------------------------------------------------------------------------------------------


2116it [2:49:40,  3.29s/it]

objective/kl: 23.079910278320312
ppo/returns/mean: -0.8125605583190918
ppo/policy/advantages_mean: 0.0006276555359363556
---------------------------------------------------------------------------------------------------


2117it [2:49:43,  3.25s/it]

objective/kl: 20.008514404296875
ppo/returns/mean: -0.7130855321884155
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


2118it [2:49:46,  3.21s/it]

objective/kl: 13.34692096710205
ppo/returns/mean: -0.5564314723014832
ppo/policy/advantages_mean: 1.0710209608078003e-08
---------------------------------------------------------------------------------------------------


2119it [2:49:50,  3.20s/it]

objective/kl: 9.276023864746094
ppo/returns/mean: -0.5724577903747559
ppo/policy/advantages_mean: 0.003412805497646332
---------------------------------------------------------------------------------------------------


2120it [2:49:53,  3.21s/it]

objective/kl: 1.879317283630371
ppo/returns/mean: -0.641819953918457
ppo/policy/advantages_mean: 0.00033597834408283234
---------------------------------------------------------------------------------------------------


2121it [2:49:56,  3.23s/it]

objective/kl: -13.281929969787598
ppo/returns/mean: -0.28672680258750916
ppo/policy/advantages_mean: 0.0032641422003507614
---------------------------------------------------------------------------------------------------


2122it [2:49:59,  3.20s/it]

objective/kl: -24.570871353149414
ppo/returns/mean: 0.12360532581806183
ppo/policy/advantages_mean: 0.0010077664628624916
---------------------------------------------------------------------------------------------------


2123it [2:50:03,  3.28s/it]

objective/kl: -30.993316650390625
ppo/returns/mean: 0.30607104301452637
ppo/policy/advantages_mean: -0.0015133842825889587
---------------------------------------------------------------------------------------------------


2124it [2:50:06,  3.25s/it]

objective/kl: -26.6004638671875
ppo/returns/mean: 0.4821853041648865
ppo/policy/advantages_mean: 0.0005152765661478043
---------------------------------------------------------------------------------------------------


2125it [2:50:09,  3.20s/it]

objective/kl: -20.69366455078125
ppo/returns/mean: 0.470467746257782
ppo/policy/advantages_mean: -5.4016709327697754e-08
---------------------------------------------------------------------------------------------------


2126it [2:50:12,  3.20s/it]

objective/kl: -10.661505699157715
ppo/returns/mean: 0.23968884348869324
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2127it [2:50:15,  3.19s/it]

objective/kl: -11.39095687866211
ppo/returns/mean: 0.3117348551750183
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2128it [2:50:19,  3.23s/it]

objective/kl: -8.770594596862793
ppo/returns/mean: 0.31615859270095825
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2129it [2:50:22,  3.22s/it]

objective/kl: -10.528047561645508
ppo/returns/mean: 0.35436171293258667
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


2130it [2:50:25,  3.19s/it]

objective/kl: -6.8828277587890625
ppo/returns/mean: 0.30331504344940186
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2131it [2:50:28,  3.19s/it]

objective/kl: -4.271173477172852
ppo/returns/mean: 0.24430827796459198
ppo/policy/advantages_mean: 3.67872416973114e-08
---------------------------------------------------------------------------------------------------


2132it [2:50:31,  3.19s/it]

objective/kl: -2.6280133724212646
ppo/returns/mean: 0.19914370775222778
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2133it [2:50:35,  3.33s/it]

objective/kl: -0.04869121313095093
ppo/returns/mean: 0.13401897251605988
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2134it [2:50:38,  3.31s/it]

objective/kl: 6.924968719482422
ppo/returns/mean: -0.12133997678756714
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


2135it [2:50:41,  3.25s/it]

objective/kl: 7.872524738311768
ppo/returns/mean: -0.16846507787704468
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2136it [2:50:45,  3.21s/it]

objective/kl: 6.490833282470703
ppo/returns/mean: -0.25815173983573914
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2137it [2:50:48,  3.19s/it]

objective/kl: 11.202237129211426
ppo/returns/mean: -0.557275652885437
ppo/policy/advantages_mean: -0.00010758079588413239
---------------------------------------------------------------------------------------------------


2138it [2:50:52,  3.41s/it]

objective/kl: 7.114041328430176
ppo/returns/mean: -0.49664443731307983
ppo/policy/advantages_mean: 0.000694621354341507
---------------------------------------------------------------------------------------------------


2139it [2:50:55,  3.38s/it]

objective/kl: 2.836169719696045
ppo/returns/mean: -0.46818387508392334
ppo/policy/advantages_mean: -0.0002251816913485527
---------------------------------------------------------------------------------------------------


2140it [2:50:58,  3.32s/it]

objective/kl: -3.474059820175171
ppo/returns/mean: -0.3693864047527313
ppo/policy/advantages_mean: -0.0003553498536348343
---------------------------------------------------------------------------------------------------


2141it [2:51:01,  3.28s/it]

objective/kl: -8.23760986328125
ppo/returns/mean: -0.29463356733322144
ppo/policy/advantages_mean: -0.0018441304564476013
---------------------------------------------------------------------------------------------------


2142it [2:51:05,  3.28s/it]

objective/kl: -22.38349151611328
ppo/returns/mean: 0.018125155940651894
ppo/policy/advantages_mean: 0.0015547331422567368
---------------------------------------------------------------------------------------------------


2143it [2:51:08,  3.30s/it]

objective/kl: -40.18267822265625
ppo/returns/mean: 0.4569365084171295
ppo/policy/advantages_mean: 0.0017463266849517822
---------------------------------------------------------------------------------------------------


2144it [2:51:11,  3.26s/it]

objective/kl: -70.40278625488281
ppo/returns/mean: 1.5447633266448975
ppo/policy/advantages_mean: -6.978213787078857e-05
---------------------------------------------------------------------------------------------------


2145it [2:51:14,  3.23s/it]

objective/kl: -77.0152587890625
ppo/returns/mean: 2.0376393795013428
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


2146it [2:51:17,  3.19s/it]

objective/kl: -79.81608581542969
ppo/returns/mean: 2.255303144454956
ppo/policy/advantages_mean: 4.936009645462036e-08
---------------------------------------------------------------------------------------------------


2147it [2:51:20,  3.17s/it]

objective/kl: -72.18412017822266
ppo/returns/mean: 2.101710319519043
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2148it [2:51:24,  3.17s/it]

objective/kl: -75.07119750976562
ppo/returns/mean: 2.316784381866455
ppo/policy/advantages_mean: 3.5390257835388184e-08
---------------------------------------------------------------------------------------------------


2149it [2:51:27,  3.16s/it]

objective/kl: -68.03923797607422
ppo/returns/mean: 2.2937443256378174
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


2150it [2:51:30,  3.15s/it]

objective/kl: -65.58926391601562
ppo/returns/mean: 2.2497246265411377
ppo/policy/advantages_mean: 4.190951585769653e-08
---------------------------------------------------------------------------------------------------


2151it [2:51:33,  3.19s/it]

objective/kl: -56.84510803222656
ppo/returns/mean: 2.013352870941162
ppo/policy/advantages_mean: 6.426125764846802e-08
---------------------------------------------------------------------------------------------------


2152it [2:51:36,  3.16s/it]

objective/kl: -34.31602096557617
ppo/returns/mean: 1.2362877130508423
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


2153it [2:51:39,  3.13s/it]

objective/kl: -10.729547500610352
ppo/returns/mean: 0.4208361804485321
ppo/policy/advantages_mean: -4.842877388000488e-08
---------------------------------------------------------------------------------------------------


2154it [2:51:42,  3.12s/it]

objective/kl: -12.659797668457031
ppo/returns/mean: 0.43056434392929077
ppo/policy/advantages_mean: -9.685754776000977e-08
---------------------------------------------------------------------------------------------------


2155it [2:51:46,  3.11s/it]

objective/kl: -9.683484077453613
ppo/returns/mean: 0.28248798847198486
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2156it [2:51:49,  3.15s/it]

objective/kl: -11.0668363571167
ppo/returns/mean: 0.2551962733268738
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2157it [2:51:52,  3.24s/it]

objective/kl: -9.1173095703125
ppo/returns/mean: 0.0952908992767334
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2158it [2:51:55,  3.19s/it]

objective/kl: -6.317106246948242
ppo/returns/mean: -0.021457301452755928
ppo/policy/advantages_mean: -4.7031790018081665e-08
---------------------------------------------------------------------------------------------------


2159it [2:51:58,  3.16s/it]

objective/kl: -6.514734745025635
ppo/returns/mean: -0.04524137079715729
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2160it [2:52:01,  3.14s/it]

objective/kl: -5.689035892486572
ppo/returns/mean: -0.10082030296325684
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2161it [2:52:05,  3.13s/it]

objective/kl: -4.835814952850342
ppo/returns/mean: -0.15449124574661255
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2162it [2:52:08,  3.12s/it]

objective/kl: -5.218561172485352
ppo/returns/mean: -0.1715179681777954
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2163it [2:52:11,  3.10s/it]

objective/kl: -4.028519153594971
ppo/returns/mean: -0.21670788526535034
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


2164it [2:52:14,  3.09s/it]

objective/kl: -3.025257110595703
ppo/returns/mean: -0.27587607502937317
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2165it [2:52:18,  3.29s/it]

objective/kl: -2.0884087085723877
ppo/returns/mean: -0.30526840686798096
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2166it [2:52:21,  3.25s/it]

objective/kl: -5.166901111602783
ppo/returns/mean: -0.22677573561668396
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2167it [2:52:25,  3.43s/it]

objective/kl: -3.585782527923584
ppo/returns/mean: -0.2800941467285156
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2168it [2:52:28,  3.37s/it]

objective/kl: -5.114136695861816
ppo/returns/mean: -0.21628138422966003
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


2169it [2:52:31,  3.29s/it]

objective/kl: -4.6906352043151855
ppo/returns/mean: -0.2652689218521118
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2170it [2:52:34,  3.32s/it]

objective/kl: -2.88555908203125
ppo/returns/mean: -0.3360038995742798
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2171it [2:52:38,  3.34s/it]

objective/kl: -7.379930019378662
ppo/returns/mean: -0.1943954974412918
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2172it [2:52:41,  3.26s/it]

objective/kl: -9.686529159545898
ppo/returns/mean: -0.1220160722732544
ppo/policy/advantages_mean: -1.30385160446167e-08
---------------------------------------------------------------------------------------------------


2173it [2:52:44,  3.22s/it]

objective/kl: -8.059539794921875
ppo/returns/mean: -0.15945972502231598
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


2174it [2:52:47,  3.20s/it]

objective/kl: -8.830036163330078
ppo/returns/mean: -0.16671480238437653
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2175it [2:52:50,  3.23s/it]

objective/kl: -8.643367767333984
ppo/returns/mean: -0.14281785488128662
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2176it [2:52:53,  3.18s/it]

objective/kl: -7.197073936462402
ppo/returns/mean: -0.18126866221427917
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2177it [2:52:57,  3.20s/it]

objective/kl: -9.564743041992188
ppo/returns/mean: -0.10923920571804047
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2178it [2:53:00,  3.26s/it]

objective/kl: -9.287163734436035
ppo/returns/mean: -0.08983510732650757
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2179it [2:53:03,  3.20s/it]

objective/kl: -9.821845054626465
ppo/returns/mean: -0.05836544558405876
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2180it [2:53:06,  3.21s/it]

objective/kl: -6.870429992675781
ppo/returns/mean: -0.18229898810386658
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2181it [2:53:09,  3.18s/it]

objective/kl: -7.999608993530273
ppo/returns/mean: -0.1255028396844864
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2182it [2:53:13,  3.40s/it]

objective/kl: -6.577098369598389
ppo/returns/mean: -0.18301703035831451
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2183it [2:53:16,  3.32s/it]

objective/kl: -6.655570030212402
ppo/returns/mean: -0.19601434469223022
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2184it [2:53:20,  3.37s/it]

objective/kl: -6.337027549743652
ppo/returns/mean: -0.21202825009822845
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2185it [2:53:23,  3.28s/it]

objective/kl: -3.124422311782837
ppo/returns/mean: -0.31259265542030334
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2186it [2:53:26,  3.23s/it]

objective/kl: -3.9286296367645264
ppo/returns/mean: -0.289695143699646
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2187it [2:53:29,  3.19s/it]

objective/kl: -4.195706844329834
ppo/returns/mean: -0.2984433174133301
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2188it [2:53:32,  3.18s/it]

objective/kl: -5.861939430236816
ppo/returns/mean: -0.25939542055130005
ppo/policy/advantages_mean: -1.30385160446167e-08
---------------------------------------------------------------------------------------------------


2189it [2:53:35,  3.15s/it]

objective/kl: -5.968886375427246
ppo/returns/mean: -0.25390174984931946
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2190it [2:53:39,  3.14s/it]

objective/kl: -6.769524574279785
ppo/returns/mean: -0.22981315851211548
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2191it [2:53:42,  3.16s/it]

objective/kl: -3.0390572547912598
ppo/returns/mean: -0.3413936495780945
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2192it [2:53:45,  3.14s/it]

objective/kl: -6.3503193855285645
ppo/returns/mean: -0.22357213497161865
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


2193it [2:53:48,  3.13s/it]

objective/kl: -5.11075496673584
ppo/returns/mean: -0.25725001096725464
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2194it [2:53:51,  3.12s/it]

objective/kl: -4.260333061218262
ppo/returns/mean: -0.289625346660614
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2195it [2:53:54,  3.16s/it]

objective/kl: -3.6319937705993652
ppo/returns/mean: -0.3117273151874542
ppo/policy/advantages_mean: 2.7939677238464355e-08
---------------------------------------------------------------------------------------------------


2196it [2:53:57,  3.14s/it]

objective/kl: -3.5561211109161377
ppo/returns/mean: -0.3472312092781067
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2197it [2:54:01,  3.13s/it]

objective/kl: -6.535023212432861
ppo/returns/mean: -0.28185489773750305
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2198it [2:54:04,  3.13s/it]

objective/kl: -5.720986843109131
ppo/returns/mean: -0.29693832993507385
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2199it [2:54:07,  3.11s/it]

objective/kl: -5.829658508300781
ppo/returns/mean: -0.28874555230140686
ppo/policy/advantages_mean: -2.7939677238464355e-09
---------------------------------------------------------------------------------------------------


2200it [2:54:10,  3.15s/it]

objective/kl: -4.638762474060059
ppo/returns/mean: -0.31665417551994324
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


2201it [2:54:13,  3.14s/it]

objective/kl: -5.201838970184326
ppo/returns/mean: -0.29724210500717163
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2202it [2:54:17,  3.42s/it]

objective/kl: -7.302148818969727
ppo/returns/mean: -0.22730158269405365
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2203it [2:54:20,  3.33s/it]

objective/kl: -2.936936855316162
ppo/returns/mean: -0.3306753933429718
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2204it [2:54:23,  3.26s/it]

objective/kl: -5.623287200927734
ppo/returns/mean: -0.251204252243042
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2205it [2:54:27,  3.21s/it]

objective/kl: -4.540778160095215
ppo/returns/mean: -0.2981419563293457
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


2206it [2:54:30,  3.18s/it]

objective/kl: -3.623417854309082
ppo/returns/mean: -0.35365819931030273
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


2207it [2:54:33,  3.15s/it]

objective/kl: -4.617896556854248
ppo/returns/mean: -0.29942071437835693
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2208it [2:54:36,  3.15s/it]

objective/kl: -3.463132858276367
ppo/returns/mean: -0.3515706956386566
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


2209it [2:54:39,  3.15s/it]

objective/kl: -5.38776969909668
ppo/returns/mean: -0.2806466817855835
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


2210it [2:54:42,  3.18s/it]

objective/kl: -4.399009704589844
ppo/returns/mean: -0.3156701326370239
ppo/policy/advantages_mean: -1.5832483768463135e-08
---------------------------------------------------------------------------------------------------


2211it [2:54:46,  3.35s/it]

objective/kl: -7.0374836921691895
ppo/returns/mean: -0.2081356942653656
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2212it [2:54:49,  3.28s/it]

objective/kl: -8.604990005493164
ppo/returns/mean: -0.15128275752067566
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2213it [2:54:52,  3.21s/it]

objective/kl: -7.0582427978515625
ppo/returns/mean: -0.1854928731918335
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2214it [2:54:55,  3.17s/it]

objective/kl: -4.348912715911865
ppo/returns/mean: -0.25993233919143677
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2215it [2:54:58,  3.14s/it]

objective/kl: -4.270486831665039
ppo/returns/mean: -0.2516276240348816
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2216it [2:55:01,  3.13s/it]

objective/kl: -5.644512176513672
ppo/returns/mean: -0.20558059215545654
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


2217it [2:55:04,  3.11s/it]

objective/kl: -3.311152219772339
ppo/returns/mean: -0.28638944029808044
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2218it [2:55:08,  3.11s/it]

objective/kl: -6.501370429992676
ppo/returns/mean: -0.19443336129188538
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2219it [2:55:11,  3.10s/it]

objective/kl: -6.8572845458984375
ppo/returns/mean: -0.18164975941181183
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2220it [2:55:14,  3.11s/it]

objective/kl: -9.961685180664062
ppo/returns/mean: -0.05702952295541763
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2221it [2:55:17,  3.11s/it]

objective/kl: -16.236358642578125
ppo/returns/mean: 0.22497454285621643
ppo/policy/advantages_mean: 1.3969838619232178e-08
---------------------------------------------------------------------------------------------------


2222it [2:55:20,  3.15s/it]

objective/kl: -13.607583045959473
ppo/returns/mean: 0.1779913306236267
ppo/policy/advantages_mean: -1.0244548320770264e-08
---------------------------------------------------------------------------------------------------


2223it [2:55:23,  3.15s/it]

objective/kl: -13.299487113952637
ppo/returns/mean: 0.20597223937511444
ppo/policy/advantages_mean: 9.313225746154785e-10
---------------------------------------------------------------------------------------------------


2224it [2:55:26,  3.14s/it]

objective/kl: -8.055574417114258
ppo/returns/mean: 0.05874102562665939
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


2225it [2:55:30,  3.12s/it]

objective/kl: -10.352998733520508
ppo/returns/mean: 0.22308091819286346
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2226it [2:55:33,  3.14s/it]

objective/kl: -8.628063201904297
ppo/returns/mean: 0.12727749347686768
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2227it [2:55:36,  3.13s/it]

objective/kl: -11.841022491455078
ppo/returns/mean: 0.31437528133392334
ppo/policy/advantages_mean: 3.725290298461914e-08
---------------------------------------------------------------------------------------------------


2228it [2:55:39,  3.12s/it]

objective/kl: -17.22858428955078
ppo/returns/mean: 0.5065933465957642
ppo/policy/advantages_mean: -4.284083843231201e-08
---------------------------------------------------------------------------------------------------


2229it [2:55:42,  3.10s/it]

objective/kl: -15.187849044799805
ppo/returns/mean: 0.4612810015678406
ppo/policy/advantages_mean: -2.60770320892334e-08
---------------------------------------------------------------------------------------------------


2230it [2:55:45,  3.13s/it]

objective/kl: -12.992412567138672
ppo/returns/mean: 0.39645349979400635
ppo/policy/advantages_mean: 4.0978193283081055e-08
---------------------------------------------------------------------------------------------------


2231it [2:55:48,  3.12s/it]

objective/kl: -12.43167495727539
ppo/returns/mean: 0.3639468252658844
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2232it [2:55:51,  3.10s/it]

objective/kl: -11.782833099365234
ppo/returns/mean: 0.291961669921875
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2233it [2:55:54,  3.10s/it]

objective/kl: -10.514561653137207
ppo/returns/mean: 0.17719849944114685
ppo/policy/advantages_mean: -2.60770320892334e-08
---------------------------------------------------------------------------------------------------


2234it [2:55:57,  3.10s/it]

objective/kl: -9.376591682434082
ppo/returns/mean: 0.09622754156589508
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


2235it [2:56:01,  3.11s/it]

objective/kl: -11.719963073730469
ppo/returns/mean: 0.11170315742492676
ppo/policy/advantages_mean: 2.7939677238464355e-08
---------------------------------------------------------------------------------------------------


2236it [2:56:04,  3.12s/it]

objective/kl: -12.646482467651367
ppo/returns/mean: 0.06602521985769272
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2237it [2:56:07,  3.11s/it]

objective/kl: -14.428855895996094
ppo/returns/mean: 0.13606655597686768
ppo/policy/advantages_mean: 2.7939677238464355e-09
---------------------------------------------------------------------------------------------------


2238it [2:56:10,  3.13s/it]

objective/kl: -14.034729957580566
ppo/returns/mean: 0.12847240269184113
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2239it [2:56:13,  3.12s/it]

objective/kl: -15.291754722595215
ppo/returns/mean: 0.07984526455402374
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2240it [2:56:17,  3.23s/it]

objective/kl: -14.44390869140625
ppo/returns/mean: 0.028063137084245682
ppo/policy/advantages_mean: 9.313225746154785e-10
---------------------------------------------------------------------------------------------------


2241it [2:56:20,  3.19s/it]

objective/kl: -15.799878120422363
ppo/returns/mean: 0.11671025305986404
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


2242it [2:56:23,  3.16s/it]

objective/kl: -15.236211776733398
ppo/returns/mean: 0.09115561842918396
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


2243it [2:56:26,  3.15s/it]

objective/kl: -18.52407455444336
ppo/returns/mean: 0.19780094921588898
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2244it [2:56:29,  3.14s/it]

objective/kl: -17.63465118408203
ppo/returns/mean: 0.1932118535041809
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2245it [2:56:32,  3.21s/it]

objective/kl: -17.616687774658203
ppo/returns/mean: 0.1899682879447937
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2246it [2:56:36,  3.18s/it]

objective/kl: -17.51814842224121
ppo/returns/mean: 0.20283672213554382
ppo/policy/advantages_mean: -1.5832483768463135e-08
---------------------------------------------------------------------------------------------------


2247it [2:56:39,  3.25s/it]

objective/kl: -19.049724578857422
ppo/returns/mean: 0.2630433440208435
ppo/policy/advantages_mean: -3.725290298461914e-08
---------------------------------------------------------------------------------------------------


2248it [2:56:42,  3.28s/it]

objective/kl: -17.008861541748047
ppo/returns/mean: 0.17377156019210815
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2249it [2:56:45,  3.23s/it]

objective/kl: -16.02988052368164
ppo/returns/mean: 0.13582289218902588
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2250it [2:56:49,  3.20s/it]

objective/kl: -17.314655303955078
ppo/returns/mean: 0.17954376339912415
ppo/policy/advantages_mean: -0.000566801056265831
---------------------------------------------------------------------------------------------------


2251it [2:56:52,  3.24s/it]

objective/kl: -18.331886291503906
ppo/returns/mean: 0.19147765636444092
ppo/policy/advantages_mean: 3.3527612686157227e-08
---------------------------------------------------------------------------------------------------


2252it [2:56:55,  3.25s/it]

objective/kl: -20.405607223510742
ppo/returns/mean: 0.2466265857219696
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2253it [2:56:58,  3.21s/it]

objective/kl: -21.049331665039062
ppo/returns/mean: 0.2458399385213852
ppo/policy/advantages_mean: 2.514570951461792e-08
---------------------------------------------------------------------------------------------------


2254it [2:57:01,  3.19s/it]

objective/kl: -21.30322265625
ppo/returns/mean: 0.24905447661876678
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


2255it [2:57:05,  3.18s/it]

objective/kl: -20.421611785888672
ppo/returns/mean: 0.23304179310798645
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2256it [2:57:08,  3.23s/it]

objective/kl: -22.542251586914062
ppo/returns/mean: 0.29032444953918457
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2257it [2:57:11,  3.20s/it]

objective/kl: -24.170591354370117
ppo/returns/mean: 0.36313197016716003
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2258it [2:57:14,  3.17s/it]

objective/kl: -24.012746810913086
ppo/returns/mean: 0.37224358320236206
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2259it [2:57:17,  3.16s/it]

objective/kl: -21.812578201293945
ppo/returns/mean: 0.30708038806915283
ppo/policy/advantages_mean: 1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


2260it [2:57:20,  3.15s/it]

objective/kl: -21.425491333007812
ppo/returns/mean: 0.29842180013656616
ppo/policy/advantages_mean: -0.0017446577548980713
---------------------------------------------------------------------------------------------------


2261it [2:57:24,  3.15s/it]

objective/kl: -19.06401824951172
ppo/returns/mean: 0.2375609278678894
ppo/policy/advantages_mean: -0.0010625720024108887
---------------------------------------------------------------------------------------------------


2262it [2:57:27,  3.13s/it]

objective/kl: -19.0958251953125
ppo/returns/mean: 0.1867476999759674
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2263it [2:57:30,  3.13s/it]

objective/kl: -21.057971954345703
ppo/returns/mean: 0.23261651396751404
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2264it [2:57:33,  3.12s/it]

objective/kl: -25.38443374633789
ppo/returns/mean: 0.37251096963882446
ppo/policy/advantages_mean: -3.725290298461914e-08
---------------------------------------------------------------------------------------------------


2265it [2:57:36,  3.12s/it]

objective/kl: -24.79917335510254
ppo/returns/mean: 0.3665054142475128
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2266it [2:57:39,  3.13s/it]

objective/kl: -22.69086456298828
ppo/returns/mean: 0.3105742335319519
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2267it [2:57:42,  3.12s/it]

objective/kl: -23.374267578125
ppo/returns/mean: 0.35129493474960327
ppo/policy/advantages_mean: 4.470348358154297e-08
---------------------------------------------------------------------------------------------------


2268it [2:57:45,  3.12s/it]

objective/kl: -22.67015266418457
ppo/returns/mean: 0.33611029386520386
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2269it [2:57:48,  3.12s/it]

objective/kl: -24.154125213623047
ppo/returns/mean: 0.39354363083839417
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


2270it [2:57:52,  3.13s/it]

objective/kl: -23.781986236572266
ppo/returns/mean: 0.3939763903617859
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2271it [2:57:55,  3.14s/it]

objective/kl: -27.48059844970703
ppo/returns/mean: 0.48485997319221497
ppo/policy/advantages_mean: 4.470348358154297e-08
---------------------------------------------------------------------------------------------------


2272it [2:57:58,  3.12s/it]

objective/kl: -28.253419876098633
ppo/returns/mean: 0.5093389749526978
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


2273it [2:58:01,  3.14s/it]

objective/kl: -27.920040130615234
ppo/returns/mean: 0.5067922472953796
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2274it [2:58:04,  3.19s/it]

objective/kl: -26.235639572143555
ppo/returns/mean: 0.42894861102104187
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2275it [2:58:07,  3.16s/it]

objective/kl: -26.94959259033203
ppo/returns/mean: 0.46683332324028015
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2276it [2:58:11,  3.14s/it]

objective/kl: -28.899789810180664
ppo/returns/mean: 0.5288881063461304
ppo/policy/advantages_mean: -1.5832483768463135e-08
---------------------------------------------------------------------------------------------------


2277it [2:58:14,  3.13s/it]

objective/kl: -26.892776489257812
ppo/returns/mean: 0.4353787302970886
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2278it [2:58:17,  3.12s/it]

objective/kl: -29.69025421142578
ppo/returns/mean: 0.5477333068847656
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2279it [2:58:20,  3.12s/it]

objective/kl: -32.92777633666992
ppo/returns/mean: 0.703406572341919
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2280it [2:58:23,  3.12s/it]

objective/kl: -32.27204132080078
ppo/returns/mean: 0.7607330083847046
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2281it [2:58:26,  3.12s/it]

objective/kl: -34.2139892578125
ppo/returns/mean: 0.9184033870697021
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2282it [2:58:29,  3.12s/it]

objective/kl: -36.383052825927734
ppo/returns/mean: 1.0978479385375977
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2283it [2:58:32,  3.11s/it]

objective/kl: -37.2474250793457
ppo/returns/mean: 1.2229762077331543
ppo/policy/advantages_mean: -0.0006666667759418488
---------------------------------------------------------------------------------------------------


2284it [2:58:35,  3.13s/it]

objective/kl: -38.377685546875
ppo/returns/mean: 1.3198028802871704
ppo/policy/advantages_mean: -4.7497451305389404e-08
---------------------------------------------------------------------------------------------------


2285it [2:58:39,  3.13s/it]

objective/kl: -28.3740234375
ppo/returns/mean: 1.0041064023971558
ppo/policy/advantages_mean: 0.00019868463277816772
---------------------------------------------------------------------------------------------------


2286it [2:58:42,  3.12s/it]

objective/kl: -30.99542236328125
ppo/returns/mean: 1.101648211479187
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2287it [2:58:45,  3.11s/it]

objective/kl: -32.27295684814453
ppo/returns/mean: 1.1639336347579956
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2288it [2:58:48,  3.11s/it]

objective/kl: -27.91425132751465
ppo/returns/mean: 0.9850199222564697
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


2289it [2:58:51,  3.10s/it]

objective/kl: -29.865629196166992
ppo/returns/mean: 1.1160954236984253
ppo/policy/advantages_mean: 3.91155481338501e-08
---------------------------------------------------------------------------------------------------


2290it [2:58:54,  3.09s/it]

objective/kl: -25.067276000976562
ppo/returns/mean: 0.9326492547988892
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2291it [2:58:57,  3.11s/it]

objective/kl: -23.688705444335938
ppo/returns/mean: 0.8397277593612671
ppo/policy/advantages_mean: 1.210719347000122e-08
---------------------------------------------------------------------------------------------------


2292it [2:59:01,  3.22s/it]

objective/kl: -14.82077693939209
ppo/returns/mean: 0.4450664520263672
ppo/policy/advantages_mean: 0.0012535899877548218
---------------------------------------------------------------------------------------------------


2293it [2:59:04,  3.22s/it]

objective/kl: 7.06666374206543
ppo/returns/mean: -0.38486248254776
ppo/policy/advantages_mean: 0.0008615180850028992
---------------------------------------------------------------------------------------------------


2294it [2:59:07,  3.23s/it]

objective/kl: 34.150474548339844
ppo/returns/mean: -1.2016992568969727
ppo/policy/advantages_mean: 0.0004888204857707024
---------------------------------------------------------------------------------------------------


2295it [2:59:11,  3.30s/it]

objective/kl: 29.270000457763672
ppo/returns/mean: -1.0487644672393799
ppo/policy/advantages_mean: -0.0017144866287708282
---------------------------------------------------------------------------------------------------


2296it [2:59:14,  3.26s/it]

objective/kl: 29.024417877197266
ppo/returns/mean: -1.0855882167816162
ppo/policy/advantages_mean: -0.0014315787702798843
---------------------------------------------------------------------------------------------------


2297it [2:59:17,  3.23s/it]

objective/kl: 29.635150909423828
ppo/returns/mean: -1.1999024152755737
ppo/policy/advantages_mean: -0.0005900729447603226
---------------------------------------------------------------------------------------------------


2298it [2:59:20,  3.27s/it]

objective/kl: 21.49726104736328
ppo/returns/mean: -1.0597968101501465
ppo/policy/advantages_mean: 0.0022545848041772842
---------------------------------------------------------------------------------------------------


2299it [2:59:23,  3.23s/it]

objective/kl: 13.830841064453125
ppo/returns/mean: -0.9256471395492554
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


2300it [2:59:27,  3.24s/it]

objective/kl: 11.382282257080078
ppo/returns/mean: -0.9360322952270508
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2301it [2:59:30,  3.20s/it]

objective/kl: 6.490359306335449
ppo/returns/mean: -0.8433347344398499
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


2302it [2:59:34,  3.35s/it]

objective/kl: -0.28787466883659363
ppo/returns/mean: -0.6080300211906433
ppo/policy/advantages_mean: -4.842877388000488e-08
---------------------------------------------------------------------------------------------------


2303it [2:59:37,  3.27s/it]

objective/kl: -0.15964767336845398
ppo/returns/mean: -0.5743234157562256
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2304it [2:59:40,  3.24s/it]

objective/kl: -1.088472843170166
ppo/returns/mean: -0.4365520477294922
ppo/policy/advantages_mean: -4.0978193283081055e-08
---------------------------------------------------------------------------------------------------


2305it [2:59:43,  3.22s/it]

objective/kl: -4.579225540161133
ppo/returns/mean: -0.1962064802646637
ppo/policy/advantages_mean: -1.955777406692505e-08
---------------------------------------------------------------------------------------------------


2306it [2:59:46,  3.20s/it]

objective/kl: -7.076580047607422
ppo/returns/mean: -0.0923955887556076
ppo/policy/advantages_mean: -3.725290298461914e-08
---------------------------------------------------------------------------------------------------


2307it [2:59:49,  3.18s/it]

objective/kl: -6.781323432922363
ppo/returns/mean: -0.053671807050704956
ppo/policy/advantages_mean: -4.0978193283081055e-08
---------------------------------------------------------------------------------------------------


2308it [2:59:52,  3.16s/it]

objective/kl: -7.311618804931641
ppo/returns/mean: 0.004723755642771721
ppo/policy/advantages_mean: -6.332993507385254e-08
---------------------------------------------------------------------------------------------------


2309it [2:59:56,  3.16s/it]

objective/kl: -9.654315948486328
ppo/returns/mean: 0.053120750933885574
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2310it [2:59:59,  3.15s/it]

objective/kl: -9.30152702331543
ppo/returns/mean: 0.11385856568813324
ppo/policy/advantages_mean: 3.166496753692627e-08
---------------------------------------------------------------------------------------------------


2311it [3:00:02,  3.24s/it]

objective/kl: -9.244626998901367
ppo/returns/mean: 0.11558559536933899
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2312it [3:00:05,  3.20s/it]

objective/kl: -11.876510620117188
ppo/returns/mean: 0.1602911353111267
ppo/policy/advantages_mean: 6.51925802230835e-08
---------------------------------------------------------------------------------------------------


2313it [3:00:08,  3.18s/it]

objective/kl: -8.244873046875
ppo/returns/mean: 0.1663205921649933
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2314it [3:00:11,  3.16s/it]

objective/kl: -5.5293803215026855
ppo/returns/mean: 0.10173385590314865
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2315it [3:00:15,  3.15s/it]

objective/kl: -4.144465923309326
ppo/returns/mean: 0.07780017703771591
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2316it [3:00:18,  3.14s/it]

objective/kl: -2.9197113513946533
ppo/returns/mean: 0.03382226824760437
ppo/policy/advantages_mean: -3.725290298461914e-08
---------------------------------------------------------------------------------------------------


2317it [3:00:21,  3.17s/it]

objective/kl: -5.357215404510498
ppo/returns/mean: 0.09912270307540894
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2318it [3:00:25,  3.30s/it]

objective/kl: -1.955212116241455
ppo/returns/mean: -0.002450969535857439
ppo/policy/advantages_mean: 1.0244548320770264e-08
---------------------------------------------------------------------------------------------------


2319it [3:00:28,  3.25s/it]

objective/kl: -1.5998824834823608
ppo/returns/mean: -0.04325471445918083
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2320it [3:00:31,  3.20s/it]

objective/kl: -1.444122552871704
ppo/returns/mean: -0.06320999562740326
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2321it [3:00:34,  3.17s/it]

objective/kl: -2.1453819274902344
ppo/returns/mean: -0.06945323199033737
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2322it [3:00:37,  3.15s/it]

objective/kl: -2.0037546157836914
ppo/returns/mean: -0.12024278938770294
ppo/policy/advantages_mean: -2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


2323it [3:00:40,  3.14s/it]

objective/kl: -2.238399028778076
ppo/returns/mean: -0.11613144725561142
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2324it [3:00:43,  3.14s/it]

objective/kl: -1.450825810432434
ppo/returns/mean: -0.14973506331443787
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2325it [3:00:46,  3.16s/it]

objective/kl: -2.9174180030822754
ppo/returns/mean: -0.1587161421775818
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


2326it [3:00:50,  3.28s/it]

objective/kl: -5.428923606872559
ppo/returns/mean: -0.08507099747657776
ppo/policy/advantages_mean: -0.0008193347603082657
---------------------------------------------------------------------------------------------------


2327it [3:00:53,  3.25s/it]

objective/kl: -4.896465301513672
ppo/returns/mean: -0.1353960782289505
ppo/policy/advantages_mean: 0.0014381930232048035
---------------------------------------------------------------------------------------------------


2328it [3:00:56,  3.21s/it]

objective/kl: -5.269660949707031
ppo/returns/mean: -0.15994304418563843
ppo/policy/advantages_mean: -4.470348358154297e-08
---------------------------------------------------------------------------------------------------


2329it [3:00:59,  3.19s/it]

objective/kl: -8.236328125
ppo/returns/mean: -0.047235406935214996
ppo/policy/advantages_mean: -0.0015011727809906006
---------------------------------------------------------------------------------------------------


2330it [3:01:03,  3.17s/it]

objective/kl: -8.782776832580566
ppo/returns/mean: -0.061684221029281616
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2331it [3:01:06,  3.15s/it]

objective/kl: -7.343762397766113
ppo/returns/mean: -0.08620856702327728
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2332it [3:01:09,  3.21s/it]

objective/kl: -11.565049171447754
ppo/returns/mean: 0.001493309624493122
ppo/policy/advantages_mean: 0.00288594514131546
---------------------------------------------------------------------------------------------------


2333it [3:01:12,  3.18s/it]

objective/kl: -13.032085418701172
ppo/returns/mean: 0.036231715232133865
ppo/policy/advantages_mean: -0.0008020829409360886
---------------------------------------------------------------------------------------------------


2334it [3:01:15,  3.16s/it]

objective/kl: -15.475592613220215
ppo/returns/mean: 0.11835530400276184
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2335it [3:01:18,  3.15s/it]

objective/kl: -19.480649948120117
ppo/returns/mean: 0.21682550013065338
ppo/policy/advantages_mean: 0.0006226766854524612
---------------------------------------------------------------------------------------------------


2336it [3:01:21,  3.14s/it]

objective/kl: -23.874799728393555
ppo/returns/mean: 0.35318058729171753
ppo/policy/advantages_mean: 1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


2337it [3:01:25,  3.12s/it]

objective/kl: -29.365402221679688
ppo/returns/mean: 0.46002909541130066
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2338it [3:01:28,  3.12s/it]

objective/kl: -29.216968536376953
ppo/returns/mean: 0.42835748195648193
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2339it [3:01:31,  3.13s/it]

objective/kl: -25.92678451538086
ppo/returns/mean: 0.36311161518096924
ppo/policy/advantages_mean: -9.313225746154785e-10
---------------------------------------------------------------------------------------------------


2340it [3:01:34,  3.14s/it]

objective/kl: -26.159664154052734
ppo/returns/mean: 0.3824644684791565
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2341it [3:01:37,  3.12s/it]

objective/kl: -25.239713668823242
ppo/returns/mean: 0.3921394646167755
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2342it [3:01:40,  3.11s/it]

objective/kl: -26.546192169189453
ppo/returns/mean: 0.47770559787750244
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2343it [3:01:43,  3.09s/it]

objective/kl: -26.259899139404297
ppo/returns/mean: 0.48225077986717224
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2344it [3:01:46,  3.10s/it]

objective/kl: -27.73198699951172
ppo/returns/mean: 0.5460081100463867
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2345it [3:01:49,  3.08s/it]

objective/kl: -26.501914978027344
ppo/returns/mean: 0.5148460268974304
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


2346it [3:01:52,  3.07s/it]

objective/kl: -24.485435485839844
ppo/returns/mean: 0.45865702629089355
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2347it [3:01:55,  3.07s/it]

objective/kl: -23.377559661865234
ppo/returns/mean: 0.37781214714050293
ppo/policy/advantages_mean: 0.0014720037579536438
---------------------------------------------------------------------------------------------------


2348it [3:01:59,  3.08s/it]

objective/kl: -25.135658264160156
ppo/returns/mean: 0.43663522601127625
ppo/policy/advantages_mean: 0.0013016313314437866
---------------------------------------------------------------------------------------------------


2349it [3:02:02,  3.10s/it]

objective/kl: -30.516727447509766
ppo/returns/mean: 0.6168537139892578
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


2350it [3:02:05,  3.15s/it]

objective/kl: -28.627056121826172
ppo/returns/mean: 0.5549730062484741
ppo/policy/advantages_mean: 3.725290298461914e-08
---------------------------------------------------------------------------------------------------


2351it [3:02:08,  3.12s/it]

objective/kl: -27.005760192871094
ppo/returns/mean: 0.5128802061080933
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2352it [3:02:11,  3.16s/it]

objective/kl: -27.967195510864258
ppo/returns/mean: 0.5493181347846985
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2353it [3:02:14,  3.16s/it]

objective/kl: -26.29020881652832
ppo/returns/mean: 0.49029138684272766
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2354it [3:02:18,  3.14s/it]

objective/kl: -24.154939651489258
ppo/returns/mean: 0.4598504602909088
ppo/policy/advantages_mean: -3.166496753692627e-08
---------------------------------------------------------------------------------------------------


2355it [3:02:21,  3.12s/it]

objective/kl: -29.010265350341797
ppo/returns/mean: 0.5684254169464111
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2356it [3:02:24,  3.10s/it]

objective/kl: -31.09628677368164
ppo/returns/mean: 0.651346743106842
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


2357it [3:02:27,  3.11s/it]

objective/kl: -28.795406341552734
ppo/returns/mean: 0.5710040330886841
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2358it [3:02:30,  3.10s/it]

objective/kl: -30.87726593017578
ppo/returns/mean: 0.6537936925888062
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2359it [3:02:33,  3.09s/it]

objective/kl: -33.73485565185547
ppo/returns/mean: 0.7620384693145752
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


2360it [3:02:36,  3.13s/it]

objective/kl: -48.60854721069336
ppo/returns/mean: 1.257615327835083
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2361it [3:02:39,  3.12s/it]

objective/kl: -51.57918930053711
ppo/returns/mean: 1.4083524942398071
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2362it [3:02:42,  3.11s/it]

objective/kl: -47.124900817871094
ppo/returns/mean: 1.322861909866333
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


2363it [3:02:45,  3.12s/it]

objective/kl: -48.09716796875
ppo/returns/mean: 1.3597431182861328
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2364it [3:02:49,  3.12s/it]

objective/kl: -45.334983825683594
ppo/returns/mean: 1.3115266561508179
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


2365it [3:02:52,  3.13s/it]

objective/kl: -39.456241607666016
ppo/returns/mean: 1.1968228816986084
ppo/policy/advantages_mean: -4.6566128730773926e-09
---------------------------------------------------------------------------------------------------


2366it [3:02:55,  3.15s/it]

objective/kl: -31.488128662109375
ppo/returns/mean: 0.9669564366340637
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


2367it [3:02:58,  3.13s/it]

objective/kl: -23.665977478027344
ppo/returns/mean: 0.7218111157417297
ppo/policy/advantages_mean: -4.190951585769653e-08
---------------------------------------------------------------------------------------------------


2368it [3:03:01,  3.13s/it]

objective/kl: -19.405773162841797
ppo/returns/mean: 0.5879288911819458
ppo/policy/advantages_mean: 2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


2369it [3:03:04,  3.12s/it]

objective/kl: -22.782621383666992
ppo/returns/mean: 0.5858196020126343
ppo/policy/advantages_mean: 1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


2370it [3:03:07,  3.10s/it]

objective/kl: -23.396404266357422
ppo/returns/mean: 0.5214729309082031
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2371it [3:03:10,  3.10s/it]

objective/kl: -37.38959503173828
ppo/returns/mean: 0.9456882476806641
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2372it [3:03:14,  3.11s/it]

objective/kl: -50.409305572509766
ppo/returns/mean: 1.324843168258667
ppo/policy/advantages_mean: 1.0244548320770264e-08
---------------------------------------------------------------------------------------------------


2373it [3:03:17,  3.16s/it]

objective/kl: -50.95236587524414
ppo/returns/mean: 1.3035415410995483
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2374it [3:03:20,  3.14s/it]

objective/kl: -51.3704833984375
ppo/returns/mean: 1.3016486167907715
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2375it [3:03:23,  3.12s/it]

objective/kl: -52.901145935058594
ppo/returns/mean: 1.4052600860595703
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2376it [3:03:26,  3.14s/it]

objective/kl: -59.75846481323242
ppo/returns/mean: 1.604748010635376
ppo/policy/advantages_mean: -1.210719347000122e-08
---------------------------------------------------------------------------------------------------


2377it [3:03:29,  3.12s/it]

objective/kl: -59.00708770751953
ppo/returns/mean: 1.615085244178772
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


2378it [3:03:32,  3.11s/it]

objective/kl: -49.784019470214844
ppo/returns/mean: 1.338437557220459
ppo/policy/advantages_mean: -5.122274160385132e-09
---------------------------------------------------------------------------------------------------


2379it [3:03:35,  3.10s/it]

objective/kl: -45.68476867675781
ppo/returns/mean: 1.2479989528656006
ppo/policy/advantages_mean: 3.91155481338501e-08
---------------------------------------------------------------------------------------------------


2380it [3:03:39,  3.09s/it]

objective/kl: -50.67813491821289
ppo/returns/mean: 1.3764185905456543
ppo/policy/advantages_mean: 4.0978193283081055e-08
---------------------------------------------------------------------------------------------------


2381it [3:03:42,  3.28s/it]

objective/kl: -60.73241424560547
ppo/returns/mean: 1.6771278381347656
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2382it [3:03:45,  3.24s/it]

objective/kl: -62.3093147277832
ppo/returns/mean: 1.7335505485534668
ppo/policy/advantages_mean: -2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


2383it [3:03:48,  3.20s/it]

objective/kl: -61.05540466308594
ppo/returns/mean: 1.7278053760528564
ppo/policy/advantages_mean: -3.3527612686157227e-08
---------------------------------------------------------------------------------------------------


2384it [3:03:52,  3.17s/it]

objective/kl: -51.84501266479492
ppo/returns/mean: 1.517925500869751
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2385it [3:03:55,  3.16s/it]

objective/kl: -53.052730560302734
ppo/returns/mean: 1.5685745477676392
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2386it [3:03:58,  3.28s/it]

objective/kl: -50.93241882324219
ppo/returns/mean: 1.5129460096359253
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2387it [3:04:02,  3.31s/it]

objective/kl: -46.924625396728516
ppo/returns/mean: 1.4220833778381348
ppo/policy/advantages_mean: -4.0978193283081055e-08
---------------------------------------------------------------------------------------------------


2388it [3:04:05,  3.26s/it]

objective/kl: -47.259613037109375
ppo/returns/mean: 1.4337198734283447
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2389it [3:04:08,  3.23s/it]

objective/kl: -45.37049865722656
ppo/returns/mean: 1.3760077953338623
ppo/policy/advantages_mean: -3.725290298461914e-08
---------------------------------------------------------------------------------------------------


2390it [3:04:11,  3.20s/it]

objective/kl: -45.19366455078125
ppo/returns/mean: 1.3648269176483154
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2391it [3:04:14,  3.23s/it]

objective/kl: -48.239131927490234
ppo/returns/mean: 1.4168365001678467
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2392it [3:04:18,  3.29s/it]

objective/kl: -44.893760681152344
ppo/returns/mean: 1.3118901252746582
ppo/policy/advantages_mean: -1.7695128917694092e-08
---------------------------------------------------------------------------------------------------


2393it [3:04:21,  3.25s/it]

objective/kl: -46.64799499511719
ppo/returns/mean: 1.3585119247436523
ppo/policy/advantages_mean: 2.1420419216156006e-08
---------------------------------------------------------------------------------------------------


2394it [3:04:24,  3.21s/it]

objective/kl: -43.26483154296875
ppo/returns/mean: 1.2395460605621338
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2395it [3:04:27,  3.20s/it]

objective/kl: -44.47377395629883
ppo/returns/mean: 1.252218246459961
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2396it [3:04:30,  3.17s/it]

objective/kl: -51.42053985595703
ppo/returns/mean: 1.4377307891845703
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2397it [3:04:33,  3.15s/it]

objective/kl: -48.41339111328125
ppo/returns/mean: 1.3478312492370605
ppo/policy/advantages_mean: -2.60770320892334e-08
---------------------------------------------------------------------------------------------------


2398it [3:04:37,  3.15s/it]

objective/kl: -50.044891357421875
ppo/returns/mean: 1.4003980159759521
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2399it [3:04:40,  3.15s/it]

objective/kl: -47.096336364746094
ppo/returns/mean: 1.3214019536972046
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2400it [3:04:43,  3.13s/it]

objective/kl: -49.36882781982422
ppo/returns/mean: 1.381914496421814
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2401it [3:04:47,  3.33s/it]

objective/kl: -48.960636138916016
ppo/returns/mean: 1.3674359321594238
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2402it [3:04:50,  3.24s/it]

objective/kl: -50.04969787597656
ppo/returns/mean: 1.3961708545684814
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2403it [3:04:53,  3.18s/it]

objective/kl: -48.77309036254883
ppo/returns/mean: 1.3658517599105835
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2404it [3:04:56,  3.13s/it]

objective/kl: -54.0305290222168
ppo/returns/mean: 1.5151782035827637
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2405it [3:04:59,  3.15s/it]

objective/kl: -51.93981170654297
ppo/returns/mean: 1.4534556865692139
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2406it [3:05:02,  3.16s/it]

objective/kl: -54.06655502319336
ppo/returns/mean: 1.5094037055969238
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2407it [3:05:05,  3.10s/it]

objective/kl: -54.83213806152344
ppo/returns/mean: 1.540584683418274
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


2408it [3:05:08,  3.09s/it]

objective/kl: -53.917057037353516
ppo/returns/mean: 1.4970096349716187
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2409it [3:05:11,  3.06s/it]

objective/kl: -53.14409255981445
ppo/returns/mean: 1.4902440309524536
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2410it [3:05:14,  3.04s/it]

objective/kl: -51.32960510253906
ppo/returns/mean: 1.4193034172058105
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2411it [3:05:17,  3.03s/it]

objective/kl: -54.86856460571289
ppo/returns/mean: 1.5067691802978516
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2412it [3:05:21,  3.16s/it]

objective/kl: -53.80891799926758
ppo/returns/mean: 1.476466417312622
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2413it [3:05:24,  3.12s/it]

objective/kl: -57.686500549316406
ppo/returns/mean: 1.5817184448242188
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


2414it [3:05:27,  3.10s/it]

objective/kl: -60.364200592041016
ppo/returns/mean: 1.6640926599502563
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


2415it [3:05:30,  3.08s/it]

objective/kl: -59.38406753540039
ppo/returns/mean: 1.6436082124710083
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2416it [3:05:33,  3.09s/it]

objective/kl: -57.92674255371094
ppo/returns/mean: 1.635096788406372
ppo/policy/advantages_mean: -1.30385160446167e-08
---------------------------------------------------------------------------------------------------


2417it [3:05:36,  3.07s/it]

objective/kl: -59.62046813964844
ppo/returns/mean: 1.7055549621582031
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2418it [3:05:39,  3.06s/it]

objective/kl: -57.69438934326172
ppo/returns/mean: 1.6612505912780762
ppo/policy/advantages_mean: 3.725290298461914e-08
---------------------------------------------------------------------------------------------------


2419it [3:05:42,  3.05s/it]

objective/kl: -57.67862319946289
ppo/returns/mean: 1.6502411365509033
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2420it [3:05:45,  3.05s/it]

objective/kl: -56.822608947753906
ppo/returns/mean: 1.6593637466430664
ppo/policy/advantages_mean: -3.5390257835388184e-08
---------------------------------------------------------------------------------------------------


2421it [3:05:48,  3.04s/it]

objective/kl: -58.27642059326172
ppo/returns/mean: 1.69560706615448
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2422it [3:05:51,  3.10s/it]

objective/kl: -58.164146423339844
ppo/returns/mean: 1.7232666015625
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2423it [3:05:54,  3.10s/it]

objective/kl: -55.086395263671875
ppo/returns/mean: 1.6029682159423828
ppo/policy/advantages_mean: -1.0244548320770264e-08
---------------------------------------------------------------------------------------------------


2424it [3:05:57,  3.11s/it]

objective/kl: -56.787628173828125
ppo/returns/mean: 1.6474015712738037
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2425it [3:06:01,  3.12s/it]

objective/kl: -54.685184478759766
ppo/returns/mean: 1.5567340850830078
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2426it [3:06:04,  3.14s/it]

objective/kl: -54.51068115234375
ppo/returns/mean: 1.5189629793167114
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


2427it [3:06:07,  3.12s/it]

objective/kl: -55.51031494140625
ppo/returns/mean: 1.556282639503479
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2428it [3:06:10,  3.10s/it]

objective/kl: -57.99791717529297
ppo/returns/mean: 1.6704537868499756
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2429it [3:06:13,  3.08s/it]

objective/kl: -55.221839904785156
ppo/returns/mean: 1.5327718257904053
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2430it [3:06:16,  3.06s/it]

objective/kl: -59.18214416503906
ppo/returns/mean: 1.6812270879745483
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2431it [3:06:19,  3.06s/it]

objective/kl: -59.907814025878906
ppo/returns/mean: 1.690873384475708
ppo/policy/advantages_mean: 3.3527612686157227e-08
---------------------------------------------------------------------------------------------------


2432it [3:06:22,  3.05s/it]

objective/kl: -59.505043029785156
ppo/returns/mean: 1.680570363998413
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2433it [3:06:25,  3.09s/it]

objective/kl: -58.160438537597656
ppo/returns/mean: 1.6530042886734009
ppo/policy/advantages_mean: 2.1420419216156006e-08
---------------------------------------------------------------------------------------------------


2434it [3:06:28,  3.07s/it]

objective/kl: -60.27159118652344
ppo/returns/mean: 1.6967686414718628
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2435it [3:06:31,  3.12s/it]

objective/kl: -61.14556121826172
ppo/returns/mean: 1.7230733633041382
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2436it [3:06:35,  3.11s/it]

objective/kl: -56.3558349609375
ppo/returns/mean: 1.5549876689910889
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2437it [3:06:38,  3.09s/it]

objective/kl: -60.09734344482422
ppo/returns/mean: 1.7144813537597656
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


2438it [3:06:41,  3.06s/it]

objective/kl: -60.11223220825195
ppo/returns/mean: 1.7024505138397217
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2439it [3:06:44,  3.04s/it]

objective/kl: -61.272613525390625
ppo/returns/mean: 1.7328733205795288
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2440it [3:06:47,  3.15s/it]

objective/kl: -62.46894836425781
ppo/returns/mean: 1.7635908126831055
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2441it [3:06:50,  3.22s/it]

objective/kl: -59.886898040771484
ppo/returns/mean: 1.6756775379180908
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2442it [3:06:53,  3.16s/it]

objective/kl: -61.94091033935547
ppo/returns/mean: 1.7490479946136475
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2443it [3:06:57,  3.15s/it]

objective/kl: -58.657745361328125
ppo/returns/mean: 1.630363941192627
ppo/policy/advantages_mean: 1.210719347000122e-08
---------------------------------------------------------------------------------------------------


2444it [3:07:00,  3.10s/it]

objective/kl: -60.95953369140625
ppo/returns/mean: 1.7182238101959229
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2445it [3:07:03,  3.09s/it]

objective/kl: -61.11068344116211
ppo/returns/mean: 1.7050364017486572
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2446it [3:07:06,  3.07s/it]

objective/kl: -61.24814224243164
ppo/returns/mean: 1.7045947313308716
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2447it [3:07:09,  3.06s/it]

objective/kl: -64.47252655029297
ppo/returns/mean: 1.7942078113555908
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2448it [3:07:12,  3.04s/it]

objective/kl: -67.59992980957031
ppo/returns/mean: 1.9182835817337036
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2449it [3:07:15,  3.03s/it]

objective/kl: -69.381591796875
ppo/returns/mean: 2.000730037689209
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2450it [3:07:18,  3.02s/it]

objective/kl: -67.06623840332031
ppo/returns/mean: 1.9433863162994385
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2451it [3:07:21,  3.01s/it]

objective/kl: -64.36931610107422
ppo/returns/mean: 1.8394259214401245
ppo/policy/advantages_mean: 4.6566128730773926e-09
---------------------------------------------------------------------------------------------------


2452it [3:07:24,  3.02s/it]

objective/kl: -64.37296295166016
ppo/returns/mean: 1.8405141830444336
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2453it [3:07:27,  3.02s/it]

objective/kl: -65.38236999511719
ppo/returns/mean: 1.851659893989563
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2454it [3:07:30,  3.09s/it]

objective/kl: -60.10298156738281
ppo/returns/mean: 1.7045503854751587
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2455it [3:07:33,  3.07s/it]

objective/kl: -61.693756103515625
ppo/returns/mean: 1.7442939281463623
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2456it [3:07:36,  3.04s/it]

objective/kl: -66.25732421875
ppo/returns/mean: 1.8632714748382568
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2457it [3:07:39,  3.03s/it]

objective/kl: -64.89122009277344
ppo/returns/mean: 1.8184950351715088
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2458it [3:07:42,  3.04s/it]

objective/kl: -63.61478042602539
ppo/returns/mean: 1.785681128501892
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2459it [3:07:45,  3.15s/it]

objective/kl: -65.821533203125
ppo/returns/mean: 1.83636474609375
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2460it [3:07:48,  3.12s/it]

objective/kl: -63.29826736450195
ppo/returns/mean: 1.7407639026641846
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2461it [3:07:52,  3.10s/it]

objective/kl: -62.90361022949219
ppo/returns/mean: 1.7360219955444336
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2462it [3:07:55,  3.09s/it]

objective/kl: -63.21460723876953
ppo/returns/mean: 1.752659559249878
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2463it [3:07:58,  3.07s/it]

objective/kl: -67.89541625976562
ppo/returns/mean: 1.9098012447357178
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2464it [3:08:01,  3.07s/it]

objective/kl: -65.42988586425781
ppo/returns/mean: 1.8205413818359375
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2465it [3:08:04,  3.14s/it]

objective/kl: -63.26629638671875
ppo/returns/mean: 1.741662859916687
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2466it [3:08:07,  3.09s/it]

objective/kl: -67.01504516601562
ppo/returns/mean: 1.8624484539031982
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2467it [3:08:10,  3.06s/it]

objective/kl: -66.2872085571289
ppo/returns/mean: 1.855567455291748
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2468it [3:08:13,  3.04s/it]

objective/kl: -66.26868438720703
ppo/returns/mean: 1.8399629592895508
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2469it [3:08:16,  3.02s/it]

objective/kl: -64.03208923339844
ppo/returns/mean: 1.7706975936889648
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2470it [3:08:19,  3.01s/it]

objective/kl: -62.82432556152344
ppo/returns/mean: 1.7129576206207275
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2471it [3:08:22,  3.13s/it]

objective/kl: -64.44371032714844
ppo/returns/mean: 1.81596839427948
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2472it [3:08:25,  3.09s/it]

objective/kl: -62.65570831298828
ppo/returns/mean: 1.7413825988769531
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


2473it [3:08:28,  3.06s/it]

objective/kl: -63.7159423828125
ppo/returns/mean: 1.7777005434036255
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2474it [3:08:31,  3.03s/it]

objective/kl: -63.25116729736328
ppo/returns/mean: 1.7727103233337402
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2475it [3:08:35,  3.12s/it]

objective/kl: -62.13011932373047
ppo/returns/mean: 1.68906831741333
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2476it [3:08:38,  3.09s/it]

objective/kl: -62.30365753173828
ppo/returns/mean: 1.6677435636520386
ppo/policy/advantages_mean: 1.210719347000122e-08
---------------------------------------------------------------------------------------------------


2477it [3:08:41,  3.21s/it]

objective/kl: -63.75313949584961
ppo/returns/mean: 1.6910148859024048
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2478it [3:08:44,  3.25s/it]

objective/kl: -66.51246643066406
ppo/returns/mean: 1.8129585981369019
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


2479it [3:08:47,  3.18s/it]

objective/kl: -66.46235656738281
ppo/returns/mean: 1.8420335054397583
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2480it [3:08:50,  3.13s/it]

objective/kl: -62.85386657714844
ppo/returns/mean: 1.6952241659164429
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2481it [3:08:54,  3.11s/it]

objective/kl: -66.15919494628906
ppo/returns/mean: 1.787529706954956
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2482it [3:08:57,  3.32s/it]

objective/kl: -63.63994598388672
ppo/returns/mean: 1.7223381996154785
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2483it [3:09:00,  3.24s/it]

objective/kl: -65.837890625
ppo/returns/mean: 1.7692959308624268
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2484it [3:09:04,  3.42s/it]

objective/kl: -68.98223114013672
ppo/returns/mean: 1.8743269443511963
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2485it [3:09:07,  3.32s/it]

objective/kl: -66.52658081054688
ppo/returns/mean: 1.77239990234375
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2486it [3:09:11,  3.38s/it]

objective/kl: -67.99134826660156
ppo/returns/mean: 1.7871615886688232
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


2487it [3:09:14,  3.27s/it]

objective/kl: -69.13784790039062
ppo/returns/mean: 1.826669692993164
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2488it [3:09:17,  3.28s/it]

objective/kl: -69.17474365234375
ppo/returns/mean: 1.8242424726486206
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2489it [3:09:20,  3.24s/it]

objective/kl: -69.74297332763672
ppo/returns/mean: 1.8567601442337036
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2490it [3:09:23,  3.21s/it]

objective/kl: -72.1221694946289
ppo/returns/mean: 1.8849225044250488
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


2491it [3:09:26,  3.14s/it]

objective/kl: -74.37045288085938
ppo/returns/mean: 1.9416284561157227
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2492it [3:09:29,  3.10s/it]

objective/kl: -76.83180236816406
ppo/returns/mean: 2.02767014503479
ppo/policy/advantages_mean: -1.30385160446167e-08
---------------------------------------------------------------------------------------------------


2493it [3:09:32,  3.07s/it]

objective/kl: -73.585205078125
ppo/returns/mean: 1.9553160667419434
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2494it [3:09:35,  3.04s/it]

objective/kl: -76.9853286743164
ppo/returns/mean: 2.058350086212158
ppo/policy/advantages_mean: -3.725290298461914e-08
---------------------------------------------------------------------------------------------------


2495it [3:09:38,  3.03s/it]

objective/kl: -76.46741485595703
ppo/returns/mean: 2.055126667022705
ppo/policy/advantages_mean: 4.6566128730773926e-09
---------------------------------------------------------------------------------------------------


2496it [3:09:42,  3.09s/it]

objective/kl: -77.2347183227539
ppo/returns/mean: 2.0671441555023193
ppo/policy/advantages_mean: -2.421438694000244e-08
---------------------------------------------------------------------------------------------------


2497it [3:09:45,  3.06s/it]

objective/kl: -75.35022735595703
ppo/returns/mean: 2.0060877799987793
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2498it [3:09:48,  3.05s/it]

objective/kl: -78.18026733398438
ppo/returns/mean: 2.103497266769409
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2499it [3:09:51,  3.03s/it]

objective/kl: -77.16278076171875
ppo/returns/mean: 2.0887832641601562
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2500it [3:09:54,  3.03s/it]

objective/kl: -79.14527893066406
ppo/returns/mean: 2.157151699066162
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2501it [3:09:57,  3.01s/it]

objective/kl: -77.94459533691406
ppo/returns/mean: 2.1071505546569824
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2502it [3:10:00,  3.24s/it]

objective/kl: -75.57864379882812
ppo/returns/mean: 2.049605131149292
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2503it [3:10:04,  3.30s/it]

objective/kl: -76.89201354980469
ppo/returns/mean: 2.0734448432922363
ppo/policy/advantages_mean: -3.166496753692627e-08
---------------------------------------------------------------------------------------------------


2504it [3:10:07,  3.30s/it]

objective/kl: -74.05455780029297
ppo/returns/mean: 2.012423038482666
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


2505it [3:10:10,  3.21s/it]

objective/kl: -77.35466003417969
ppo/returns/mean: 2.1107535362243652
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2506it [3:10:13,  3.15s/it]

objective/kl: -76.3179931640625
ppo/returns/mean: 2.0837929248809814
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2507it [3:10:16,  3.10s/it]

objective/kl: -81.6551513671875
ppo/returns/mean: 2.248305559158325
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2508it [3:10:19,  3.06s/it]

objective/kl: -77.30892944335938
ppo/returns/mean: 2.09664249420166
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2509it [3:10:22,  3.04s/it]

objective/kl: -72.53160858154297
ppo/returns/mean: 1.94684898853302
ppo/policy/advantages_mean: 1.0244548320770264e-08
---------------------------------------------------------------------------------------------------


2510it [3:10:25,  3.03s/it]

objective/kl: -77.32588958740234
ppo/returns/mean: 2.107211112976074
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2511it [3:10:28,  3.01s/it]

objective/kl: -74.96554565429688
ppo/returns/mean: 1.9885694980621338
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2512it [3:10:31,  3.01s/it]

objective/kl: -75.82928466796875
ppo/returns/mean: 2.0102434158325195
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2513it [3:10:34,  3.01s/it]

objective/kl: -78.2525634765625
ppo/returns/mean: 2.048030376434326
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2514it [3:10:37,  3.00s/it]

objective/kl: -76.98995208740234
ppo/returns/mean: 2.0106277465820312
ppo/policy/advantages_mean: -9.313225746154785e-10
---------------------------------------------------------------------------------------------------


2515it [3:10:40,  3.01s/it]

objective/kl: -80.6486587524414
ppo/returns/mean: 2.1255874633789062
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2516it [3:10:44,  3.15s/it]

objective/kl: -81.869140625
ppo/returns/mean: 2.193298816680908
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2517it [3:10:47,  3.10s/it]

objective/kl: -78.11820220947266
ppo/returns/mean: 2.034808397293091
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2518it [3:10:50,  3.08s/it]

objective/kl: -78.21481323242188
ppo/returns/mean: 2.041217565536499
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


2519it [3:10:53,  3.12s/it]

objective/kl: -76.80824279785156
ppo/returns/mean: 2.0296473503112793
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


2520it [3:10:56,  3.08s/it]

objective/kl: -79.44325256347656
ppo/returns/mean: 2.136855363845825
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2521it [3:10:59,  3.06s/it]

objective/kl: -76.11689758300781
ppo/returns/mean: 2.0106427669525146
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2522it [3:11:02,  3.16s/it]

objective/kl: -79.16937255859375
ppo/returns/mean: 2.087587356567383
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2523it [3:11:05,  3.19s/it]

objective/kl: -76.98606872558594
ppo/returns/mean: 2.0081872940063477
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2524it [3:11:09,  3.32s/it]

objective/kl: -75.16340637207031
ppo/returns/mean: 1.9347023963928223
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2525it [3:11:12,  3.29s/it]

objective/kl: -78.9280014038086
ppo/returns/mean: 2.059396743774414
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2526it [3:11:16,  3.27s/it]

objective/kl: -76.67755889892578
ppo/returns/mean: 1.9786441326141357
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


2527it [3:11:19,  3.28s/it]

objective/kl: -77.07243347167969
ppo/returns/mean: 1.9937191009521484
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2528it [3:11:22,  3.20s/it]

objective/kl: -79.59552001953125
ppo/returns/mean: 2.054849624633789
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2529it [3:11:26,  3.34s/it]

objective/kl: -79.01213073730469
ppo/returns/mean: 2.0391690731048584
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2530it [3:11:29,  3.36s/it]

objective/kl: -78.95309448242188
ppo/returns/mean: 2.0228559970855713
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


2531it [3:11:32,  3.25s/it]

objective/kl: -79.60382843017578
ppo/returns/mean: 2.0260844230651855
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2532it [3:11:35,  3.29s/it]

objective/kl: -84.18067169189453
ppo/returns/mean: 2.190913677215576
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2533it [3:11:39,  3.28s/it]

objective/kl: -86.53384399414062
ppo/returns/mean: 2.2968173027038574
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2534it [3:11:42,  3.21s/it]

objective/kl: -83.42695617675781
ppo/returns/mean: 2.1540849208831787
ppo/policy/advantages_mean: 1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


2535it [3:11:45,  3.27s/it]

objective/kl: -79.1275863647461
ppo/returns/mean: 2.0147294998168945
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2536it [3:11:48,  3.24s/it]

objective/kl: -82.93482208251953
ppo/returns/mean: 2.124729633331299
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2537it [3:11:51,  3.18s/it]

objective/kl: -83.52642822265625
ppo/returns/mean: 2.13364315032959
ppo/policy/advantages_mean: -6.51925802230835e-09
---------------------------------------------------------------------------------------------------


2538it [3:11:54,  3.12s/it]

objective/kl: -85.03692626953125
ppo/returns/mean: 2.1773180961608887
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


2539it [3:11:57,  3.10s/it]

objective/kl: -81.52841186523438
ppo/returns/mean: 1.9880342483520508
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2540it [3:12:00,  3.07s/it]

objective/kl: -83.16358947753906
ppo/returns/mean: 2.005560874938965
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2541it [3:12:03,  3.06s/it]

objective/kl: -85.42601013183594
ppo/returns/mean: 2.099630117416382
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


2542it [3:12:06,  3.05s/it]

objective/kl: -84.2007064819336
ppo/returns/mean: 2.0693185329437256
ppo/policy/advantages_mean: 3.3527612686157227e-08
---------------------------------------------------------------------------------------------------


2543it [3:12:09,  3.04s/it]

objective/kl: -83.25618743896484
ppo/returns/mean: 2.014009475708008
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2544it [3:12:12,  3.04s/it]

objective/kl: -87.25730895996094
ppo/returns/mean: 2.133453369140625
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2545it [3:12:16,  3.18s/it]

objective/kl: -90.5972900390625
ppo/returns/mean: 2.2436532974243164
ppo/policy/advantages_mean: 6.51925802230835e-09
---------------------------------------------------------------------------------------------------


2546it [3:12:19,  3.17s/it]

objective/kl: -89.03520965576172
ppo/returns/mean: 2.225126266479492
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2547it [3:12:22,  3.14s/it]

objective/kl: -91.32038116455078
ppo/returns/mean: 2.304570198059082
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


2548it [3:12:25,  3.11s/it]

objective/kl: -87.11519622802734
ppo/returns/mean: 2.1492717266082764
ppo/policy/advantages_mean: -2.60770320892334e-08
---------------------------------------------------------------------------------------------------


2549it [3:12:28,  3.11s/it]

objective/kl: -89.15476989746094
ppo/returns/mean: 2.248863458633423
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2550it [3:12:31,  3.08s/it]

objective/kl: -84.84805297851562
ppo/returns/mean: 2.07521653175354
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2551it [3:12:34,  3.07s/it]

objective/kl: -87.32450866699219
ppo/returns/mean: 2.166121006011963
ppo/policy/advantages_mean: -2.60770320892334e-08
---------------------------------------------------------------------------------------------------


2552it [3:12:38,  3.19s/it]

objective/kl: -85.61356353759766
ppo/returns/mean: 2.086702585220337
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


2553it [3:12:41,  3.23s/it]

objective/kl: -86.58466339111328
ppo/returns/mean: 2.095876932144165
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2554it [3:12:44,  3.17s/it]

objective/kl: -85.55852508544922
ppo/returns/mean: 2.064131736755371
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2555it [3:12:47,  3.13s/it]

objective/kl: -92.33938598632812
ppo/returns/mean: 2.2121572494506836
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2556it [3:12:50,  3.13s/it]

objective/kl: -87.78067016601562
ppo/returns/mean: 2.1096997261047363
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2557it [3:12:53,  3.10s/it]

objective/kl: -84.53286743164062
ppo/returns/mean: 2.023754596710205
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2558it [3:12:56,  3.08s/it]

objective/kl: -88.72268676757812
ppo/returns/mean: 2.120396137237549
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2559it [3:13:00,  3.17s/it]

objective/kl: -94.9852294921875
ppo/returns/mean: 2.3462977409362793
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2560it [3:13:03,  3.30s/it]

objective/kl: -90.61031341552734
ppo/returns/mean: 2.2761361598968506
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2561it [3:13:06,  3.22s/it]

objective/kl: -89.0597915649414
ppo/returns/mean: 2.2377686500549316
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2562it [3:13:09,  3.17s/it]

objective/kl: -89.82945251464844
ppo/returns/mean: 2.261408805847168
ppo/policy/advantages_mean: -1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


2563it [3:13:12,  3.13s/it]

objective/kl: -97.37138366699219
ppo/returns/mean: 2.4979875087738037
ppo/policy/advantages_mean: -2.7939677238464355e-08
---------------------------------------------------------------------------------------------------


2564it [3:13:15,  3.10s/it]

objective/kl: -93.7431640625
ppo/returns/mean: 2.446197986602783
ppo/policy/advantages_mean: 4.98257577419281e-08
---------------------------------------------------------------------------------------------------


2565it [3:13:19,  3.07s/it]

objective/kl: -86.90567779541016
ppo/returns/mean: 2.271817684173584
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2566it [3:13:22,  3.14s/it]

objective/kl: -85.83598327636719
ppo/returns/mean: 2.2387430667877197
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2567it [3:13:25,  3.16s/it]

objective/kl: -83.34522247314453
ppo/returns/mean: 2.1267521381378174
ppo/policy/advantages_mean: 4.7497451305389404e-08
---------------------------------------------------------------------------------------------------


2568it [3:13:28,  3.12s/it]

objective/kl: -76.06355285644531
ppo/returns/mean: 1.9104125499725342
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2569it [3:13:31,  3.15s/it]

objective/kl: -76.03131103515625
ppo/returns/mean: 1.8868179321289062
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2570it [3:13:34,  3.11s/it]

objective/kl: -75.28082275390625
ppo/returns/mean: 1.835753083229065
ppo/policy/advantages_mean: 3.3527612686157227e-08
---------------------------------------------------------------------------------------------------


2571it [3:13:37,  3.08s/it]

objective/kl: -69.15955352783203
ppo/returns/mean: 1.6662733554840088
ppo/policy/advantages_mean: 3.3527612686157227e-08
---------------------------------------------------------------------------------------------------


2572it [3:13:40,  3.06s/it]

objective/kl: -61.84440612792969
ppo/returns/mean: 1.4299073219299316
ppo/policy/advantages_mean: 4.0978193283081055e-08
---------------------------------------------------------------------------------------------------


2573it [3:13:43,  3.04s/it]

objective/kl: -51.78253936767578
ppo/returns/mean: 1.1410552263259888
ppo/policy/advantages_mean: -5.21540641784668e-08
---------------------------------------------------------------------------------------------------


2574it [3:13:46,  3.03s/it]

objective/kl: -49.16511535644531
ppo/returns/mean: 1.0153679847717285
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2575it [3:13:50,  3.19s/it]

objective/kl: -47.28289794921875
ppo/returns/mean: 0.9253379702568054
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2576it [3:13:53,  3.14s/it]

objective/kl: -49.22996139526367
ppo/returns/mean: 0.9502251744270325
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2577it [3:13:56,  3.10s/it]

objective/kl: -48.70232391357422
ppo/returns/mean: 0.9138651490211487
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2578it [3:13:59,  3.14s/it]

objective/kl: -50.14638137817383
ppo/returns/mean: 0.9235838651657104
ppo/policy/advantages_mean: 2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


2579it [3:14:02,  3.10s/it]

objective/kl: -47.48448181152344
ppo/returns/mean: 0.852240800857544
ppo/policy/advantages_mean: -3.5390257835388184e-08
---------------------------------------------------------------------------------------------------


2580it [3:14:06,  3.32s/it]

objective/kl: -49.12316131591797
ppo/returns/mean: 0.8858544826507568
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


2581it [3:14:09,  3.25s/it]

objective/kl: -49.808860778808594
ppo/returns/mean: 0.9190727472305298
ppo/policy/advantages_mean: -8.847564458847046e-09
---------------------------------------------------------------------------------------------------


2582it [3:14:12,  3.18s/it]

objective/kl: -48.978515625
ppo/returns/mean: 0.9011731147766113
ppo/policy/advantages_mean: -1.7695128917694092e-08
---------------------------------------------------------------------------------------------------


2583it [3:14:15,  3.12s/it]

objective/kl: -46.625152587890625
ppo/returns/mean: 0.8462265729904175
ppo/policy/advantages_mean: -4.6566128730773926e-08
---------------------------------------------------------------------------------------------------


2584it [3:14:18,  3.08s/it]

objective/kl: -47.52047348022461
ppo/returns/mean: 0.8556646108627319
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2585it [3:14:21,  3.05s/it]

objective/kl: -45.33414077758789
ppo/returns/mean: 0.8165628910064697
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2586it [3:14:24,  3.09s/it]

objective/kl: -49.238101959228516
ppo/returns/mean: 0.9285290241241455
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


2587it [3:14:27,  3.05s/it]

objective/kl: -45.625640869140625
ppo/returns/mean: 0.8160321712493896
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2588it [3:14:31,  3.14s/it]

objective/kl: -44.92633056640625
ppo/returns/mean: 0.7872104644775391
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2589it [3:14:33,  3.09s/it]

objective/kl: -46.52626037597656
ppo/returns/mean: 0.8335865139961243
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2590it [3:14:37,  3.07s/it]

objective/kl: -52.89179611206055
ppo/returns/mean: 0.9766964912414551
ppo/policy/advantages_mean: -3.632158041000366e-08
---------------------------------------------------------------------------------------------------


2591it [3:14:40,  3.28s/it]

objective/kl: -52.21446228027344
ppo/returns/mean: 0.9840048551559448
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2592it [3:14:43,  3.20s/it]

objective/kl: -52.731834411621094
ppo/returns/mean: 1.00728178024292
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2593it [3:14:46,  3.15s/it]

objective/kl: -54.74144744873047
ppo/returns/mean: 1.0847856998443604
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2594it [3:14:50,  3.37s/it]

objective/kl: -50.72650909423828
ppo/returns/mean: 0.9772714376449585
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2595it [3:14:53,  3.26s/it]

objective/kl: -49.37156677246094
ppo/returns/mean: 0.9339281320571899
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2596it [3:14:56,  3.19s/it]

objective/kl: -51.9078254699707
ppo/returns/mean: 0.9742037057876587
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2597it [3:14:59,  3.14s/it]

objective/kl: -49.06282043457031
ppo/returns/mean: 0.8833814859390259
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2598it [3:15:02,  3.14s/it]

objective/kl: -48.388362884521484
ppo/returns/mean: 0.861891508102417
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2599it [3:15:06,  3.15s/it]

objective/kl: -51.26337432861328
ppo/returns/mean: 0.960228681564331
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2600it [3:15:09,  3.14s/it]

objective/kl: -50.06193161010742
ppo/returns/mean: 0.9035285115242004
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2601it [3:15:12,  3.11s/it]

objective/kl: -48.66481399536133
ppo/returns/mean: 0.8428441882133484
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2602it [3:15:15,  3.08s/it]

objective/kl: -52.425697326660156
ppo/returns/mean: 0.9748209118843079
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2603it [3:15:18,  3.06s/it]

objective/kl: -52.51243209838867
ppo/returns/mean: 1.000846266746521
ppo/policy/advantages_mean: 2.421438694000244e-08
---------------------------------------------------------------------------------------------------


2604it [3:15:21,  3.08s/it]

objective/kl: -48.52735137939453
ppo/returns/mean: 0.8600653409957886
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2605it [3:15:24,  3.06s/it]

objective/kl: -47.0345344543457
ppo/returns/mean: 0.839441180229187
ppo/policy/advantages_mean: -1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


2606it [3:15:27,  3.05s/it]

objective/kl: -49.13520812988281
ppo/returns/mean: 0.9159866571426392
ppo/policy/advantages_mean: -1.30385160446167e-08
---------------------------------------------------------------------------------------------------


2607it [3:15:30,  3.13s/it]

objective/kl: -49.1279411315918
ppo/returns/mean: 0.9110411405563354
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2608it [3:15:34,  3.17s/it]

objective/kl: -50.222564697265625
ppo/returns/mean: 0.9252303838729858
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2609it [3:15:37,  3.13s/it]

objective/kl: -48.79215621948242
ppo/returns/mean: 0.8748172521591187
ppo/policy/advantages_mean: -4.190951585769653e-09
---------------------------------------------------------------------------------------------------


2610it [3:15:40,  3.10s/it]

objective/kl: -51.525978088378906
ppo/returns/mean: 0.9980040788650513
ppo/policy/advantages_mean: 1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


2611it [3:15:43,  3.07s/it]

objective/kl: -49.672523498535156
ppo/returns/mean: 0.9055830240249634
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2612it [3:15:46,  3.07s/it]

objective/kl: -51.65547180175781
ppo/returns/mean: 0.9813422560691833
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2613it [3:15:49,  3.05s/it]

objective/kl: -50.64525604248047
ppo/returns/mean: 0.9863181114196777
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2614it [3:15:52,  3.07s/it]

objective/kl: -51.33588409423828
ppo/returns/mean: 1.0050235986709595
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2615it [3:15:55,  3.05s/it]

objective/kl: -50.24964141845703
ppo/returns/mean: 0.956086277961731
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2616it [3:15:58,  3.14s/it]

objective/kl: -53.14513397216797
ppo/returns/mean: 1.0964081287384033
ppo/policy/advantages_mean: 1.210719347000122e-08
---------------------------------------------------------------------------------------------------


2617it [3:16:01,  3.10s/it]

objective/kl: -57.965065002441406
ppo/returns/mean: 1.269801139831543
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2618it [3:16:04,  3.12s/it]

objective/kl: -62.29924011230469
ppo/returns/mean: 1.4487265348434448
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


2619it [3:16:08,  3.20s/it]

objective/kl: -61.624393463134766
ppo/returns/mean: 1.474419116973877
ppo/policy/advantages_mean: -2.60770320892334e-08
---------------------------------------------------------------------------------------------------


2620it [3:16:11,  3.30s/it]

objective/kl: -59.01837158203125
ppo/returns/mean: 1.4554085731506348
ppo/policy/advantages_mean: -1.30385160446167e-08
---------------------------------------------------------------------------------------------------


2621it [3:16:14,  3.21s/it]

objective/kl: -55.01155090332031
ppo/returns/mean: 1.3611881732940674
ppo/policy/advantages_mean: -1.3969838619232178e-08
---------------------------------------------------------------------------------------------------


2622it [3:16:17,  3.14s/it]

objective/kl: -51.30876159667969
ppo/returns/mean: 1.3225343227386475
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2623it [3:16:20,  3.11s/it]

objective/kl: -53.989036560058594
ppo/returns/mean: 1.3970520496368408
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


2624it [3:16:23,  3.12s/it]

objective/kl: -51.500431060791016
ppo/returns/mean: 1.3403937816619873
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2625it [3:16:26,  3.08s/it]

objective/kl: -45.32759094238281
ppo/returns/mean: 1.1978936195373535
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2626it [3:16:29,  3.07s/it]

objective/kl: -45.92922592163086
ppo/returns/mean: 1.1830544471740723
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2627it [3:16:32,  3.06s/it]

objective/kl: -45.295108795166016
ppo/returns/mean: 1.194429874420166
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2628it [3:16:35,  3.04s/it]

objective/kl: -43.032127380371094
ppo/returns/mean: 1.0995616912841797
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2629it [3:16:38,  3.04s/it]

objective/kl: -42.46990966796875
ppo/returns/mean: 1.082993984222412
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2630it [3:16:41,  3.04s/it]

objective/kl: -44.601776123046875
ppo/returns/mean: 1.1347945928573608
ppo/policy/advantages_mean: -2.8870999813079834e-08
---------------------------------------------------------------------------------------------------


2631it [3:16:45,  3.03s/it]

objective/kl: -42.686012268066406
ppo/returns/mean: 1.0810006856918335
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2632it [3:16:48,  3.03s/it]

objective/kl: -42.47462463378906
ppo/returns/mean: 1.072401523590088
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2633it [3:16:51,  3.04s/it]

objective/kl: -42.582366943359375
ppo/returns/mean: 1.0836288928985596
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2634it [3:16:54,  3.08s/it]

objective/kl: -40.471187591552734
ppo/returns/mean: 1.0074167251586914
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2635it [3:16:57,  3.18s/it]

objective/kl: -41.27367401123047
ppo/returns/mean: 1.0336118936538696
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2636it [3:17:00,  3.13s/it]

objective/kl: -42.370758056640625
ppo/returns/mean: 1.0375890731811523
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2637it [3:17:03,  3.14s/it]

objective/kl: -40.46629333496094
ppo/returns/mean: 0.997449517250061
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2638it [3:17:06,  3.11s/it]

objective/kl: -41.53327941894531
ppo/returns/mean: 1.0038800239562988
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2639it [3:17:09,  3.08s/it]

objective/kl: -41.64960479736328
ppo/returns/mean: 1.0310966968536377
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2640it [3:17:12,  3.06s/it]

objective/kl: -40.69200897216797
ppo/returns/mean: 0.9929380416870117
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


2641it [3:17:15,  3.05s/it]

objective/kl: -38.96845626831055
ppo/returns/mean: 0.9439053535461426
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2642it [3:17:18,  3.03s/it]

objective/kl: -40.041873931884766
ppo/returns/mean: 0.9606075286865234
ppo/policy/advantages_mean: 1.5832483768463135e-08
---------------------------------------------------------------------------------------------------


2643it [3:17:22,  3.10s/it]

objective/kl: -41.42787551879883
ppo/returns/mean: 0.9760010242462158
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2644it [3:17:25,  3.09s/it]

objective/kl: -39.97032165527344
ppo/returns/mean: 0.9464871883392334
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2645it [3:17:28,  3.08s/it]

objective/kl: -41.792388916015625
ppo/returns/mean: 0.9920308589935303
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2646it [3:17:31,  3.07s/it]

objective/kl: -42.07863998413086
ppo/returns/mean: 1.0033698081970215
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


2647it [3:17:34,  3.05s/it]

objective/kl: -44.75031661987305
ppo/returns/mean: 1.0618699789047241
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


2648it [3:17:37,  3.22s/it]

objective/kl: -44.470924377441406
ppo/returns/mean: 1.0664231777191162
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2649it [3:17:41,  3.20s/it]

objective/kl: -43.4681510925293
ppo/returns/mean: 1.061201810836792
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2650it [3:17:44,  3.15s/it]

objective/kl: -43.117374420166016
ppo/returns/mean: 1.0650396347045898
ppo/policy/advantages_mean: -2.7939677238464355e-09
---------------------------------------------------------------------------------------------------


2651it [3:17:47,  3.13s/it]

objective/kl: -42.943363189697266
ppo/returns/mean: 1.0674934387207031
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2652it [3:17:50,  3.12s/it]

objective/kl: -44.66923904418945
ppo/returns/mean: 1.1094400882720947
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


2653it [3:17:53,  3.20s/it]

objective/kl: -44.18096160888672
ppo/returns/mean: 1.1233350038528442
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2654it [3:17:56,  3.16s/it]

objective/kl: -45.51544189453125
ppo/returns/mean: 1.1352858543395996
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2655it [3:17:59,  3.12s/it]

objective/kl: -44.147705078125
ppo/returns/mean: 1.1190898418426514
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2656it [3:18:02,  3.09s/it]

objective/kl: -44.24225616455078
ppo/returns/mean: 1.1222314834594727
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2657it [3:18:05,  3.09s/it]

objective/kl: -44.0742073059082
ppo/returns/mean: 1.1230677366256714
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


2658it [3:18:08,  3.07s/it]

objective/kl: -48.347755432128906
ppo/returns/mean: 1.2039284706115723
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


2659it [3:18:12,  3.07s/it]

objective/kl: -54.6401252746582
ppo/returns/mean: 1.3215117454528809
ppo/policy/advantages_mean: 3.3527612686157227e-08
---------------------------------------------------------------------------------------------------


2660it [3:18:15,  3.06s/it]

objective/kl: -53.899574279785156
ppo/returns/mean: 1.3185007572174072
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2661it [3:18:18,  3.05s/it]

objective/kl: -49.78363800048828
ppo/returns/mean: 1.233431100845337
ppo/policy/advantages_mean: -2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


2662it [3:18:21,  3.05s/it]

objective/kl: -51.06585693359375
ppo/returns/mean: 1.2625484466552734
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2663it [3:18:24,  3.04s/it]

objective/kl: -49.22496032714844
ppo/returns/mean: 1.2220313549041748
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2664it [3:18:27,  3.04s/it]

objective/kl: -49.41658020019531
ppo/returns/mean: 1.228265404701233
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


2665it [3:18:30,  3.05s/it]

objective/kl: -48.55434036254883
ppo/returns/mean: 1.2109684944152832
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2666it [3:18:33,  3.09s/it]

objective/kl: -49.852752685546875
ppo/returns/mean: 1.2340328693389893
ppo/policy/advantages_mean: -8.381903171539307e-09
---------------------------------------------------------------------------------------------------


2667it [3:18:36,  3.10s/it]

objective/kl: -47.09307861328125
ppo/returns/mean: 1.1616554260253906
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2668it [3:18:39,  3.12s/it]

objective/kl: -48.006683349609375
ppo/returns/mean: 1.1888118982315063
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2669it [3:18:42,  3.12s/it]

objective/kl: -48.6783332824707
ppo/returns/mean: 1.2012115716934204
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2670it [3:18:46,  3.12s/it]

objective/kl: -47.546695709228516
ppo/returns/mean: 1.1677157878875732
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2671it [3:18:49,  3.13s/it]

objective/kl: -49.932655334472656
ppo/returns/mean: 1.2180259227752686
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2672it [3:18:52,  3.14s/it]

objective/kl: -47.5623664855957
ppo/returns/mean: 1.158368468284607
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2673it [3:18:55,  3.14s/it]

objective/kl: -50.08186340332031
ppo/returns/mean: 1.2171032428741455
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2674it [3:18:58,  3.13s/it]

objective/kl: -49.59595489501953
ppo/returns/mean: 1.2084918022155762
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


2675it [3:19:01,  3.13s/it]

objective/kl: -50.77336502075195
ppo/returns/mean: 1.2241017818450928
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2676it [3:19:04,  3.13s/it]

objective/kl: -49.37958526611328
ppo/returns/mean: 1.1888644695281982
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2677it [3:19:08,  3.18s/it]

objective/kl: -51.465599060058594
ppo/returns/mean: 1.242321491241455
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2678it [3:19:11,  3.16s/it]

objective/kl: -50.70256805419922
ppo/returns/mean: 1.2338911294937134
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2679it [3:19:14,  3.14s/it]

objective/kl: -52.12898635864258
ppo/returns/mean: 1.2503507137298584
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2680it [3:19:17,  3.13s/it]

objective/kl: -49.664947509765625
ppo/returns/mean: 1.2126750946044922
ppo/policy/advantages_mean: -1.30385160446167e-08
---------------------------------------------------------------------------------------------------


2681it [3:19:20,  3.10s/it]

objective/kl: -51.04901123046875
ppo/returns/mean: 1.2391698360443115
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2682it [3:19:23,  3.07s/it]

objective/kl: -49.61327362060547
ppo/returns/mean: 1.1835675239562988
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2683it [3:19:26,  3.04s/it]

objective/kl: -51.1041259765625
ppo/returns/mean: 1.230705976486206
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2684it [3:19:29,  3.04s/it]

objective/kl: -50.76007843017578
ppo/returns/mean: 1.2189688682556152
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2685it [3:19:32,  3.03s/it]

objective/kl: -50.97736358642578
ppo/returns/mean: 1.214348554611206
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2686it [3:19:35,  3.03s/it]

objective/kl: -50.436607360839844
ppo/returns/mean: 1.2008540630340576
ppo/policy/advantages_mean: -2.60770320892334e-08
---------------------------------------------------------------------------------------------------


2687it [3:19:38,  3.04s/it]

objective/kl: -53.21623992919922
ppo/returns/mean: 1.258376955986023
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2688it [3:19:41,  3.08s/it]

objective/kl: -53.68767166137695
ppo/returns/mean: 1.2969504594802856
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2689it [3:19:44,  3.08s/it]

objective/kl: -54.28659439086914
ppo/returns/mean: 1.2798022031784058
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2690it [3:19:47,  3.05s/it]

objective/kl: -58.02730941772461
ppo/returns/mean: 1.3663556575775146
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2691it [3:19:51,  3.20s/it]

objective/kl: -57.06510925292969
ppo/returns/mean: 1.337113380432129
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2692it [3:19:54,  3.18s/it]

objective/kl: -55.698333740234375
ppo/returns/mean: 1.300577163696289
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2693it [3:19:57,  3.14s/it]

objective/kl: -52.424720764160156
ppo/returns/mean: 1.2138943672180176
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2694it [3:20:00,  3.12s/it]

objective/kl: -54.65782165527344
ppo/returns/mean: 1.2821226119995117
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


2695it [3:20:03,  3.10s/it]

objective/kl: -51.11238098144531
ppo/returns/mean: 1.1676790714263916
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2696it [3:20:06,  3.07s/it]

objective/kl: -58.11573028564453
ppo/returns/mean: 1.342505931854248
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2697it [3:20:09,  3.05s/it]

objective/kl: -56.94742965698242
ppo/returns/mean: 1.3104060888290405
ppo/policy/advantages_mean: 2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


2698it [3:20:12,  3.04s/it]

objective/kl: -55.73986053466797
ppo/returns/mean: 1.2844792604446411
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2699it [3:20:15,  3.03s/it]

objective/kl: -54.96031188964844
ppo/returns/mean: 1.2551584243774414
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2700it [3:20:18,  3.01s/it]

objective/kl: -54.55289077758789
ppo/returns/mean: 1.2527425289154053
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2701it [3:20:21,  3.00s/it]

objective/kl: -57.97761154174805
ppo/returns/mean: 1.3615140914916992
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2702it [3:20:25,  3.18s/it]

objective/kl: -57.81719970703125
ppo/returns/mean: 1.3502237796783447
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2703it [3:20:28,  3.12s/it]

objective/kl: -57.42401123046875
ppo/returns/mean: 1.3403514623641968
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2704it [3:20:31,  3.17s/it]

objective/kl: -60.41902160644531
ppo/returns/mean: 1.4308981895446777
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2705it [3:20:35,  3.39s/it]

objective/kl: -59.74872970581055
ppo/returns/mean: 1.4212620258331299
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2706it [3:20:38,  3.37s/it]

objective/kl: -59.90478515625
ppo/returns/mean: 1.4208406209945679
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2707it [3:20:41,  3.27s/it]

objective/kl: -57.34688949584961
ppo/returns/mean: 1.3741414546966553
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2708it [3:20:45,  3.32s/it]

objective/kl: -57.03422927856445
ppo/returns/mean: 1.3792208433151245
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2709it [3:20:48,  3.23s/it]

objective/kl: -58.60683822631836
ppo/returns/mean: 1.4178881645202637
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2710it [3:20:51,  3.17s/it]

objective/kl: -56.803260803222656
ppo/returns/mean: 1.3712817430496216
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2711it [3:20:54,  3.22s/it]

objective/kl: -56.38457489013672
ppo/returns/mean: 1.3517932891845703
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2712it [3:20:57,  3.16s/it]

objective/kl: -55.9786376953125
ppo/returns/mean: 1.3580642938613892
ppo/policy/advantages_mean: 3.166496753692627e-08
---------------------------------------------------------------------------------------------------


2713it [3:21:00,  3.12s/it]

objective/kl: -56.23147201538086
ppo/returns/mean: 1.3537750244140625
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2714it [3:21:03,  3.17s/it]

objective/kl: -55.849998474121094
ppo/returns/mean: 1.323939561843872
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2715it [3:21:06,  3.13s/it]

objective/kl: -55.34490966796875
ppo/returns/mean: 1.3446351289749146
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2716it [3:21:09,  3.10s/it]

objective/kl: -56.49272537231445
ppo/returns/mean: 1.3481669425964355
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2717it [3:21:13,  3.07s/it]

objective/kl: -55.52285385131836
ppo/returns/mean: 1.3311011791229248
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


2718it [3:21:16,  3.06s/it]

objective/kl: -56.118492126464844
ppo/returns/mean: 1.3485674858093262
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2719it [3:21:19,  3.18s/it]

objective/kl: -57.050514221191406
ppo/returns/mean: 1.3669939041137695
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2720it [3:21:22,  3.16s/it]

objective/kl: -55.375484466552734
ppo/returns/mean: 1.3414535522460938
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2721it [3:21:25,  3.12s/it]

objective/kl: -54.92900848388672
ppo/returns/mean: 1.3246264457702637
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2722it [3:21:28,  3.09s/it]

objective/kl: -56.234046936035156
ppo/returns/mean: 1.3659240007400513
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2723it [3:21:31,  3.07s/it]

objective/kl: -56.36126708984375
ppo/returns/mean: 1.3660475015640259
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2724it [3:21:34,  3.05s/it]

objective/kl: -55.76924133300781
ppo/returns/mean: 1.3306130170822144
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2725it [3:21:37,  3.04s/it]

objective/kl: -55.60515594482422
ppo/returns/mean: 1.3526935577392578
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2726it [3:21:41,  3.20s/it]

objective/kl: -58.56689453125
ppo/returns/mean: 1.3874017000198364
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2727it [3:21:44,  3.15s/it]

objective/kl: -55.39307403564453
ppo/returns/mean: 1.3100037574768066
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


2728it [3:21:47,  3.23s/it]

objective/kl: -55.87025451660156
ppo/returns/mean: 1.2784392833709717
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2729it [3:21:50,  3.17s/it]

objective/kl: -58.45988464355469
ppo/returns/mean: 1.3344190120697021
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


2730it [3:21:53,  3.13s/it]

objective/kl: -60.23323059082031
ppo/returns/mean: 1.3924684524536133
ppo/policy/advantages_mean: 2.7939677238464355e-08
---------------------------------------------------------------------------------------------------


2731it [3:21:56,  3.10s/it]

objective/kl: -62.22377395629883
ppo/returns/mean: 1.469396710395813
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


2732it [3:22:00,  3.30s/it]

objective/kl: -60.52827835083008
ppo/returns/mean: 1.4349346160888672
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2733it [3:22:03,  3.25s/it]

objective/kl: -61.37237548828125
ppo/returns/mean: 1.4690791368484497
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2734it [3:22:06,  3.19s/it]

objective/kl: -60.22880554199219
ppo/returns/mean: 1.4225640296936035
ppo/policy/advantages_mean: 2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


2735it [3:22:09,  3.14s/it]

objective/kl: -60.00954818725586
ppo/returns/mean: 1.4172711372375488
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2736it [3:22:12,  3.11s/it]

objective/kl: -61.52349090576172
ppo/returns/mean: 1.449446678161621
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2737it [3:22:15,  3.08s/it]

objective/kl: -61.7740364074707
ppo/returns/mean: 1.465295433998108
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


2738it [3:22:18,  3.07s/it]

objective/kl: -59.496429443359375
ppo/returns/mean: 1.4107134342193604
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2739it [3:22:21,  3.05s/it]

objective/kl: -57.7784423828125
ppo/returns/mean: 1.371795892715454
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2740it [3:22:25,  3.07s/it]

objective/kl: -58.58699035644531
ppo/returns/mean: 1.3894463777542114
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2741it [3:22:28,  3.08s/it]

objective/kl: -60.43099594116211
ppo/returns/mean: 1.4076924324035645
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2742it [3:22:31,  3.06s/it]

objective/kl: -59.38889694213867
ppo/returns/mean: 1.3970930576324463
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2743it [3:22:34,  3.12s/it]

objective/kl: -59.660282135009766
ppo/returns/mean: 1.3955752849578857
ppo/policy/advantages_mean: -2.7939677238464355e-08
---------------------------------------------------------------------------------------------------


2744it [3:22:37,  3.22s/it]

objective/kl: -60.108707427978516
ppo/returns/mean: 1.3852604627609253
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2745it [3:22:40,  3.15s/it]

objective/kl: -62.69395446777344
ppo/returns/mean: 1.4625873565673828
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2746it [3:22:43,  3.11s/it]

objective/kl: -66.6239013671875
ppo/returns/mean: 1.5695053339004517
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2747it [3:22:46,  3.08s/it]

objective/kl: -67.3689956665039
ppo/returns/mean: 1.5779829025268555
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2748it [3:22:49,  3.06s/it]

objective/kl: -68.8298568725586
ppo/returns/mean: 1.6264677047729492
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2749it [3:22:52,  3.07s/it]

objective/kl: -69.18932342529297
ppo/returns/mean: 1.6295870542526245
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


2750it [3:22:56,  3.20s/it]

objective/kl: -66.06639099121094
ppo/returns/mean: 1.569683313369751
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2751it [3:22:59,  3.29s/it]

objective/kl: -65.72044372558594
ppo/returns/mean: 1.5558459758758545
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2752it [3:23:02,  3.20s/it]

objective/kl: -63.699527740478516
ppo/returns/mean: 1.5512418746948242
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2753it [3:23:07,  3.51s/it]

objective/kl: -65.24485778808594
ppo/returns/mean: 1.5500028133392334
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2754it [3:23:10,  3.35s/it]

objective/kl: -67.2706298828125
ppo/returns/mean: 1.6035709381103516
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2755it [3:23:13,  3.39s/it]

objective/kl: -66.91036987304688
ppo/returns/mean: 1.5888206958770752
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2756it [3:23:16,  3.36s/it]

objective/kl: -64.92507934570312
ppo/returns/mean: 1.4956486225128174
ppo/policy/advantages_mean: 1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


2757it [3:23:20,  3.33s/it]

objective/kl: -64.67332458496094
ppo/returns/mean: 1.4931166172027588
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2758it [3:23:23,  3.36s/it]

objective/kl: -67.40829467773438
ppo/returns/mean: 1.4970535039901733
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2759it [3:23:26,  3.25s/it]

objective/kl: -61.0614013671875
ppo/returns/mean: 1.249009370803833
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2760it [3:23:29,  3.24s/it]

objective/kl: -57.25166320800781
ppo/returns/mean: 0.9605276584625244
ppo/policy/advantages_mean: -4.470348358154297e-08
---------------------------------------------------------------------------------------------------


2761it [3:23:32,  3.16s/it]

objective/kl: -40.19911575317383
ppo/returns/mean: 0.3220937252044678
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2762it [3:23:35,  3.11s/it]

objective/kl: -47.97985076904297
ppo/returns/mean: 0.4013465344905853
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2763it [3:23:38,  3.07s/it]

objective/kl: -55.625274658203125
ppo/returns/mean: 0.5324106216430664
ppo/policy/advantages_mean: -0.00022375211119651794
---------------------------------------------------------------------------------------------------


2764it [3:23:42,  3.33s/it]

objective/kl: -80.23568725585938
ppo/returns/mean: 0.9576387405395508
ppo/policy/advantages_mean: 0.0005457010120153427
---------------------------------------------------------------------------------------------------


2765it [3:23:45,  3.24s/it]

objective/kl: -82.1410140991211
ppo/returns/mean: 1.0664668083190918
ppo/policy/advantages_mean: -0.0012331334874033928
---------------------------------------------------------------------------------------------------


2766it [3:23:49,  3.32s/it]

objective/kl: -116.58338165283203
ppo/returns/mean: 2.331435203552246
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


2767it [3:23:52,  3.24s/it]

objective/kl: -116.63771057128906
ppo/returns/mean: 2.397068738937378
ppo/policy/advantages_mean: 5.774199962615967e-08
---------------------------------------------------------------------------------------------------


2768it [3:23:55,  3.28s/it]

objective/kl: -111.17776489257812
ppo/returns/mean: 2.3487634658813477
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2769it [3:23:58,  3.23s/it]

objective/kl: -101.78607940673828
ppo/returns/mean: 2.165134906768799
ppo/policy/advantages_mean: 3.259629011154175e-09
---------------------------------------------------------------------------------------------------


2770it [3:24:02,  3.29s/it]

objective/kl: -98.04523468017578
ppo/returns/mean: 2.1023812294006348
ppo/policy/advantages_mean: 4.0046870708465576e-08
---------------------------------------------------------------------------------------------------


2771it [3:24:05,  3.21s/it]

objective/kl: -94.83590698242188
ppo/returns/mean: 2.092696189880371
ppo/policy/advantages_mean: -6.51925802230835e-09
---------------------------------------------------------------------------------------------------


2772it [3:24:08,  3.15s/it]

objective/kl: -92.86619567871094
ppo/returns/mean: 2.1062541007995605
ppo/policy/advantages_mean: -3.3527612686157227e-08
---------------------------------------------------------------------------------------------------


2773it [3:24:11,  3.18s/it]

objective/kl: -86.60945892333984
ppo/returns/mean: 2.00457501411438
ppo/policy/advantages_mean: -1.0244548320770264e-08
---------------------------------------------------------------------------------------------------


2774it [3:24:14,  3.13s/it]

objective/kl: -81.60098266601562
ppo/returns/mean: 1.907575249671936
ppo/policy/advantages_mean: 1.0244548320770264e-08
---------------------------------------------------------------------------------------------------


2775it [3:24:17,  3.10s/it]

objective/kl: -72.96368408203125
ppo/returns/mean: 1.710386872291565
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2776it [3:24:21,  3.24s/it]

objective/kl: -71.35173034667969
ppo/returns/mean: 1.663508415222168
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2777it [3:24:24,  3.17s/it]

objective/kl: -69.60955810546875
ppo/returns/mean: 1.616817831993103
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2778it [3:24:27,  3.13s/it]

objective/kl: -72.28470611572266
ppo/returns/mean: 1.6678128242492676
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2779it [3:24:30,  3.10s/it]

objective/kl: -73.58012390136719
ppo/returns/mean: 1.6665598154067993
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2780it [3:24:33,  3.09s/it]

objective/kl: -76.11045837402344
ppo/returns/mean: 1.7088830471038818
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2781it [3:24:36,  3.22s/it]

objective/kl: -77.28016662597656
ppo/returns/mean: 1.6959141492843628
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2782it [3:24:39,  3.17s/it]

objective/kl: -79.85115051269531
ppo/returns/mean: 1.727454423904419
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


2783it [3:24:42,  3.12s/it]

objective/kl: -78.7604751586914
ppo/returns/mean: 1.6775190830230713
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2784it [3:24:45,  3.10s/it]

objective/kl: -79.67899322509766
ppo/returns/mean: 1.6944143772125244
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


2785it [3:24:48,  3.07s/it]

objective/kl: -81.62388610839844
ppo/returns/mean: 1.7214922904968262
ppo/policy/advantages_mean: 2.7939677238464355e-09
---------------------------------------------------------------------------------------------------


2786it [3:24:51,  3.05s/it]

objective/kl: -83.03759765625
ppo/returns/mean: 1.7441198825836182
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


2787it [3:24:54,  3.06s/it]

objective/kl: -84.63157653808594
ppo/returns/mean: 1.7870745658874512
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2788it [3:24:58,  3.06s/it]

objective/kl: -84.84012603759766
ppo/returns/mean: 1.7909679412841797
ppo/policy/advantages_mean: 2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


2789it [3:25:02,  3.39s/it]

objective/kl: -83.30136108398438
ppo/returns/mean: 1.7719757556915283
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2790it [3:25:05,  3.28s/it]

objective/kl: -83.05375671386719
ppo/returns/mean: 1.7924469709396362
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2791it [3:25:08,  3.23s/it]

objective/kl: -81.59606170654297
ppo/returns/mean: 1.7506346702575684
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2792it [3:25:11,  3.16s/it]

objective/kl: -81.09090423583984
ppo/returns/mean: 1.7447545528411865
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2793it [3:25:14,  3.13s/it]

objective/kl: -82.66673278808594
ppo/returns/mean: 1.7702407836914062
ppo/policy/advantages_mean: 2.421438694000244e-08
---------------------------------------------------------------------------------------------------


2794it [3:25:17,  3.12s/it]

objective/kl: -81.65276336669922
ppo/returns/mean: 1.7622544765472412
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2795it [3:25:20,  3.17s/it]

objective/kl: -80.50636291503906
ppo/returns/mean: 1.7174878120422363
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


2796it [3:25:23,  3.12s/it]

objective/kl: -80.292236328125
ppo/returns/mean: 1.6964051723480225
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2797it [3:25:26,  3.07s/it]

objective/kl: -79.78097534179688
ppo/returns/mean: 1.6874239444732666
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2798it [3:25:29,  3.04s/it]

objective/kl: -80.12191772460938
ppo/returns/mean: 1.6868352890014648
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


2799it [3:25:32,  3.05s/it]

objective/kl: -83.06095123291016
ppo/returns/mean: 1.7728755474090576
ppo/policy/advantages_mean: -1.7695128917694092e-08
---------------------------------------------------------------------------------------------------


2800it [3:25:35,  3.08s/it]

objective/kl: -83.25465393066406
ppo/returns/mean: 1.7644492387771606
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2801it [3:25:39,  3.09s/it]

objective/kl: -83.72654724121094
ppo/returns/mean: 1.7907264232635498
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2802it [3:25:42,  3.06s/it]

objective/kl: -83.41889190673828
ppo/returns/mean: 1.7751951217651367
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2803it [3:25:45,  3.05s/it]

objective/kl: -83.39237213134766
ppo/returns/mean: 1.7813341617584229
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2804it [3:25:48,  3.04s/it]

objective/kl: -81.33413696289062
ppo/returns/mean: 1.7433960437774658
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2805it [3:25:51,  3.10s/it]

objective/kl: -84.61036682128906
ppo/returns/mean: 1.8157048225402832
ppo/policy/advantages_mean: -1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


2806it [3:25:54,  3.09s/it]

objective/kl: -83.97447204589844
ppo/returns/mean: 1.790292739868164
ppo/policy/advantages_mean: -1.30385160446167e-08
---------------------------------------------------------------------------------------------------


2807it [3:25:57,  3.07s/it]

objective/kl: -83.78974914550781
ppo/returns/mean: 1.792402982711792
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2808it [3:26:00,  3.05s/it]

objective/kl: -82.21408081054688
ppo/returns/mean: 1.7556915283203125
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2809it [3:26:03,  3.04s/it]

objective/kl: -84.12812805175781
ppo/returns/mean: 1.797663927078247
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


2810it [3:26:06,  3.03s/it]

objective/kl: -83.70613861083984
ppo/returns/mean: 1.7905604839324951
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2811it [3:26:09,  3.04s/it]

objective/kl: -82.54753875732422
ppo/returns/mean: 1.7501144409179688
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2812it [3:26:12,  3.07s/it]

objective/kl: -82.99407958984375
ppo/returns/mean: 1.7699596881866455
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2813it [3:26:15,  3.05s/it]

objective/kl: -83.89038848876953
ppo/returns/mean: 1.7939622402191162
ppo/policy/advantages_mean: 4.6566128730773926e-09
---------------------------------------------------------------------------------------------------


2814it [3:26:18,  3.04s/it]

objective/kl: -84.51868438720703
ppo/returns/mean: 1.7984580993652344
ppo/policy/advantages_mean: 3.5390257835388184e-08
---------------------------------------------------------------------------------------------------


2815it [3:26:21,  3.04s/it]

objective/kl: -83.72158813476562
ppo/returns/mean: 1.803701400756836
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2816it [3:26:24,  3.04s/it]

objective/kl: -83.5720443725586
ppo/returns/mean: 1.817434310913086
ppo/policy/advantages_mean: -1.30385160446167e-08
---------------------------------------------------------------------------------------------------


2817it [3:26:27,  3.03s/it]

objective/kl: -85.24142456054688
ppo/returns/mean: 1.861246109008789
ppo/policy/advantages_mean: 6.51925802230835e-09
---------------------------------------------------------------------------------------------------


2818it [3:26:30,  3.07s/it]

objective/kl: -87.87409210205078
ppo/returns/mean: 1.931341290473938
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


2819it [3:26:34,  3.18s/it]

objective/kl: -88.43733215332031
ppo/returns/mean: 1.970526933670044
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2820it [3:26:38,  3.42s/it]

objective/kl: -84.39737701416016
ppo/returns/mean: 1.890090823173523
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2821it [3:26:42,  3.52s/it]

objective/kl: -81.49845886230469
ppo/returns/mean: 1.8433315753936768
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2822it [3:26:45,  3.39s/it]

objective/kl: -81.27981567382812
ppo/returns/mean: 1.8695085048675537
ppo/policy/advantages_mean: -2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


2823it [3:26:48,  3.31s/it]

objective/kl: -85.88948059082031
ppo/returns/mean: 1.9495964050292969
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2824it [3:26:52,  3.45s/it]

objective/kl: -80.65718841552734
ppo/returns/mean: 1.8115980625152588
ppo/policy/advantages_mean: 2.7939677238464355e-08
---------------------------------------------------------------------------------------------------


2825it [3:26:56,  3.68s/it]

objective/kl: -86.9546890258789
ppo/returns/mean: 1.9006297588348389
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2826it [3:26:59,  3.60s/it]

objective/kl: -92.4007568359375
ppo/returns/mean: 1.9409153461456299
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2827it [3:27:02,  3.43s/it]

objective/kl: -95.72325134277344
ppo/returns/mean: 1.9746636152267456
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2828it [3:27:06,  3.51s/it]

objective/kl: -94.64935302734375
ppo/returns/mean: 1.9730736017227173
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


2829it [3:27:09,  3.37s/it]

objective/kl: -94.20814514160156
ppo/returns/mean: 1.9619029760360718
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2830it [3:27:12,  3.28s/it]

objective/kl: -89.05205535888672
ppo/returns/mean: 1.8479437828063965
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2831it [3:27:15,  3.20s/it]

objective/kl: -89.06498718261719
ppo/returns/mean: 1.8465147018432617
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2832it [3:27:18,  3.15s/it]

objective/kl: -93.22929382324219
ppo/returns/mean: 1.9372774362564087
ppo/policy/advantages_mean: -3.818422555923462e-08
---------------------------------------------------------------------------------------------------


2833it [3:27:21,  3.12s/it]

objective/kl: -92.20162963867188
ppo/returns/mean: 1.9248546361923218
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2834it [3:27:24,  3.10s/it]

objective/kl: -91.99473571777344
ppo/returns/mean: 1.8878414630889893
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


2835it [3:27:27,  3.14s/it]

objective/kl: -88.37435913085938
ppo/returns/mean: 1.7986398935317993
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2836it [3:27:31,  3.16s/it]

objective/kl: -93.76153564453125
ppo/returns/mean: 1.8973186016082764
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


2837it [3:27:34,  3.12s/it]

objective/kl: -92.20399475097656
ppo/returns/mean: 1.8607136011123657
ppo/policy/advantages_mean: 1.7695128917694092e-08
---------------------------------------------------------------------------------------------------


2838it [3:27:37,  3.10s/it]

objective/kl: -92.10001373291016
ppo/returns/mean: 1.8507115840911865
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2839it [3:27:40,  3.07s/it]

objective/kl: -91.35820007324219
ppo/returns/mean: 1.8160059452056885
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


2840it [3:27:43,  3.05s/it]

objective/kl: -92.33323669433594
ppo/returns/mean: 1.840927243232727
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2841it [3:27:46,  3.04s/it]

objective/kl: -94.97921752929688
ppo/returns/mean: 1.9039841890335083
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


2842it [3:27:49,  3.04s/it]

objective/kl: -95.24250793457031
ppo/returns/mean: 1.8870904445648193
ppo/policy/advantages_mean: -6.51925802230835e-09
---------------------------------------------------------------------------------------------------


2843it [3:27:52,  3.06s/it]

objective/kl: -93.91410827636719
ppo/returns/mean: 1.8673644065856934
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2844it [3:27:55,  3.08s/it]

objective/kl: -97.43226623535156
ppo/returns/mean: 1.9384078979492188
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2845it [3:27:58,  3.07s/it]

objective/kl: -98.9846420288086
ppo/returns/mean: 1.9785542488098145
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2846it [3:28:01,  3.07s/it]

objective/kl: -100.37471008300781
ppo/returns/mean: 2.0013465881347656
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2847it [3:28:04,  3.05s/it]

objective/kl: -100.38679504394531
ppo/returns/mean: 2.0400702953338623
ppo/policy/advantages_mean: -3.725290298461914e-08
---------------------------------------------------------------------------------------------------


2848it [3:28:07,  3.04s/it]

objective/kl: -100.72293090820312
ppo/returns/mean: 2.071150541305542
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2849it [3:28:11,  3.30s/it]

objective/kl: -102.04438781738281
ppo/returns/mean: 2.0944747924804688
ppo/policy/advantages_mean: 1.5832483768463135e-08
---------------------------------------------------------------------------------------------------


2850it [3:28:14,  3.22s/it]

objective/kl: -102.34330749511719
ppo/returns/mean: 2.1162991523742676
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2851it [3:28:17,  3.16s/it]

objective/kl: -101.71475219726562
ppo/returns/mean: 2.105023145675659
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2852it [3:28:20,  3.12s/it]

objective/kl: -100.63072204589844
ppo/returns/mean: 2.0927090644836426
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2853it [3:28:23,  3.08s/it]

objective/kl: -99.63240051269531
ppo/returns/mean: 2.0507972240448
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2854it [3:28:26,  3.07s/it]

objective/kl: -102.50511169433594
ppo/returns/mean: 2.1356968879699707
ppo/policy/advantages_mean: -1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


2855it [3:28:29,  3.05s/it]

objective/kl: -102.07037353515625
ppo/returns/mean: 2.1285836696624756
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2856it [3:28:32,  3.05s/it]

objective/kl: -102.62759399414062
ppo/returns/mean: 2.127567768096924
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


2857it [3:28:35,  3.04s/it]

objective/kl: -101.17372131347656
ppo/returns/mean: 2.05607533454895
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2858it [3:28:39,  3.13s/it]

objective/kl: -103.61576080322266
ppo/returns/mean: 2.1068367958068848
ppo/policy/advantages_mean: -4.470348358154297e-08
---------------------------------------------------------------------------------------------------


2859it [3:28:42,  3.10s/it]

objective/kl: -102.191650390625
ppo/returns/mean: 2.0754542350769043
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


2860it [3:28:45,  3.07s/it]

objective/kl: -100.11138916015625
ppo/returns/mean: 2.025692939758301
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


2861it [3:28:48,  3.06s/it]

objective/kl: -101.67179107666016
ppo/returns/mean: 2.021458625793457
ppo/policy/advantages_mean: -1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


2862it [3:28:51,  3.04s/it]

objective/kl: -100.18348693847656
ppo/returns/mean: 1.977712869644165
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


2863it [3:28:54,  3.03s/it]

objective/kl: -102.55633544921875
ppo/returns/mean: 2.000173330307007
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2864it [3:28:57,  3.04s/it]

objective/kl: -102.80792999267578
ppo/returns/mean: 1.9937533140182495
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2865it [3:29:00,  3.04s/it]

objective/kl: -103.23859405517578
ppo/returns/mean: 2.010378360748291
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2866it [3:29:03,  3.03s/it]

objective/kl: -102.43643188476562
ppo/returns/mean: 1.9765934944152832
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2867it [3:29:06,  3.02s/it]

objective/kl: -104.249267578125
ppo/returns/mean: 2.019063949584961
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2868it [3:29:10,  3.27s/it]

objective/kl: -103.94551086425781
ppo/returns/mean: 2.0078024864196777
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


2869it [3:29:13,  3.26s/it]

objective/kl: -102.08261108398438
ppo/returns/mean: 1.9484583139419556
ppo/policy/advantages_mean: 2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


2870it [3:29:16,  3.19s/it]

objective/kl: -103.25459289550781
ppo/returns/mean: 1.974051833152771
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2871it [3:29:19,  3.15s/it]

objective/kl: -103.0174560546875
ppo/returns/mean: 1.9754507541656494
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2872it [3:29:22,  3.11s/it]

objective/kl: -109.77185821533203
ppo/returns/mean: 2.1365444660186768
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2873it [3:29:25,  3.09s/it]

objective/kl: -105.0419921875
ppo/returns/mean: 2.026668071746826
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


2874it [3:29:28,  3.14s/it]

objective/kl: -107.4488754272461
ppo/returns/mean: 2.084712505340576
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2875it [3:29:31,  3.12s/it]

objective/kl: -109.99505615234375
ppo/returns/mean: 2.169518232345581
ppo/policy/advantages_mean: -1.3969838619232178e-08
---------------------------------------------------------------------------------------------------


2876it [3:29:34,  3.08s/it]

objective/kl: -110.9505615234375
ppo/returns/mean: 2.214265823364258
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


2877it [3:29:37,  3.07s/it]

objective/kl: -108.38365936279297
ppo/returns/mean: 2.1568188667297363
ppo/policy/advantages_mean: 1.3969838619232178e-08
---------------------------------------------------------------------------------------------------


2878it [3:29:40,  3.05s/it]

objective/kl: -105.98477172851562
ppo/returns/mean: 2.1086065769195557
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


2879it [3:29:43,  3.04s/it]

objective/kl: -106.39754486083984
ppo/returns/mean: 2.135012149810791
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2880it [3:29:46,  3.04s/it]

objective/kl: -104.553466796875
ppo/returns/mean: 2.0959572792053223
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2881it [3:29:50,  3.06s/it]

objective/kl: -104.34357452392578
ppo/returns/mean: 2.071370840072632
ppo/policy/advantages_mean: 1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


2882it [3:29:53,  3.06s/it]

objective/kl: -99.80091857910156
ppo/returns/mean: 1.9662575721740723
ppo/policy/advantages_mean: -2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


2883it [3:29:56,  3.15s/it]

objective/kl: -101.60166931152344
ppo/returns/mean: 2.0168020725250244
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


2884it [3:29:59,  3.11s/it]

objective/kl: -100.95903015136719
ppo/returns/mean: 1.9841172695159912
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2885it [3:30:02,  3.09s/it]

objective/kl: -99.39616394042969
ppo/returns/mean: 1.9477102756500244
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2886it [3:30:05,  3.07s/it]

objective/kl: -97.84754943847656
ppo/returns/mean: 1.9254193305969238
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


2887it [3:30:08,  3.05s/it]

objective/kl: -92.31036376953125
ppo/returns/mean: 1.755685806274414
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2888it [3:30:11,  3.05s/it]

objective/kl: -96.20953369140625
ppo/returns/mean: 1.8803558349609375
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2889it [3:30:14,  3.04s/it]

objective/kl: -97.61927795410156
ppo/returns/mean: 1.8743727207183838
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2890it [3:30:17,  3.03s/it]

objective/kl: -96.10352325439453
ppo/returns/mean: 1.882110357284546
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2891it [3:30:20,  3.04s/it]

objective/kl: -94.269287109375
ppo/returns/mean: 1.910278558731079
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2892it [3:30:23,  3.03s/it]

objective/kl: -94.614990234375
ppo/returns/mean: 1.8057756423950195
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2893it [3:30:26,  3.04s/it]

objective/kl: -97.15365600585938
ppo/returns/mean: 1.927950143814087
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2894it [3:30:30,  3.22s/it]

objective/kl: -96.33944702148438
ppo/returns/mean: 1.8962199687957764
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2895it [3:30:33,  3.18s/it]

objective/kl: -93.6224365234375
ppo/returns/mean: 1.900909662246704
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2896it [3:30:36,  3.14s/it]

objective/kl: -96.62847137451172
ppo/returns/mean: 1.9356921911239624
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2897it [3:30:39,  3.11s/it]

objective/kl: -92.21025848388672
ppo/returns/mean: 1.8986406326293945
ppo/policy/advantages_mean: 2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


2898it [3:30:42,  3.13s/it]

objective/kl: -97.6116714477539
ppo/returns/mean: 1.9399709701538086
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2899it [3:30:45,  3.10s/it]

objective/kl: -94.92387390136719
ppo/returns/mean: 1.9776933193206787
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2900it [3:30:48,  3.08s/it]

objective/kl: -96.53346252441406
ppo/returns/mean: 1.9154998064041138
ppo/policy/advantages_mean: -3.5390257835388184e-08
---------------------------------------------------------------------------------------------------


2901it [3:30:52,  3.11s/it]

objective/kl: -100.88410949707031
ppo/returns/mean: 1.9862074851989746
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2902it [3:30:55,  3.11s/it]

objective/kl: -104.0088119506836
ppo/returns/mean: 2.0857553482055664
ppo/policy/advantages_mean: 1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


2903it [3:30:58,  3.08s/it]

objective/kl: -96.21121978759766
ppo/returns/mean: 1.928524136543274
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2904it [3:31:01,  3.06s/it]

objective/kl: -93.73310852050781
ppo/returns/mean: 1.9155638217926025
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2905it [3:31:04,  3.05s/it]

objective/kl: -93.10269927978516
ppo/returns/mean: 1.9470329284667969
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


2906it [3:31:07,  3.05s/it]

objective/kl: -87.15799713134766
ppo/returns/mean: 1.8787729740142822
ppo/policy/advantages_mean: -1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


2907it [3:31:10,  3.06s/it]

objective/kl: -86.41987609863281
ppo/returns/mean: 1.8944809436798096
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2908it [3:31:13,  3.06s/it]

objective/kl: -83.47393798828125
ppo/returns/mean: 1.869637370109558
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2909it [3:31:16,  3.05s/it]

objective/kl: -89.14567565917969
ppo/returns/mean: 2.009915828704834
ppo/policy/advantages_mean: -1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


2910it [3:31:19,  3.05s/it]

objective/kl: -88.53583526611328
ppo/returns/mean: 2.010517120361328
ppo/policy/advantages_mean: -2.421438694000244e-08
---------------------------------------------------------------------------------------------------


2911it [3:31:22,  3.04s/it]

objective/kl: -86.26468658447266
ppo/returns/mean: 1.9621338844299316
ppo/policy/advantages_mean: -4.6566128730773926e-09
---------------------------------------------------------------------------------------------------


2912it [3:31:25,  3.08s/it]

objective/kl: -83.30995178222656
ppo/returns/mean: 1.9096494913101196
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2913it [3:31:28,  3.14s/it]

objective/kl: -83.1041259765625
ppo/returns/mean: 1.9448298215866089
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2914it [3:31:32,  3.37s/it]

objective/kl: -86.0723876953125
ppo/returns/mean: 1.9963024854660034
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


2915it [3:31:35,  3.27s/it]

objective/kl: -90.63650512695312
ppo/returns/mean: 2.050278663635254
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2916it [3:31:38,  3.19s/it]

objective/kl: -89.14704895019531
ppo/returns/mean: 2.025498628616333
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2917it [3:31:41,  3.15s/it]

objective/kl: -90.09185791015625
ppo/returns/mean: 2.0485048294067383
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2918it [3:31:45,  3.18s/it]

objective/kl: -89.67904663085938
ppo/returns/mean: 2.057894706726074
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2919it [3:31:48,  3.14s/it]

objective/kl: -90.41575622558594
ppo/returns/mean: 2.0713038444519043
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2920it [3:31:51,  3.11s/it]

objective/kl: -87.17029571533203
ppo/returns/mean: 2.04473876953125
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2921it [3:31:54,  3.07s/it]

objective/kl: -88.18524932861328
ppo/returns/mean: 2.038681745529175
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


2922it [3:31:57,  3.07s/it]

objective/kl: -89.06639099121094
ppo/returns/mean: 2.019862174987793
ppo/policy/advantages_mean: -2.7939677238464355e-08
---------------------------------------------------------------------------------------------------


2923it [3:32:00,  3.14s/it]

objective/kl: -88.54804992675781
ppo/returns/mean: 2.0418877601623535
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2924it [3:32:03,  3.12s/it]

objective/kl: -87.6228256225586
ppo/returns/mean: 2.0096659660339355
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2925it [3:32:06,  3.09s/it]

objective/kl: -86.20509338378906
ppo/returns/mean: 1.9501910209655762
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2926it [3:32:09,  3.07s/it]

objective/kl: -90.60289764404297
ppo/returns/mean: 2.0306949615478516
ppo/policy/advantages_mean: -1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


2927it [3:32:12,  3.06s/it]

objective/kl: -91.54872131347656
ppo/returns/mean: 2.0382187366485596
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2928it [3:32:16,  3.18s/it]

objective/kl: -95.27845764160156
ppo/returns/mean: 2.092721462249756
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


2929it [3:32:19,  3.16s/it]

objective/kl: -94.43301391601562
ppo/returns/mean: 2.1052680015563965
ppo/policy/advantages_mean: 2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


2930it [3:32:22,  3.12s/it]

objective/kl: -98.0888671875
ppo/returns/mean: 2.1072630882263184
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


2931it [3:32:25,  3.23s/it]

objective/kl: -91.83572387695312
ppo/returns/mean: 2.018167018890381
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2932it [3:32:29,  3.24s/it]

objective/kl: -92.72557067871094
ppo/returns/mean: 2.017274856567383
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


2933it [3:32:32,  3.17s/it]

objective/kl: -103.67594909667969
ppo/returns/mean: 2.1646242141723633
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2934it [3:32:35,  3.12s/it]

objective/kl: -107.92838287353516
ppo/returns/mean: 2.230111837387085
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2935it [3:32:38,  3.10s/it]

objective/kl: -107.35414123535156
ppo/returns/mean: 2.2220516204833984
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2936it [3:32:41,  3.08s/it]

objective/kl: -105.86869049072266
ppo/returns/mean: 2.187039613723755
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


2937it [3:32:44,  3.06s/it]

objective/kl: -106.86581420898438
ppo/returns/mean: 2.218215227127075
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2938it [3:32:47,  3.04s/it]

objective/kl: -108.78548431396484
ppo/returns/mean: 2.210503339767456
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2939it [3:32:50,  3.03s/it]

objective/kl: -108.95170593261719
ppo/returns/mean: 2.1706604957580566
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2940it [3:32:53,  3.02s/it]

objective/kl: -109.71737670898438
ppo/returns/mean: 2.1384778022766113
ppo/policy/advantages_mean: -2.7939677238464355e-08
---------------------------------------------------------------------------------------------------


2941it [3:32:56,  3.02s/it]

objective/kl: -101.01234436035156
ppo/returns/mean: 1.7885334491729736
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2942it [3:32:59,  3.03s/it]

objective/kl: -85.63423156738281
ppo/returns/mean: 1.2331221103668213
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2943it [3:33:02,  3.04s/it]

objective/kl: -64.342041015625
ppo/returns/mean: 0.6543248891830444
ppo/policy/advantages_mean: 5.587935447692871e-08
---------------------------------------------------------------------------------------------------


2944it [3:33:05,  3.10s/it]

objective/kl: -31.375701904296875
ppo/returns/mean: -0.148411825299263
ppo/policy/advantages_mean: 0.00026868656277656555
---------------------------------------------------------------------------------------------------


2945it [3:33:09,  3.33s/it]

objective/kl: -9.037675857543945
ppo/returns/mean: -0.7921074628829956
ppo/policy/advantages_mean: 0.004101458936929703
---------------------------------------------------------------------------------------------------


2946it [3:33:12,  3.36s/it]

objective/kl: -19.074291229248047
ppo/returns/mean: -0.3739989399909973
ppo/policy/advantages_mean: 0.0010326821357011795
---------------------------------------------------------------------------------------------------


2947it [3:33:17,  3.63s/it]

objective/kl: -18.960994720458984
ppo/returns/mean: -0.4691855311393738
ppo/policy/advantages_mean: 0.0027109477669000626
---------------------------------------------------------------------------------------------------


2948it [3:33:20,  3.68s/it]

objective/kl: -27.78375816345215
ppo/returns/mean: -0.3175808787345886
ppo/policy/advantages_mean: -0.0011840350925922394
---------------------------------------------------------------------------------------------------


2949it [3:33:24,  3.56s/it]

objective/kl: -29.38915252685547
ppo/returns/mean: -0.28796789050102234
ppo/policy/advantages_mean: -0.0010374011471867561
---------------------------------------------------------------------------------------------------


2950it [3:33:27,  3.48s/it]

objective/kl: -25.836532592773438
ppo/returns/mean: -0.2883288264274597
ppo/policy/advantages_mean: -0.00026174820959568024
---------------------------------------------------------------------------------------------------


2951it [3:33:30,  3.43s/it]

objective/kl: -31.70206642150879
ppo/returns/mean: -0.22265736758708954
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2952it [3:33:34,  3.40s/it]

objective/kl: -41.430938720703125
ppo/returns/mean: -0.09467953443527222
ppo/policy/advantages_mean: -2.0876526832580566e-05
---------------------------------------------------------------------------------------------------


2953it [3:33:37,  3.35s/it]

objective/kl: -63.18508529663086
ppo/returns/mean: 0.3892839848995209
ppo/policy/advantages_mean: 0.004934653639793396
---------------------------------------------------------------------------------------------------


2954it [3:33:40,  3.26s/it]

objective/kl: -66.72538757324219
ppo/returns/mean: 0.6108307838439941
ppo/policy/advantages_mean: 0.0016750432550907135
---------------------------------------------------------------------------------------------------


2955it [3:33:43,  3.35s/it]

objective/kl: -70.28160858154297
ppo/returns/mean: 0.8936066031455994
ppo/policy/advantages_mean: 7.916241884231567e-08
---------------------------------------------------------------------------------------------------


2956it [3:33:47,  3.29s/it]

objective/kl: -73.45503234863281
ppo/returns/mean: 1.1207075119018555
ppo/policy/advantages_mean: 1.210719347000122e-08
---------------------------------------------------------------------------------------------------


2957it [3:33:50,  3.21s/it]

objective/kl: -57.70061492919922
ppo/returns/mean: 0.928568959236145
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


2958it [3:33:53,  3.15s/it]

objective/kl: -51.98278045654297
ppo/returns/mean: 1.012986421585083
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2959it [3:33:56,  3.12s/it]

objective/kl: -31.852405548095703
ppo/returns/mean: 0.7400616407394409
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


2960it [3:33:59,  3.09s/it]

objective/kl: -10.411853790283203
ppo/returns/mean: 0.36543408036231995
ppo/policy/advantages_mean: -3.5390257835388184e-08
---------------------------------------------------------------------------------------------------


2961it [3:34:02,  3.08s/it]

objective/kl: 14.269560813903809
ppo/returns/mean: -0.14979669451713562
ppo/policy/advantages_mean: -1.73225998878479e-07
---------------------------------------------------------------------------------------------------


2962it [3:34:06,  3.32s/it]

objective/kl: 29.1405029296875
ppo/returns/mean: -0.5282959938049316
ppo/policy/advantages_mean: -1.862645149230957e-07
---------------------------------------------------------------------------------------------------


2963it [3:34:09,  3.23s/it]

objective/kl: 27.48271369934082
ppo/returns/mean: -0.5461362600326538
ppo/policy/advantages_mean: -6.395392119884491e-05
---------------------------------------------------------------------------------------------------


2964it [3:34:12,  3.19s/it]

objective/kl: 22.343170166015625
ppo/returns/mean: -0.5362387895584106
ppo/policy/advantages_mean: 0.0032639428973197937
---------------------------------------------------------------------------------------------------


2965it [3:34:15,  3.30s/it]

objective/kl: 20.46955680847168
ppo/returns/mean: -0.5339115858078003
ppo/policy/advantages_mean: 0.00023276731371879578
---------------------------------------------------------------------------------------------------


2966it [3:34:18,  3.23s/it]

objective/kl: 11.463987350463867
ppo/returns/mean: -0.429171621799469
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


2967it [3:34:21,  3.17s/it]

objective/kl: 5.189970016479492
ppo/returns/mean: -0.3888319730758667
ppo/policy/advantages_mean: -0.0004896894097328186
---------------------------------------------------------------------------------------------------


2968it [3:34:25,  3.19s/it]

objective/kl: -7.240407466888428
ppo/returns/mean: -0.35976582765579224
ppo/policy/advantages_mean: -0.00021795742213726044
---------------------------------------------------------------------------------------------------


2969it [3:34:28,  3.21s/it]

objective/kl: -9.469380378723145
ppo/returns/mean: -0.302558571100235
ppo/policy/advantages_mean: 0.002096856012940407
---------------------------------------------------------------------------------------------------


2970it [3:34:31,  3.20s/it]

objective/kl: -13.339292526245117
ppo/returns/mean: -0.2120204120874405
ppo/policy/advantages_mean: -0.0028733480721712112
---------------------------------------------------------------------------------------------------


2971it [3:34:34,  3.18s/it]

objective/kl: -13.871706008911133
ppo/returns/mean: -0.1376483142375946
ppo/policy/advantages_mean: 0.0006227642297744751
---------------------------------------------------------------------------------------------------


2972it [3:34:37,  3.13s/it]

objective/kl: -16.572097778320312
ppo/returns/mean: -0.07554072141647339
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


2973it [3:34:41,  3.40s/it]

objective/kl: -28.463638305664062
ppo/returns/mean: 0.0909532755613327
ppo/policy/advantages_mean: 2.1720188669860363e-05
---------------------------------------------------------------------------------------------------


2974it [3:34:44,  3.32s/it]

objective/kl: -37.29772186279297
ppo/returns/mean: 0.3124333322048187
ppo/policy/advantages_mean: -0.0001920461654663086
---------------------------------------------------------------------------------------------------


2975it [3:34:47,  3.23s/it]

objective/kl: -46.563087463378906
ppo/returns/mean: 0.5825384855270386
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


2976it [3:34:50,  3.16s/it]

objective/kl: -39.387962341308594
ppo/returns/mean: 0.524674654006958
ppo/policy/advantages_mean: 3.5390257835388184e-08
---------------------------------------------------------------------------------------------------


2977it [3:34:53,  3.12s/it]

objective/kl: -18.395048141479492
ppo/returns/mean: 0.17961855232715607
ppo/policy/advantages_mean: -2.7939677238464355e-08
---------------------------------------------------------------------------------------------------


2978it [3:34:57,  3.13s/it]

objective/kl: -19.414424896240234
ppo/returns/mean: 0.09476149082183838
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2979it [3:35:00,  3.33s/it]

objective/kl: -24.78565216064453
ppo/returns/mean: 0.16326436400413513
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2980it [3:35:03,  3.25s/it]

objective/kl: -29.99408531188965
ppo/returns/mean: 0.27609682083129883
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


2981it [3:35:07,  3.20s/it]

objective/kl: -37.88727569580078
ppo/returns/mean: 0.49857741594314575
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


2982it [3:35:10,  3.14s/it]

objective/kl: -46.06776428222656
ppo/returns/mean: 0.7577616572380066
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


2983it [3:35:13,  3.17s/it]

objective/kl: -44.077964782714844
ppo/returns/mean: 0.7670881748199463
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


2984it [3:35:16,  3.28s/it]

objective/kl: -40.65704345703125
ppo/returns/mean: 0.7871853709220886
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


2985it [3:35:19,  3.20s/it]

objective/kl: -38.1656494140625
ppo/returns/mean: 0.794249415397644
ppo/policy/advantages_mean: 1.5832483768463135e-08
---------------------------------------------------------------------------------------------------


2986it [3:35:22,  3.15s/it]

objective/kl: -35.653079986572266
ppo/returns/mean: 0.8044285178184509
ppo/policy/advantages_mean: 3.166496753692627e-08
---------------------------------------------------------------------------------------------------


2987it [3:35:25,  3.11s/it]

objective/kl: -32.17959976196289
ppo/returns/mean: 0.8033876419067383
ppo/policy/advantages_mean: 0.0005359714850783348
---------------------------------------------------------------------------------------------------


2988it [3:35:28,  3.09s/it]

objective/kl: -30.635841369628906
ppo/returns/mean: 0.8351554870605469
ppo/policy/advantages_mean: 0.0002012215554714203
---------------------------------------------------------------------------------------------------


2989it [3:35:31,  3.07s/it]

objective/kl: -29.034353256225586
ppo/returns/mean: 0.8399960398674011
ppo/policy/advantages_mean: -0.00038973893970251083
---------------------------------------------------------------------------------------------------


2990it [3:35:35,  3.26s/it]

objective/kl: -29.368114471435547
ppo/returns/mean: 0.8854262828826904
ppo/policy/advantages_mean: 0.0016142353415489197
---------------------------------------------------------------------------------------------------


2991it [3:35:39,  3.33s/it]

objective/kl: -18.497556686401367
ppo/returns/mean: 0.4980318248271942
ppo/policy/advantages_mean: 0.0019800979644060135
---------------------------------------------------------------------------------------------------


2992it [3:35:42,  3.28s/it]

objective/kl: -22.175357818603516
ppo/returns/mean: 0.6598109006881714
ppo/policy/advantages_mean: 0.0010141171514987946
---------------------------------------------------------------------------------------------------


2993it [3:35:45,  3.21s/it]

objective/kl: -27.64964485168457
ppo/returns/mean: 0.9447546601295471
ppo/policy/advantages_mean: 0.0030087679624557495
---------------------------------------------------------------------------------------------------


2994it [3:35:49,  3.42s/it]

objective/kl: -28.787385940551758
ppo/returns/mean: 0.990790843963623
ppo/policy/advantages_mean: 8.381903171539307e-08
---------------------------------------------------------------------------------------------------


2995it [3:35:52,  3.38s/it]

objective/kl: -28.480619430541992
ppo/returns/mean: 0.9901177287101746
ppo/policy/advantages_mean: -4.842877388000488e-08
---------------------------------------------------------------------------------------------------


2996it [3:35:55,  3.28s/it]

objective/kl: -30.725847244262695
ppo/returns/mean: 1.0367828607559204
ppo/policy/advantages_mean: -1.043081283569336e-07
---------------------------------------------------------------------------------------------------


2997it [3:35:58,  3.20s/it]

objective/kl: -29.6816349029541
ppo/returns/mean: 0.9853075742721558
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


2998it [3:36:01,  3.15s/it]

objective/kl: -29.206472396850586
ppo/returns/mean: 0.9910483360290527
ppo/policy/advantages_mean: 2.7008354663848877e-08
---------------------------------------------------------------------------------------------------


2999it [3:36:04,  3.11s/it]

objective/kl: -32.26416778564453
ppo/returns/mean: 1.0153577327728271
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3000it [3:36:07,  3.10s/it]

objective/kl: -41.67158889770508
ppo/returns/mean: 1.1674582958221436
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3001it [3:36:11,  3.31s/it]

objective/kl: -43.617515563964844
ppo/returns/mean: 1.0753422975540161
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3002it [3:36:14,  3.24s/it]

objective/kl: -45.65265655517578
ppo/returns/mean: 1.2216079235076904
ppo/policy/advantages_mean: 3.3527612686157227e-08
---------------------------------------------------------------------------------------------------


3003it [3:36:17,  3.17s/it]

objective/kl: -60.05766296386719
ppo/returns/mean: 1.3568772077560425
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3004it [3:36:20,  3.14s/it]

objective/kl: -64.48220825195312
ppo/returns/mean: 1.3962715864181519
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3005it [3:36:23,  3.10s/it]

objective/kl: -50.598411560058594
ppo/returns/mean: 0.7927999496459961
ppo/policy/advantages_mean: 4.470348358154297e-08
---------------------------------------------------------------------------------------------------


3006it [3:36:26,  3.08s/it]

objective/kl: -70.727294921875
ppo/returns/mean: 1.4242002964019775
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3007it [3:36:29,  3.08s/it]

objective/kl: -50.49971008300781
ppo/returns/mean: 1.3053317070007324
ppo/policy/advantages_mean: -2.60770320892334e-08
---------------------------------------------------------------------------------------------------


3008it [3:36:32,  3.07s/it]

objective/kl: -42.919837951660156
ppo/returns/mean: 1.261519193649292
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3009it [3:36:36,  3.08s/it]

objective/kl: -38.92376708984375
ppo/returns/mean: 1.2314010858535767
ppo/policy/advantages_mean: 4.0046870708465576e-08
---------------------------------------------------------------------------------------------------


3010it [3:36:39,  3.07s/it]

objective/kl: -31.003734588623047
ppo/returns/mean: 1.0037362575531006
ppo/policy/advantages_mean: -0.001552574336528778
---------------------------------------------------------------------------------------------------


3011it [3:36:42,  3.06s/it]

objective/kl: -21.617294311523438
ppo/returns/mean: 0.829670786857605
ppo/policy/advantages_mean: -2.421438694000244e-08
---------------------------------------------------------------------------------------------------


3012it [3:36:45,  3.05s/it]

objective/kl: 24.40254020690918
ppo/returns/mean: -0.3636154234409332
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


3013it [3:36:48,  3.04s/it]

objective/kl: 6.871761798858643
ppo/returns/mean: 0.04428964480757713
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3014it [3:36:51,  3.11s/it]

objective/kl: -16.860408782958984
ppo/returns/mean: 0.6165330410003662
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3015it [3:36:54,  3.16s/it]

objective/kl: -28.17535400390625
ppo/returns/mean: 0.9471233487129211
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


3016it [3:36:57,  3.12s/it]

objective/kl: -31.491363525390625
ppo/returns/mean: 1.004225492477417
ppo/policy/advantages_mean: 0.0004131235182285309
---------------------------------------------------------------------------------------------------


3017it [3:37:00,  3.11s/it]

objective/kl: -34.580406188964844
ppo/returns/mean: 1.076258659362793
ppo/policy/advantages_mean: -0.0009327288717031479
---------------------------------------------------------------------------------------------------


3018it [3:37:03,  3.09s/it]

objective/kl: -36.29817199707031
ppo/returns/mean: 1.1303046941757202
ppo/policy/advantages_mean: -1.7695128917694092e-08
---------------------------------------------------------------------------------------------------


3019it [3:37:07,  3.18s/it]

objective/kl: -40.84086227416992
ppo/returns/mean: 1.2210571765899658
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3020it [3:37:10,  3.15s/it]

objective/kl: -44.4671630859375
ppo/returns/mean: 1.2880980968475342
ppo/policy/advantages_mean: 1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


3021it [3:37:13,  3.12s/it]

objective/kl: -46.366455078125
ppo/returns/mean: 1.3218965530395508
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3022it [3:37:16,  3.22s/it]

objective/kl: -48.6925048828125
ppo/returns/mean: 1.3677642345428467
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


3023it [3:37:20,  3.36s/it]

objective/kl: -49.033226013183594
ppo/returns/mean: 1.4244718551635742
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3024it [3:37:23,  3.25s/it]

objective/kl: -50.88844299316406
ppo/returns/mean: 1.441803216934204
ppo/policy/advantages_mean: -2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


3025it [3:37:26,  3.28s/it]

objective/kl: -48.17823028564453
ppo/returns/mean: 1.3664346933364868
ppo/policy/advantages_mean: -0.0019851811230182648
---------------------------------------------------------------------------------------------------


3026it [3:37:29,  3.19s/it]

objective/kl: -46.24327087402344
ppo/returns/mean: 1.3462517261505127
ppo/policy/advantages_mean: -2.3283064365386963e-08
---------------------------------------------------------------------------------------------------


3027it [3:37:32,  3.14s/it]

objective/kl: -49.276065826416016
ppo/returns/mean: 1.4406917095184326
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3028it [3:37:35,  3.10s/it]

objective/kl: -49.00608825683594
ppo/returns/mean: 1.3826029300689697
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3029it [3:37:39,  3.32s/it]

objective/kl: -47.755611419677734
ppo/returns/mean: 1.38327956199646
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3030it [3:37:42,  3.22s/it]

objective/kl: -49.51240921020508
ppo/returns/mean: 1.42018723487854
ppo/policy/advantages_mean: -1.30385160446167e-08
---------------------------------------------------------------------------------------------------


3031it [3:37:45,  3.16s/it]

objective/kl: -52.480628967285156
ppo/returns/mean: 1.4741241931915283
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3032it [3:37:48,  3.11s/it]

objective/kl: -56.23377990722656
ppo/returns/mean: 1.5472371578216553
ppo/policy/advantages_mean: 2.7008354663848877e-08
---------------------------------------------------------------------------------------------------


3033it [3:37:51,  3.15s/it]

objective/kl: -50.16435623168945
ppo/returns/mean: 1.3903496265411377
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3034it [3:37:54,  3.12s/it]

objective/kl: -53.84012222290039
ppo/returns/mean: 1.4661341905593872
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3035it [3:37:57,  3.08s/it]

objective/kl: -53.4039421081543
ppo/returns/mean: 1.485416293144226
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3036it [3:38:01,  3.20s/it]

objective/kl: -55.954811096191406
ppo/returns/mean: 1.5189251899719238
ppo/policy/advantages_mean: -2.1420419216156006e-08
---------------------------------------------------------------------------------------------------


3037it [3:38:04,  3.17s/it]

objective/kl: -57.00543975830078
ppo/returns/mean: 1.512337327003479
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3038it [3:38:07,  3.13s/it]

objective/kl: -66.35418701171875
ppo/returns/mean: 1.6706568002700806
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3039it [3:38:10,  3.10s/it]

objective/kl: -74.171875
ppo/returns/mean: 1.8382716178894043
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3040it [3:38:13,  3.12s/it]

objective/kl: -75.90403747558594
ppo/returns/mean: 1.874841570854187
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


3041it [3:38:16,  3.10s/it]

objective/kl: -74.43814086914062
ppo/returns/mean: 1.8714286088943481
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3042it [3:38:20,  3.18s/it]

objective/kl: -76.04411315917969
ppo/returns/mean: 1.88252854347229
ppo/policy/advantages_mean: -2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


3043it [3:38:23,  3.13s/it]

objective/kl: -80.62547302246094
ppo/returns/mean: 1.982086181640625
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3044it [3:38:26,  3.10s/it]

objective/kl: -79.24760437011719
ppo/returns/mean: 1.9637782573699951
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3045it [3:38:29,  3.10s/it]

objective/kl: -81.37786865234375
ppo/returns/mean: 1.9848943948745728
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3046it [3:38:32,  3.07s/it]

objective/kl: -85.34528350830078
ppo/returns/mean: 2.0893540382385254
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3047it [3:38:35,  3.05s/it]

objective/kl: -81.76249694824219
ppo/returns/mean: 2.0061304569244385
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3048it [3:38:38,  3.06s/it]

objective/kl: -82.13218688964844
ppo/returns/mean: 2.0024752616882324
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3049it [3:38:42,  3.25s/it]

objective/kl: -79.6329345703125
ppo/returns/mean: 1.913548469543457
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


3050it [3:38:45,  3.19s/it]

objective/kl: -80.38420867919922
ppo/returns/mean: 1.9741085767745972
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3051it [3:38:48,  3.26s/it]

objective/kl: -84.2420654296875
ppo/returns/mean: 2.0497326850891113
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3052it [3:38:51,  3.19s/it]

objective/kl: -82.61868286132812
ppo/returns/mean: 2.0051722526550293
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3053it [3:38:54,  3.14s/it]

objective/kl: -85.6141128540039
ppo/returns/mean: 2.071983814239502
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3054it [3:38:57,  3.11s/it]

objective/kl: -84.73126220703125
ppo/returns/mean: 2.0513575077056885
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3055it [3:39:01,  3.21s/it]

objective/kl: -83.62458801269531
ppo/returns/mean: 2.041922092437744
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3056it [3:39:04,  3.16s/it]

objective/kl: -82.46607971191406
ppo/returns/mean: 2.0225257873535156
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3057it [3:39:07,  3.11s/it]

objective/kl: -80.92970275878906
ppo/returns/mean: 1.9933505058288574
ppo/policy/advantages_mean: 7.82310962677002e-08
---------------------------------------------------------------------------------------------------


3058it [3:39:10,  3.08s/it]

objective/kl: -78.14798736572266
ppo/returns/mean: 1.9344794750213623
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


3059it [3:39:13,  3.07s/it]

objective/kl: -77.22731018066406
ppo/returns/mean: 1.9094938039779663
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3060it [3:39:16,  3.04s/it]

objective/kl: -77.08290100097656
ppo/returns/mean: 1.8941763639450073
ppo/policy/advantages_mean: 1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


3061it [3:39:19,  3.05s/it]

objective/kl: -81.18096923828125
ppo/returns/mean: 1.956566572189331
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3062it [3:39:22,  3.03s/it]

objective/kl: -78.5684585571289
ppo/returns/mean: 1.9152088165283203
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3063it [3:39:25,  3.02s/it]

objective/kl: -78.77084350585938
ppo/returns/mean: 1.8848578929901123
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3064it [3:39:28,  3.02s/it]

objective/kl: -81.58361053466797
ppo/returns/mean: 1.9716622829437256
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3065it [3:39:31,  3.12s/it]

objective/kl: -80.51970672607422
ppo/returns/mean: 1.950822353363037
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3066it [3:39:35,  3.21s/it]

objective/kl: -81.78193664550781
ppo/returns/mean: 1.9635502099990845
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3067it [3:39:38,  3.16s/it]

objective/kl: -82.90448760986328
ppo/returns/mean: 1.9806243181228638
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3068it [3:39:41,  3.12s/it]

objective/kl: -83.60887145996094
ppo/returns/mean: 1.9852757453918457
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3069it [3:39:44,  3.07s/it]

objective/kl: -82.74567413330078
ppo/returns/mean: 1.9696149826049805
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3070it [3:39:47,  3.04s/it]

objective/kl: -82.59733581542969
ppo/returns/mean: 1.975576400756836
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3071it [3:39:50,  3.11s/it]

objective/kl: -82.15370178222656
ppo/returns/mean: 1.9622726440429688
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3072it [3:39:53,  3.16s/it]

objective/kl: -82.20722198486328
ppo/returns/mean: 1.9606789350509644
ppo/policy/advantages_mean: -2.60770320892334e-08
---------------------------------------------------------------------------------------------------


3073it [3:39:57,  3.27s/it]

objective/kl: -82.18412017822266
ppo/returns/mean: 1.9513074159622192
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


3074it [3:40:00,  3.20s/it]

objective/kl: -82.52055358886719
ppo/returns/mean: 1.957089900970459
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3075it [3:40:03,  3.14s/it]

objective/kl: -82.95207977294922
ppo/returns/mean: 1.9894282817840576
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


3076it [3:40:06,  3.12s/it]

objective/kl: -81.94735717773438
ppo/returns/mean: 1.9675889015197754
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3077it [3:40:09,  3.08s/it]

objective/kl: -83.74696350097656
ppo/returns/mean: 1.9968764781951904
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


3078it [3:40:12,  3.05s/it]

objective/kl: -86.3623275756836
ppo/returns/mean: 2.0466771125793457
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3079it [3:40:15,  3.04s/it]

objective/kl: -86.95797729492188
ppo/returns/mean: 2.068080425262451
ppo/policy/advantages_mean: 1.3969838619232178e-08
---------------------------------------------------------------------------------------------------


3080it [3:40:18,  3.02s/it]

objective/kl: -86.9626235961914
ppo/returns/mean: 2.066272258758545
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3081it [3:40:21,  3.01s/it]

objective/kl: -87.09526062011719
ppo/returns/mean: 2.0720467567443848
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3082it [3:40:24,  3.20s/it]

objective/kl: -83.38842010498047
ppo/returns/mean: 1.9750030040740967
ppo/policy/advantages_mean: -3.3527612686157227e-08
---------------------------------------------------------------------------------------------------


3083it [3:40:28,  3.21s/it]

objective/kl: -80.0238037109375
ppo/returns/mean: 1.906877875328064
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3084it [3:40:31,  3.15s/it]

objective/kl: -86.9874267578125
ppo/returns/mean: 2.0422801971435547
ppo/policy/advantages_mean: 3.3527612686157227e-08
---------------------------------------------------------------------------------------------------


3085it [3:40:34,  3.22s/it]

objective/kl: -82.70111846923828
ppo/returns/mean: 1.9421498775482178
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3086it [3:40:37,  3.16s/it]

objective/kl: -89.11825561523438
ppo/returns/mean: 2.0768325328826904
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


3087it [3:40:40,  3.12s/it]

objective/kl: -87.25509643554688
ppo/returns/mean: 2.053276538848877
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3088it [3:40:43,  3.09s/it]

objective/kl: -87.42523193359375
ppo/returns/mean: 2.0476479530334473
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3089it [3:40:46,  3.08s/it]

objective/kl: -87.28227996826172
ppo/returns/mean: 2.048430919647217
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3090it [3:40:49,  3.06s/it]

objective/kl: -87.14545440673828
ppo/returns/mean: 2.0525355339050293
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3091it [3:40:52,  3.06s/it]

objective/kl: -87.72801208496094
ppo/returns/mean: 2.0613198280334473
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3092it [3:40:55,  3.06s/it]

objective/kl: -86.43883514404297
ppo/returns/mean: 2.0396053791046143
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3093it [3:40:58,  3.05s/it]

objective/kl: -86.59931945800781
ppo/returns/mean: 2.0359292030334473
ppo/policy/advantages_mean: 2.561137080192566e-08
---------------------------------------------------------------------------------------------------


3094it [3:41:01,  3.04s/it]

objective/kl: -85.24929809570312
ppo/returns/mean: 2.0075185298919678
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3095it [3:41:04,  3.03s/it]

objective/kl: -86.04830169677734
ppo/returns/mean: 2.0172924995422363
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3096it [3:41:07,  3.03s/it]

objective/kl: -85.30624389648438
ppo/returns/mean: 2.005366325378418
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3097it [3:41:10,  3.06s/it]

objective/kl: -84.33619689941406
ppo/returns/mean: 1.9766919612884521
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3098it [3:41:13,  3.06s/it]

objective/kl: -85.2667236328125
ppo/returns/mean: 2.0024051666259766
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3099it [3:41:16,  3.04s/it]

objective/kl: -87.04143524169922
ppo/returns/mean: 2.031740665435791
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3100it [3:41:20,  3.06s/it]

objective/kl: -85.04177856445312
ppo/returns/mean: 2.0059947967529297
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3101it [3:41:23,  3.05s/it]

objective/kl: -84.6611328125
ppo/returns/mean: 1.9948477745056152
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3102it [3:41:26,  3.20s/it]

objective/kl: -84.26751708984375
ppo/returns/mean: 1.9902400970458984
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3103it [3:41:30,  3.31s/it]

objective/kl: -86.18792724609375
ppo/returns/mean: 2.0197973251342773
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3104it [3:41:33,  3.28s/it]

objective/kl: -84.20655822753906
ppo/returns/mean: 1.9869976043701172
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3105it [3:41:36,  3.19s/it]

objective/kl: -84.16923522949219
ppo/returns/mean: 1.979589581489563
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3106it [3:41:39,  3.13s/it]

objective/kl: -84.3507080078125
ppo/returns/mean: 1.9840463399887085
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3107it [3:41:42,  3.08s/it]

objective/kl: -85.8212661743164
ppo/returns/mean: 2.0109786987304688
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3108it [3:41:45,  3.07s/it]

objective/kl: -85.47587585449219
ppo/returns/mean: 1.998765468597412
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


3109it [3:41:48,  3.05s/it]

objective/kl: -84.64469909667969
ppo/returns/mean: 1.98929762840271
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3110it [3:41:51,  3.10s/it]

objective/kl: -86.79560852050781
ppo/returns/mean: 2.025129795074463
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3111it [3:41:54,  3.07s/it]

objective/kl: -87.07901763916016
ppo/returns/mean: 2.0201849937438965
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3112it [3:41:57,  3.05s/it]

objective/kl: -88.92251586914062
ppo/returns/mean: 2.045562505722046
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3113it [3:42:00,  3.03s/it]

objective/kl: -87.44247436523438
ppo/returns/mean: 2.0210094451904297
ppo/policy/advantages_mean: 2.421438694000244e-08
---------------------------------------------------------------------------------------------------


3114it [3:42:03,  3.08s/it]

objective/kl: -88.86805725097656
ppo/returns/mean: 2.0409536361694336
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3115it [3:42:06,  3.06s/it]

objective/kl: -92.28948974609375
ppo/returns/mean: 2.0338096618652344
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3116it [3:42:09,  3.05s/it]

objective/kl: -92.25785827636719
ppo/returns/mean: 2.059701681137085
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3117it [3:42:12,  3.04s/it]

objective/kl: -116.46562194824219
ppo/returns/mean: 2.2579450607299805
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3118it [3:42:15,  3.03s/it]

objective/kl: -106.36148834228516
ppo/returns/mean: 2.1477270126342773
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3119it [3:42:18,  3.02s/it]

objective/kl: -103.00243377685547
ppo/returns/mean: 2.130650520324707
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3120it [3:42:21,  3.01s/it]

objective/kl: -92.28570556640625
ppo/returns/mean: 2.0004525184631348
ppo/policy/advantages_mean: 1.7695128917694092e-08
---------------------------------------------------------------------------------------------------


3121it [3:42:24,  2.99s/it]

objective/kl: -88.07176971435547
ppo/returns/mean: 2.003788471221924
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3122it [3:42:28,  3.14s/it]

objective/kl: -85.98738098144531
ppo/returns/mean: 1.989851951599121
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


3123it [3:42:31,  3.10s/it]

objective/kl: -88.59542846679688
ppo/returns/mean: 2.0511250495910645
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3124it [3:42:34,  3.07s/it]

objective/kl: -86.04978942871094
ppo/returns/mean: 2.0092320442199707
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3125it [3:42:37,  3.04s/it]

objective/kl: -88.10075378417969
ppo/returns/mean: 2.044910430908203
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3126it [3:42:40,  3.03s/it]

objective/kl: -85.6551742553711
ppo/returns/mean: 2.0255050659179688
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3127it [3:42:43,  3.02s/it]

objective/kl: -86.238037109375
ppo/returns/mean: 1.9932358264923096
ppo/policy/advantages_mean: 1.5832483768463135e-08
---------------------------------------------------------------------------------------------------


3128it [3:42:46,  3.01s/it]

objective/kl: -84.93875122070312
ppo/returns/mean: 1.9617012739181519
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3129it [3:42:49,  3.01s/it]

objective/kl: -84.55517578125
ppo/returns/mean: 1.9202091693878174
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3130it [3:42:52,  3.01s/it]

objective/kl: -81.23286437988281
ppo/returns/mean: 1.8533389568328857
ppo/policy/advantages_mean: 5.587935447692871e-08
---------------------------------------------------------------------------------------------------


3131it [3:42:55,  3.04s/it]

objective/kl: -81.3568115234375
ppo/returns/mean: 1.8462055921554565
ppo/policy/advantages_mean: -1.5832483768463135e-08
---------------------------------------------------------------------------------------------------


3132it [3:42:58,  3.03s/it]

objective/kl: -80.70895385742188
ppo/returns/mean: 1.835808277130127
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3133it [3:43:01,  3.02s/it]

objective/kl: -77.66165161132812
ppo/returns/mean: 1.8020060062408447
ppo/policy/advantages_mean: -1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


3134it [3:43:04,  3.11s/it]

objective/kl: -73.15744018554688
ppo/returns/mean: 1.694129228591919
ppo/policy/advantages_mean: -2.421438694000244e-08
---------------------------------------------------------------------------------------------------


3135it [3:43:07,  3.08s/it]

objective/kl: -56.007896423339844
ppo/returns/mean: 1.2813782691955566
ppo/policy/advantages_mean: 5.587935447692871e-08
---------------------------------------------------------------------------------------------------


3136it [3:43:10,  3.05s/it]

objective/kl: -34.796573638916016
ppo/returns/mean: 0.7728397846221924
ppo/policy/advantages_mean: 5.21540641784668e-08
---------------------------------------------------------------------------------------------------


3137it [3:43:13,  3.04s/it]

objective/kl: -27.67257308959961
ppo/returns/mean: 0.5854005813598633
ppo/policy/advantages_mean: -8.940696716308594e-08
---------------------------------------------------------------------------------------------------


3138it [3:43:16,  3.08s/it]

objective/kl: -27.983173370361328
ppo/returns/mean: 0.4910954535007477
ppo/policy/advantages_mean: -0.0011757239699363708
---------------------------------------------------------------------------------------------------


3139it [3:43:20,  3.16s/it]

objective/kl: -22.042335510253906
ppo/returns/mean: 0.26161056756973267
ppo/policy/advantages_mean: -0.0009022243320941925
---------------------------------------------------------------------------------------------------


3140it [3:43:23,  3.17s/it]

objective/kl: -26.486412048339844
ppo/returns/mean: 0.35875627398490906
ppo/policy/advantages_mean: -0.0018832259811460972
---------------------------------------------------------------------------------------------------


3141it [3:43:26,  3.21s/it]

objective/kl: -29.728029251098633
ppo/returns/mean: 0.4239439368247986
ppo/policy/advantages_mean: -0.0016509275883436203
---------------------------------------------------------------------------------------------------


3142it [3:43:30,  3.37s/it]

objective/kl: -29.13236427307129
ppo/returns/mean: 0.3838733732700348
ppo/policy/advantages_mean: 0.0007002605125308037
---------------------------------------------------------------------------------------------------


3143it [3:43:33,  3.32s/it]

objective/kl: -31.161157608032227
ppo/returns/mean: 0.38436201214790344
ppo/policy/advantages_mean: 0.001139424741268158
---------------------------------------------------------------------------------------------------


3144it [3:43:36,  3.27s/it]

objective/kl: -35.61777114868164
ppo/returns/mean: 0.5230978727340698
ppo/policy/advantages_mean: 0.0028891414403915405
---------------------------------------------------------------------------------------------------


3145it [3:43:40,  3.26s/it]

objective/kl: -33.347557067871094
ppo/returns/mean: 0.38540080189704895
ppo/policy/advantages_mean: -0.0014366358518600464
---------------------------------------------------------------------------------------------------


3146it [3:43:43,  3.27s/it]

objective/kl: -36.207786560058594
ppo/returns/mean: 0.4559931755065918
ppo/policy/advantages_mean: 0.007484890520572662
---------------------------------------------------------------------------------------------------


3147it [3:43:46,  3.24s/it]

objective/kl: -37.610069274902344
ppo/returns/mean: 0.5544288754463196
ppo/policy/advantages_mean: 0.0032506678253412247
---------------------------------------------------------------------------------------------------


3148it [3:43:49,  3.23s/it]

objective/kl: -42.245155334472656
ppo/returns/mean: 0.6563649773597717
ppo/policy/advantages_mean: -0.002053234726190567
---------------------------------------------------------------------------------------------------


3149it [3:43:52,  3.17s/it]

objective/kl: -40.590797424316406
ppo/returns/mean: 0.6989531517028809
ppo/policy/advantages_mean: 0.0010206745937466621
---------------------------------------------------------------------------------------------------


3150it [3:43:55,  3.13s/it]

objective/kl: -42.250770568847656
ppo/returns/mean: 0.7487133741378784
ppo/policy/advantages_mean: 0.001892872154712677
---------------------------------------------------------------------------------------------------


3151it [3:43:59,  3.20s/it]

objective/kl: -47.55433654785156
ppo/returns/mean: 0.8799021244049072
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3152it [3:44:02,  3.16s/it]

objective/kl: -52.115806579589844
ppo/returns/mean: 0.9785760641098022
ppo/policy/advantages_mean: -0.00040270015597343445
---------------------------------------------------------------------------------------------------


3153it [3:44:05,  3.11s/it]

objective/kl: -54.473243713378906
ppo/returns/mean: 1.0407602787017822
ppo/policy/advantages_mean: 0.0024619698524475098
---------------------------------------------------------------------------------------------------


3154it [3:44:08,  3.07s/it]

objective/kl: -56.40755081176758
ppo/returns/mean: 1.113694429397583
ppo/policy/advantages_mean: 7.450580596923828e-08
---------------------------------------------------------------------------------------------------


3155it [3:44:11,  3.06s/it]

objective/kl: -55.141929626464844
ppo/returns/mean: 1.1004902124404907
ppo/policy/advantages_mean: 3.725290298461914e-08
---------------------------------------------------------------------------------------------------


3156it [3:44:14,  3.20s/it]

objective/kl: -57.44176483154297
ppo/returns/mean: 1.1433138847351074
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3157it [3:44:17,  3.19s/it]

objective/kl: -56.52030944824219
ppo/returns/mean: 1.1364264488220215
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3158it [3:44:21,  3.28s/it]

objective/kl: -56.509708404541016
ppo/returns/mean: 1.1403943300247192
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


3159it [3:44:24,  3.18s/it]

objective/kl: -57.412025451660156
ppo/returns/mean: 1.1619281768798828
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3160it [3:44:27,  3.30s/it]

objective/kl: -57.93360900878906
ppo/returns/mean: 1.169025182723999
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3161it [3:44:30,  3.21s/it]

objective/kl: -59.64579391479492
ppo/returns/mean: 1.203826665878296
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3162it [3:44:33,  3.15s/it]

objective/kl: -58.171199798583984
ppo/returns/mean: 1.1818937063217163
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3163it [3:44:36,  3.10s/it]

objective/kl: -59.61511993408203
ppo/returns/mean: 1.2141008377075195
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3164it [3:44:39,  3.07s/it]

objective/kl: -65.31494140625
ppo/returns/mean: 1.3450396060943604
ppo/policy/advantages_mean: 2.7939677238464355e-08
---------------------------------------------------------------------------------------------------


3165it [3:44:43,  3.18s/it]

objective/kl: -70.80651092529297
ppo/returns/mean: 1.476590633392334
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


3166it [3:44:46,  3.13s/it]

objective/kl: -68.43140411376953
ppo/returns/mean: 1.437178611755371
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3167it [3:44:49,  3.09s/it]

objective/kl: -69.36799621582031
ppo/returns/mean: 1.4562827348709106
ppo/policy/advantages_mean: -6.193295121192932e-08
---------------------------------------------------------------------------------------------------


3168it [3:44:52,  3.07s/it]

objective/kl: -70.96551513671875
ppo/returns/mean: 1.4872901439666748
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3169it [3:44:55,  3.05s/it]

objective/kl: -74.32388305664062
ppo/returns/mean: 1.5837156772613525
ppo/policy/advantages_mean: 2.514570951461792e-08
---------------------------------------------------------------------------------------------------


3170it [3:44:58,  3.03s/it]

objective/kl: -75.07194519042969
ppo/returns/mean: 1.6013716459274292
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3171it [3:45:01,  3.14s/it]

objective/kl: -77.10055541992188
ppo/returns/mean: 1.6637229919433594
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3172it [3:45:04,  3.14s/it]

objective/kl: -80.23219299316406
ppo/returns/mean: 1.7417054176330566
ppo/policy/advantages_mean: 1.210719347000122e-08
---------------------------------------------------------------------------------------------------


3173it [3:45:07,  3.12s/it]

objective/kl: -80.58328247070312
ppo/returns/mean: 1.7531633377075195
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


3174it [3:45:11,  3.35s/it]

objective/kl: -80.63404846191406
ppo/returns/mean: 1.7600586414337158
ppo/policy/advantages_mean: 3.725290298461914e-08
---------------------------------------------------------------------------------------------------


3175it [3:45:15,  3.30s/it]

objective/kl: -78.92253112792969
ppo/returns/mean: 1.7259540557861328
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3176it [3:45:18,  3.44s/it]

objective/kl: -81.2393798828125
ppo/returns/mean: 1.7748777866363525
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3177it [3:45:21,  3.32s/it]

objective/kl: -74.05599975585938
ppo/returns/mean: 1.6196731328964233
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3178it [3:45:24,  3.24s/it]

objective/kl: -77.89408874511719
ppo/returns/mean: 1.6959493160247803
ppo/policy/advantages_mean: -2.1420419216156006e-08
---------------------------------------------------------------------------------------------------


3179it [3:45:28,  3.22s/it]

objective/kl: -77.086181640625
ppo/returns/mean: 1.6700150966644287
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3180it [3:45:31,  3.20s/it]

objective/kl: -76.90538024902344
ppo/returns/mean: 1.6598637104034424
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3181it [3:45:34,  3.14s/it]

objective/kl: -79.62663269042969
ppo/returns/mean: 1.7165591716766357
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3182it [3:45:37,  3.14s/it]

objective/kl: -79.5989990234375
ppo/returns/mean: 1.7086106538772583
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3183it [3:45:40,  3.09s/it]

objective/kl: -80.493408203125
ppo/returns/mean: 1.7405924797058105
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3184it [3:45:43,  3.06s/it]

objective/kl: -83.17911529541016
ppo/returns/mean: 1.8024970293045044
ppo/policy/advantages_mean: -3.5390257835388184e-08
---------------------------------------------------------------------------------------------------


3185it [3:45:46,  3.05s/it]

objective/kl: -80.40606689453125
ppo/returns/mean: 1.7473974227905273
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3186it [3:45:49,  3.02s/it]

objective/kl: -80.92464447021484
ppo/returns/mean: 1.7784653902053833
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3187it [3:45:52,  3.16s/it]

objective/kl: -80.84468078613281
ppo/returns/mean: 1.7591962814331055
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3188it [3:45:56,  3.30s/it]

objective/kl: -82.07612609863281
ppo/returns/mean: 1.7791012525558472
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


3189it [3:45:59,  3.20s/it]

objective/kl: -83.31253814697266
ppo/returns/mean: 1.7962883710861206
ppo/policy/advantages_mean: -4.0978193283081055e-08
---------------------------------------------------------------------------------------------------


3190it [3:46:02,  3.17s/it]

objective/kl: -82.42953491210938
ppo/returns/mean: 1.75992751121521
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3191it [3:46:05,  3.14s/it]

objective/kl: -82.7589111328125
ppo/returns/mean: 1.785120964050293
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3192it [3:46:08,  3.11s/it]

objective/kl: -82.7957763671875
ppo/returns/mean: 1.7887954711914062
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3193it [3:46:11,  3.07s/it]

objective/kl: -82.38458251953125
ppo/returns/mean: 1.7827000617980957
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


3194it [3:46:14,  3.05s/it]

objective/kl: -81.77156066894531
ppo/returns/mean: 1.7829499244689941
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3195it [3:46:18,  3.28s/it]

objective/kl: -81.07503509521484
ppo/returns/mean: 1.738860845565796
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3196it [3:46:21,  3.27s/it]

objective/kl: -84.22781372070312
ppo/returns/mean: 1.816702127456665
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3197it [3:46:24,  3.29s/it]

objective/kl: -84.41323852539062
ppo/returns/mean: 1.826770544052124
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3198it [3:46:27,  3.20s/it]

objective/kl: -84.64151763916016
ppo/returns/mean: 1.84197998046875
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3199it [3:46:31,  3.34s/it]

objective/kl: -84.92031860351562
ppo/returns/mean: 1.8439881801605225
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3200it [3:46:35,  3.36s/it]

objective/kl: -85.58684539794922
ppo/returns/mean: 1.8606010675430298
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3201it [3:46:38,  3.25s/it]

objective/kl: -86.81887817382812
ppo/returns/mean: 1.871477484703064
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3202it [3:46:41,  3.28s/it]

objective/kl: -83.62998962402344
ppo/returns/mean: 1.8175127506256104
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3203it [3:46:44,  3.35s/it]

objective/kl: -85.94084930419922
ppo/returns/mean: 1.861156940460205
ppo/policy/advantages_mean: 1.210719347000122e-08
---------------------------------------------------------------------------------------------------


3204it [3:46:47,  3.25s/it]

objective/kl: -84.80358123779297
ppo/returns/mean: 1.8501684665679932
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3205it [3:46:51,  3.31s/it]

objective/kl: -83.3280029296875
ppo/returns/mean: 1.8235775232315063
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


3206it [3:46:54,  3.23s/it]

objective/kl: -83.66893005371094
ppo/returns/mean: 1.8353164196014404
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3207it [3:46:57,  3.17s/it]

objective/kl: -83.58112335205078
ppo/returns/mean: 1.8221774101257324
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3208it [3:47:00,  3.20s/it]

objective/kl: -84.392333984375
ppo/returns/mean: 1.8355379104614258
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3209it [3:47:03,  3.15s/it]

objective/kl: -83.08402252197266
ppo/returns/mean: 1.8057283163070679
ppo/policy/advantages_mean: 8.381903171539307e-09
---------------------------------------------------------------------------------------------------


3210it [3:47:06,  3.12s/it]

objective/kl: -83.54638671875
ppo/returns/mean: 1.8062255382537842
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3211it [3:47:09,  3.10s/it]

objective/kl: -84.23097229003906
ppo/returns/mean: 1.8288344144821167
ppo/policy/advantages_mean: -2.60770320892334e-08
---------------------------------------------------------------------------------------------------


3212it [3:47:12,  3.09s/it]

objective/kl: -86.1885986328125
ppo/returns/mean: 1.8583260774612427
ppo/policy/advantages_mean: 1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


3213it [3:47:16,  3.20s/it]

objective/kl: -85.57499694824219
ppo/returns/mean: 1.8463037014007568
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3214it [3:47:19,  3.16s/it]

objective/kl: -86.63526916503906
ppo/returns/mean: 1.8582170009613037
ppo/policy/advantages_mean: -2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


3215it [3:47:22,  3.17s/it]

objective/kl: -86.88185119628906
ppo/returns/mean: 1.8688626289367676
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


3216it [3:47:25,  3.12s/it]

objective/kl: -88.48588562011719
ppo/returns/mean: 1.8953336477279663
ppo/policy/advantages_mean: 1.5832483768463135e-08
---------------------------------------------------------------------------------------------------


3217it [3:47:28,  3.08s/it]

objective/kl: -88.427978515625
ppo/returns/mean: 1.906542420387268
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3218it [3:47:31,  3.05s/it]

objective/kl: -87.094482421875
ppo/returns/mean: 1.8755217790603638
ppo/policy/advantages_mean: -2.7939677238464355e-09
---------------------------------------------------------------------------------------------------


3219it [3:47:34,  3.09s/it]

objective/kl: -88.38497924804688
ppo/returns/mean: 1.894553780555725
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3220it [3:47:37,  3.07s/it]

objective/kl: -87.75060272216797
ppo/returns/mean: 1.883170485496521
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3221it [3:47:41,  3.28s/it]

objective/kl: -87.80459594726562
ppo/returns/mean: 1.8737260103225708
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3222it [3:47:44,  3.20s/it]

objective/kl: -87.57217407226562
ppo/returns/mean: 1.8738107681274414
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3223it [3:47:47,  3.14s/it]

objective/kl: -88.05865478515625
ppo/returns/mean: 1.8784517049789429
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3224it [3:47:50,  3.09s/it]

objective/kl: -85.94268798828125
ppo/returns/mean: 1.8500220775604248
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3225it [3:47:53,  3.06s/it]

objective/kl: -87.99431610107422
ppo/returns/mean: 1.871621012687683
ppo/policy/advantages_mean: 2.421438694000244e-08
---------------------------------------------------------------------------------------------------


3226it [3:47:56,  3.04s/it]

objective/kl: -88.36097717285156
ppo/returns/mean: 1.8890023231506348
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3227it [3:47:59,  3.09s/it]

objective/kl: -84.7509765625
ppo/returns/mean: 1.8030312061309814
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3228it [3:48:02,  3.06s/it]

objective/kl: -87.68011474609375
ppo/returns/mean: 1.8542228937149048
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3229it [3:48:06,  3.11s/it]

objective/kl: -88.36490631103516
ppo/returns/mean: 1.8724650144577026
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3230it [3:48:08,  3.07s/it]

objective/kl: -85.06759643554688
ppo/returns/mean: 1.8070944547653198
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3231it [3:48:11,  3.06s/it]

objective/kl: -86.02392578125
ppo/returns/mean: 1.8161354064941406
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3232it [3:48:15,  3.05s/it]

objective/kl: -87.97439575195312
ppo/returns/mean: 1.8636858463287354
ppo/policy/advantages_mean: -2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


3233it [3:48:18,  3.22s/it]

objective/kl: -86.19587707519531
ppo/returns/mean: 1.8123722076416016
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3234it [3:48:21,  3.15s/it]

objective/kl: -85.26629638671875
ppo/returns/mean: 1.7874162197113037
ppo/policy/advantages_mean: -2.3283064365386963e-08
---------------------------------------------------------------------------------------------------


3235it [3:48:24,  3.12s/it]

objective/kl: -85.75609588623047
ppo/returns/mean: 1.7923705577850342
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3236it [3:48:27,  3.08s/it]

objective/kl: -85.29535675048828
ppo/returns/mean: 1.7846462726593018
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3237it [3:48:30,  3.05s/it]

objective/kl: -86.39694213867188
ppo/returns/mean: 1.809952735900879
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


3238it [3:48:33,  3.04s/it]

objective/kl: -83.2957763671875
ppo/returns/mean: 1.748894453048706
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3239it [3:48:36,  3.04s/it]

objective/kl: -85.80371856689453
ppo/returns/mean: 1.7923589944839478
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3240it [3:48:40,  3.28s/it]

objective/kl: -82.93202209472656
ppo/returns/mean: 1.7148663997650146
ppo/policy/advantages_mean: 1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


3241it [3:48:43,  3.30s/it]

objective/kl: -85.84750366210938
ppo/returns/mean: 1.781567931175232
ppo/policy/advantages_mean: 1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


3242it [3:48:46,  3.20s/it]

objective/kl: -83.514892578125
ppo/returns/mean: 1.7578015327453613
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3243it [3:48:49,  3.14s/it]

objective/kl: -86.14620971679688
ppo/returns/mean: 1.8148831129074097
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3244it [3:48:52,  3.10s/it]

objective/kl: -83.4330062866211
ppo/returns/mean: 1.7656887769699097
ppo/policy/advantages_mean: -1.30385160446167e-08
---------------------------------------------------------------------------------------------------


3245it [3:48:55,  3.08s/it]

objective/kl: -84.1069107055664
ppo/returns/mean: 1.7497408390045166
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


3246it [3:48:58,  3.05s/it]

objective/kl: -83.45126342773438
ppo/returns/mean: 1.7246406078338623
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3247it [3:49:01,  3.03s/it]

objective/kl: -81.34129333496094
ppo/returns/mean: 1.6856098175048828
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3248it [3:49:04,  3.02s/it]

objective/kl: -86.46442413330078
ppo/returns/mean: 1.78596031665802
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3249it [3:49:07,  3.01s/it]

objective/kl: -85.31077575683594
ppo/returns/mean: 1.7586947679519653
ppo/policy/advantages_mean: -1.30385160446167e-08
---------------------------------------------------------------------------------------------------


3250it [3:49:11,  3.21s/it]

objective/kl: -82.18659210205078
ppo/returns/mean: 1.7087575197219849
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3251it [3:49:14,  3.17s/it]

objective/kl: -84.81009674072266
ppo/returns/mean: 1.7458505630493164
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3252it [3:49:17,  3.11s/it]

objective/kl: -85.78046417236328
ppo/returns/mean: 1.7618169784545898
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3253it [3:49:20,  3.19s/it]

objective/kl: -85.3564682006836
ppo/returns/mean: 1.7476553916931152
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3254it [3:49:25,  3.57s/it]

objective/kl: -84.9563217163086
ppo/returns/mean: 1.7602901458740234
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3255it [3:49:28,  3.48s/it]

objective/kl: -83.45199584960938
ppo/returns/mean: 1.7227849960327148
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3256it [3:49:31,  3.35s/it]

objective/kl: -81.22090911865234
ppo/returns/mean: 1.6766655445098877
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3257it [3:49:34,  3.25s/it]

objective/kl: -82.92538452148438
ppo/returns/mean: 1.697479486465454
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3258it [3:49:37,  3.19s/it]

objective/kl: -81.30352783203125
ppo/returns/mean: 1.6538922786712646
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3259it [3:49:40,  3.18s/it]

objective/kl: -80.8149642944336
ppo/returns/mean: 1.6605546474456787
ppo/policy/advantages_mean: 4.6566128730773926e-09
---------------------------------------------------------------------------------------------------


3260it [3:49:43,  3.14s/it]

objective/kl: -83.80340576171875
ppo/returns/mean: 1.6143617630004883
ppo/policy/advantages_mean: 4.470348358154297e-08
---------------------------------------------------------------------------------------------------


3261it [3:49:47,  3.33s/it]

objective/kl: -34.78738784790039
ppo/returns/mean: 0.6210967302322388
ppo/policy/advantages_mean: 1.5832483768463135e-08
---------------------------------------------------------------------------------------------------


3262it [3:49:50,  3.28s/it]

objective/kl: -28.61282730102539
ppo/returns/mean: 0.24124832451343536
ppo/policy/advantages_mean: -0.0010324344038963318
---------------------------------------------------------------------------------------------------


3263it [3:49:54,  3.29s/it]

objective/kl: -15.285584449768066
ppo/returns/mean: -0.14590710401535034
ppo/policy/advantages_mean: -0.0002857847139239311
---------------------------------------------------------------------------------------------------


3264it [3:49:57,  3.39s/it]

objective/kl: -25.4373779296875
ppo/returns/mean: -0.07260589301586151
ppo/policy/advantages_mean: 0.0005533546209335327
---------------------------------------------------------------------------------------------------


3265it [3:50:01,  3.37s/it]

objective/kl: -98.897216796875
ppo/returns/mean: 1.2748148441314697
ppo/policy/advantages_mean: 0.0024511823430657387
---------------------------------------------------------------------------------------------------


3266it [3:50:04,  3.29s/it]

objective/kl: -126.7935791015625
ppo/returns/mean: 2.0058159828186035
ppo/policy/advantages_mean: -2.7008354663848877e-08
---------------------------------------------------------------------------------------------------


3267it [3:50:07,  3.23s/it]

objective/kl: -126.74102783203125
ppo/returns/mean: 2.018590211868286
ppo/policy/advantages_mean: -2.60770320892334e-08
---------------------------------------------------------------------------------------------------


3268it [3:50:10,  3.17s/it]

objective/kl: -118.37440490722656
ppo/returns/mean: 1.996774673461914
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


3269it [3:50:13,  3.13s/it]

objective/kl: -76.48017120361328
ppo/returns/mean: 1.3134500980377197
ppo/policy/advantages_mean: -2.514570951461792e-08
---------------------------------------------------------------------------------------------------


3270it [3:50:16,  3.11s/it]

objective/kl: 33.185768127441406
ppo/returns/mean: -0.4899115562438965
ppo/policy/advantages_mean: -2.0675361156463623e-07
---------------------------------------------------------------------------------------------------


3271it [3:50:21,  3.57s/it]

objective/kl: 56.99883270263672
ppo/returns/mean: -0.8269674777984619
ppo/policy/advantages_mean: 0.005323199555277824
---------------------------------------------------------------------------------------------------


3272it [3:50:24,  3.54s/it]

objective/kl: -35.401817321777344
ppo/returns/mean: 0.3068625330924988
ppo/policy/advantages_mean: 0.008061517030000687
---------------------------------------------------------------------------------------------------


3273it [3:50:27,  3.39s/it]

objective/kl: -55.78556823730469
ppo/returns/mean: 0.9051754474639893
ppo/policy/advantages_mean: -0.000317583791911602
---------------------------------------------------------------------------------------------------


3274it [3:50:30,  3.35s/it]

objective/kl: -8.991273880004883
ppo/returns/mean: 0.1954319030046463
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3275it [3:50:34,  3.29s/it]

objective/kl: 54.54704284667969
ppo/returns/mean: -0.7653365731239319
ppo/policy/advantages_mean: -2.331775613129139e-05
---------------------------------------------------------------------------------------------------


3276it [3:50:37,  3.31s/it]

objective/kl: 16.468326568603516
ppo/returns/mean: -0.22757279872894287
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3277it [3:50:40,  3.21s/it]

objective/kl: -27.328598022460938
ppo/returns/mean: 0.4499765932559967
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


3278it [3:50:43,  3.14s/it]

objective/kl: -58.70500183105469
ppo/returns/mean: 0.8487629890441895
ppo/policy/advantages_mean: -3.91155481338501e-08
---------------------------------------------------------------------------------------------------


3279it [3:50:46,  3.14s/it]

objective/kl: -51.05120086669922
ppo/returns/mean: 0.5031523704528809
ppo/policy/advantages_mean: -0.00021449849009513855
---------------------------------------------------------------------------------------------------


3280it [3:50:49,  3.11s/it]

objective/kl: -82.64627075195312
ppo/returns/mean: 1.301150918006897
ppo/policy/advantages_mean: -0.0006035128608345985
---------------------------------------------------------------------------------------------------


3281it [3:50:52,  3.08s/it]

objective/kl: -92.17585754394531
ppo/returns/mean: 1.5772148370742798
ppo/policy/advantages_mean: -0.0002460945397615433
---------------------------------------------------------------------------------------------------


3282it [3:50:56,  3.31s/it]

objective/kl: -91.66242980957031
ppo/returns/mean: 1.6846728324890137
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3283it [3:50:59,  3.23s/it]

objective/kl: -87.72297668457031
ppo/returns/mean: 1.7577093839645386
ppo/policy/advantages_mean: -4.563480615615845e-08
---------------------------------------------------------------------------------------------------


3284it [3:51:02,  3.20s/it]

objective/kl: -71.71928405761719
ppo/returns/mean: 1.5860040187835693
ppo/policy/advantages_mean: 3.4924596548080444e-08
---------------------------------------------------------------------------------------------------


3285it [3:51:05,  3.14s/it]

objective/kl: -57.526092529296875
ppo/returns/mean: 1.3554673194885254
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


3286it [3:51:08,  3.12s/it]

objective/kl: -50.93516540527344
ppo/returns/mean: 1.2274482250213623
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3287it [3:51:11,  3.13s/it]

objective/kl: -16.529197692871094
ppo/returns/mean: 0.5534830689430237
ppo/policy/advantages_mean: 1.955777406692505e-08
---------------------------------------------------------------------------------------------------


3288it [3:51:15,  3.26s/it]

objective/kl: 41.97540283203125
ppo/returns/mean: -0.38783490657806396
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3289it [3:51:18,  3.34s/it]

objective/kl: 46.34018325805664
ppo/returns/mean: -0.5239207744598389
ppo/policy/advantages_mean: -1.2293457984924316e-07
---------------------------------------------------------------------------------------------------


3290it [3:51:22,  3.48s/it]

objective/kl: 35.6308479309082
ppo/returns/mean: -0.5389931201934814
ppo/policy/advantages_mean: 0.0018307091668248177
---------------------------------------------------------------------------------------------------


3291it [3:51:25,  3.34s/it]

objective/kl: 35.70618438720703
ppo/returns/mean: -0.577820897102356
ppo/policy/advantages_mean: -5.960464477539063e-08
---------------------------------------------------------------------------------------------------


3292it [3:51:29,  3.36s/it]

objective/kl: 52.77351379394531
ppo/returns/mean: -0.8164498805999756
ppo/policy/advantages_mean: 0.001325368881225586
---------------------------------------------------------------------------------------------------


3293it [3:51:32,  3.33s/it]

objective/kl: 47.382102966308594
ppo/returns/mean: -0.8175742626190186
ppo/policy/advantages_mean: 0.0015595704317092896
---------------------------------------------------------------------------------------------------


3294it [3:51:35,  3.38s/it]

objective/kl: 43.57244873046875
ppo/returns/mean: -0.7929847240447998
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3295it [3:51:38,  3.28s/it]

objective/kl: 46.43226623535156
ppo/returns/mean: -0.8509303331375122
ppo/policy/advantages_mean: -1.7229467630386353e-08
---------------------------------------------------------------------------------------------------


3296it [3:51:41,  3.21s/it]

objective/kl: 47.09181594848633
ppo/returns/mean: -0.878913402557373
ppo/policy/advantages_mean: -2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


3297it [3:51:45,  3.19s/it]

objective/kl: 48.330718994140625
ppo/returns/mean: -0.911800742149353
ppo/policy/advantages_mean: 2.7939677238464355e-09
---------------------------------------------------------------------------------------------------


3298it [3:51:48,  3.15s/it]

objective/kl: 52.70891189575195
ppo/returns/mean: -0.9707170724868774
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


3299it [3:51:51,  3.30s/it]

objective/kl: 52.775718688964844
ppo/returns/mean: -0.9463956356048584
ppo/policy/advantages_mean: 2.421438694000244e-08
---------------------------------------------------------------------------------------------------


3300it [3:51:54,  3.24s/it]

objective/kl: 52.09514236450195
ppo/returns/mean: -0.9319547414779663
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3301it [3:51:57,  3.18s/it]

objective/kl: 54.609153747558594
ppo/returns/mean: -0.9871206879615784
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3302it [3:52:01,  3.14s/it]

objective/kl: 50.5278205871582
ppo/returns/mean: -0.9131039977073669
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3303it [3:52:04,  3.12s/it]

objective/kl: 52.668025970458984
ppo/returns/mean: -0.9550057649612427
ppo/policy/advantages_mean: 3.3527612686157227e-08
---------------------------------------------------------------------------------------------------


3304it [3:52:07,  3.09s/it]

objective/kl: 50.589630126953125
ppo/returns/mean: -0.9228533506393433
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3305it [3:52:10,  3.20s/it]

objective/kl: 49.70269012451172
ppo/returns/mean: -0.9015377759933472
ppo/policy/advantages_mean: -2.7939677238464355e-09
---------------------------------------------------------------------------------------------------


3306it [3:52:15,  3.77s/it]

objective/kl: 52.42138671875
ppo/returns/mean: -0.9521688222885132
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3307it [3:52:19,  3.70s/it]

objective/kl: 44.45598602294922
ppo/returns/mean: -0.7504284381866455
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


3308it [3:52:22,  3.59s/it]

objective/kl: 43.35695266723633
ppo/returns/mean: -0.7033601999282837
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


3309it [3:52:25,  3.42s/it]

objective/kl: 37.91577911376953
ppo/returns/mean: -0.5947709083557129
ppo/policy/advantages_mean: 3.3527612686157227e-08
---------------------------------------------------------------------------------------------------


3310it [3:52:28,  3.32s/it]

objective/kl: 34.104248046875
ppo/returns/mean: -0.5260196924209595
ppo/policy/advantages_mean: 1.0244548320770264e-08
---------------------------------------------------------------------------------------------------


3311it [3:52:31,  3.24s/it]

objective/kl: 23.961650848388672
ppo/returns/mean: -0.33812206983566284
ppo/policy/advantages_mean: 1.0244548320770264e-08
---------------------------------------------------------------------------------------------------


3312it [3:52:34,  3.19s/it]

objective/kl: 19.578659057617188
ppo/returns/mean: -0.21471098065376282
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


3313it [3:52:38,  3.22s/it]

objective/kl: 17.75343894958496
ppo/returns/mean: -0.21852341294288635
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


3314it [3:52:41,  3.18s/it]

objective/kl: 17.617034912109375
ppo/returns/mean: -0.20050738751888275
ppo/policy/advantages_mean: 1.3969838619232178e-08
---------------------------------------------------------------------------------------------------


3315it [3:52:44,  3.14s/it]

objective/kl: 18.325042724609375
ppo/returns/mean: -0.20693960785865784
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3316it [3:52:47,  3.11s/it]

objective/kl: 15.983577728271484
ppo/returns/mean: -0.13718059659004211
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3317it [3:52:50,  3.08s/it]

objective/kl: 14.814411163330078
ppo/returns/mean: -0.07897301018238068
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


3318it [3:52:53,  3.06s/it]

objective/kl: 21.834247589111328
ppo/returns/mean: -0.24085424840450287
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3319it [3:52:56,  3.07s/it]

objective/kl: 38.40330123901367
ppo/returns/mean: -0.5950019359588623
ppo/policy/advantages_mean: -1.5832483768463135e-08
---------------------------------------------------------------------------------------------------


3320it [3:52:59,  3.06s/it]

objective/kl: 53.09313201904297
ppo/returns/mean: -0.9591760635375977
ppo/policy/advantages_mean: 4.377216100692749e-08
---------------------------------------------------------------------------------------------------


3321it [3:53:02,  3.05s/it]

objective/kl: 51.84266662597656
ppo/returns/mean: -0.9456298351287842
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


3322it [3:53:05,  3.05s/it]

objective/kl: 52.20707702636719
ppo/returns/mean: -0.9637396931648254
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3323it [3:53:08,  3.04s/it]

objective/kl: 53.31549835205078
ppo/returns/mean: -1.001366138458252
ppo/policy/advantages_mean: -1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


3324it [3:53:11,  3.05s/it]

objective/kl: 50.017112731933594
ppo/returns/mean: -0.9465423822402954
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3325it [3:53:14,  3.11s/it]

objective/kl: 48.65721130371094
ppo/returns/mean: -0.8769277334213257
ppo/policy/advantages_mean: 0.0006562052294611931
---------------------------------------------------------------------------------------------------


3326it [3:53:18,  3.15s/it]

objective/kl: 46.8125114440918
ppo/returns/mean: -0.8620050549507141
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3327it [3:53:21,  3.22s/it]

objective/kl: 43.08557891845703
ppo/returns/mean: -0.7942799925804138
ppo/policy/advantages_mean: -0.0001551508903503418
---------------------------------------------------------------------------------------------------


3328it [3:53:24,  3.25s/it]

objective/kl: 46.95556640625
ppo/returns/mean: -0.8585646748542786
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


3329it [3:53:27,  3.19s/it]

objective/kl: 22.724428176879883
ppo/returns/mean: -0.29964470863342285
ppo/policy/advantages_mean: -0.0006826221942901611
---------------------------------------------------------------------------------------------------


3330it [3:53:31,  3.30s/it]

objective/kl: 20.695446014404297
ppo/returns/mean: -0.26413553953170776
ppo/policy/advantages_mean: 0.0010607987642288208
---------------------------------------------------------------------------------------------------


3331it [3:53:35,  3.45s/it]

objective/kl: 16.460247039794922
ppo/returns/mean: -0.17271092534065247
ppo/policy/advantages_mean: 0.0013024820946156979
---------------------------------------------------------------------------------------------------


3332it [3:53:38,  3.33s/it]

objective/kl: 13.84257984161377
ppo/returns/mean: -0.08497180789709091
ppo/policy/advantages_mean: -3.166496753692627e-08
---------------------------------------------------------------------------------------------------


3333it [3:53:41,  3.29s/it]

objective/kl: 15.735244750976562
ppo/returns/mean: -0.15418903529644012
ppo/policy/advantages_mean: 0.006218187510967255
---------------------------------------------------------------------------------------------------


3334it [3:53:44,  3.21s/it]

objective/kl: 14.3519868850708
ppo/returns/mean: -0.07997114211320877
ppo/policy/advantages_mean: -1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


3335it [3:53:47,  3.15s/it]

objective/kl: 7.529277801513672
ppo/returns/mean: 0.03891681879758835
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3336it [3:53:50,  3.25s/it]

objective/kl: 5.5929179191589355
ppo/returns/mean: 0.07835128903388977
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3337it [3:53:53,  3.18s/it]

objective/kl: 5.744677543640137
ppo/returns/mean: 0.071971595287323
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3338it [3:53:57,  3.14s/it]

objective/kl: 5.05518102645874
ppo/returns/mean: 0.09251788258552551
ppo/policy/advantages_mean: 6.51925802230835e-09
---------------------------------------------------------------------------------------------------


3339it [3:54:00,  3.13s/it]

objective/kl: 3.0566649436950684
ppo/returns/mean: 0.1458321362733841
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3340it [3:54:03,  3.12s/it]

objective/kl: 1.6703886985778809
ppo/returns/mean: 0.1939665526151657
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3341it [3:54:06,  3.10s/it]

objective/kl: -1.73016357421875
ppo/returns/mean: 0.3158116042613983
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3342it [3:54:09,  3.09s/it]

objective/kl: -3.707716464996338
ppo/returns/mean: 0.35998690128326416
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3343it [3:54:12,  3.07s/it]

objective/kl: -1.9854307174682617
ppo/returns/mean: 0.3373832702636719
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3344it [3:54:15,  3.06s/it]

objective/kl: -3.25350284576416
ppo/returns/mean: 0.3751927316188812
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3345it [3:54:18,  3.08s/it]

objective/kl: -1.8657124042510986
ppo/returns/mean: 0.3532821834087372
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3346it [3:54:21,  3.08s/it]

objective/kl: -3.9176323413848877
ppo/returns/mean: 0.37714332342147827
ppo/policy/advantages_mean: -4.0046870708465576e-08
---------------------------------------------------------------------------------------------------


3347it [3:54:24,  3.07s/it]

objective/kl: -1.4997661113739014
ppo/returns/mean: 0.3292686939239502
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3348it [3:54:27,  3.06s/it]

objective/kl: -1.168622612953186
ppo/returns/mean: 0.3236692547798157
ppo/policy/advantages_mean: -4.6566128730773926e-09
---------------------------------------------------------------------------------------------------


3349it [3:54:31,  3.15s/it]

objective/kl: 0.3623005449771881
ppo/returns/mean: 0.3005317449569702
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3350it [3:54:34,  3.11s/it]

objective/kl: 0.5922537446022034
ppo/returns/mean: 0.31200018525123596
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3351it [3:54:37,  3.09s/it]

objective/kl: -1.7114626169204712
ppo/returns/mean: 0.3413747549057007
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3352it [3:54:40,  3.08s/it]

objective/kl: -3.067277431488037
ppo/returns/mean: 0.3539774715900421
ppo/policy/advantages_mean: 3.725290298461914e-08
---------------------------------------------------------------------------------------------------


3353it [3:54:43,  3.19s/it]

objective/kl: -0.8634371757507324
ppo/returns/mean: 0.3213173747062683
ppo/policy/advantages_mean: -2.7939677238464355e-09
---------------------------------------------------------------------------------------------------


3354it [3:54:46,  3.14s/it]

objective/kl: -2.1918063163757324
ppo/returns/mean: 0.34631645679473877
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3355it [3:54:49,  3.12s/it]

objective/kl: -2.6795804500579834
ppo/returns/mean: 0.35235413908958435
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3356it [3:54:52,  3.09s/it]

objective/kl: -2.510659694671631
ppo/returns/mean: 0.347796767950058
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3357it [3:54:55,  3.08s/it]

objective/kl: -0.9328891038894653
ppo/returns/mean: 0.32187914848327637
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3358it [3:54:58,  3.06s/it]

objective/kl: 0.11243683099746704
ppo/returns/mean: 0.2954368591308594
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3359it [3:55:01,  3.05s/it]

objective/kl: -1.4805352687835693
ppo/returns/mean: 0.3440113365650177
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3360it [3:55:04,  3.05s/it]

objective/kl: -1.9638721942901611
ppo/returns/mean: 0.3444773554801941
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3361it [3:55:08,  3.25s/it]

objective/kl: 0.11149489879608154
ppo/returns/mean: 0.30500853061676025
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3362it [3:55:11,  3.23s/it]

objective/kl: -1.8568755388259888
ppo/returns/mean: 0.33524656295776367
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3363it [3:55:14,  3.17s/it]

objective/kl: -0.6323012113571167
ppo/returns/mean: 0.33563828468322754
ppo/policy/advantages_mean: -4.6566128730773926e-09
---------------------------------------------------------------------------------------------------


3364it [3:55:17,  3.13s/it]

objective/kl: -3.0792925357818604
ppo/returns/mean: 0.3634487986564636
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3365it [3:55:20,  3.13s/it]

objective/kl: -3.858815908432007
ppo/returns/mean: 0.372878760099411
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3366it [3:55:23,  3.09s/it]

objective/kl: -3.7098851203918457
ppo/returns/mean: 0.37493112683296204
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3367it [3:55:27,  3.07s/it]

objective/kl: -2.177757740020752
ppo/returns/mean: 0.34937047958374023
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3368it [3:55:30,  3.07s/it]

objective/kl: -2.094388008117676
ppo/returns/mean: 0.34047290682792664
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3369it [3:55:33,  3.07s/it]

objective/kl: -1.7955507040023804
ppo/returns/mean: 0.33865898847579956
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3370it [3:55:36,  3.06s/it]

objective/kl: -2.4382879734039307
ppo/returns/mean: 0.3482728898525238
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3371it [3:55:39,  3.05s/it]

objective/kl: -1.6560218334197998
ppo/returns/mean: 0.34289097785949707
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


3372it [3:55:42,  3.18s/it]

objective/kl: -3.0016860961914062
ppo/returns/mean: 0.36680781841278076
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3373it [3:55:46,  3.32s/it]

objective/kl: -5.708858489990234
ppo/returns/mean: 0.41082486510276794
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3374it [3:55:49,  3.31s/it]

objective/kl: -4.548091888427734
ppo/returns/mean: 0.39615297317504883
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3375it [3:55:52,  3.30s/it]

objective/kl: -6.655900478363037
ppo/returns/mean: 0.42231979966163635
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3376it [3:55:56,  3.25s/it]

objective/kl: -7.932439804077148
ppo/returns/mean: 0.4588108956813812
ppo/policy/advantages_mean: -4.470348358154297e-08
---------------------------------------------------------------------------------------------------


3377it [3:55:59,  3.26s/it]

objective/kl: -7.4494218826293945
ppo/returns/mean: 0.4487695097923279
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3378it [3:56:02,  3.20s/it]

objective/kl: -10.145818710327148
ppo/returns/mean: 0.47594180703163147
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


3379it [3:56:05,  3.28s/it]

objective/kl: -8.787261962890625
ppo/returns/mean: 0.45594051480293274
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3380it [3:56:08,  3.21s/it]

objective/kl: -8.086034774780273
ppo/returns/mean: 0.44908595085144043
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


3381it [3:56:11,  3.15s/it]

objective/kl: -6.307655334472656
ppo/returns/mean: 0.43698760867118835
ppo/policy/advantages_mean: -3.725290298461914e-08
---------------------------------------------------------------------------------------------------


3382it [3:56:14,  3.11s/it]

objective/kl: -9.724164962768555
ppo/returns/mean: 0.4889986515045166
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


3383it [3:56:17,  3.09s/it]

objective/kl: -11.804910659790039
ppo/returns/mean: 0.5279136896133423
ppo/policy/advantages_mean: 2.7939677238464355e-09
---------------------------------------------------------------------------------------------------


3384it [3:56:20,  3.08s/it]

objective/kl: -11.88291072845459
ppo/returns/mean: 0.5196840763092041
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3385it [3:56:24,  3.15s/it]

objective/kl: -11.634990692138672
ppo/returns/mean: 0.5082143545150757
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3386it [3:56:27,  3.13s/it]

objective/kl: -12.619573593139648
ppo/returns/mean: 0.5334696769714355
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3387it [3:56:30,  3.10s/it]

objective/kl: -11.396246910095215
ppo/returns/mean: 0.5304985642433167
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3388it [3:56:33,  3.07s/it]

objective/kl: -9.599386215209961
ppo/returns/mean: 0.4997871518135071
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3389it [3:56:36,  3.13s/it]

objective/kl: -6.901880264282227
ppo/returns/mean: 0.4210202097892761
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3390it [3:56:39,  3.12s/it]

objective/kl: -8.703039169311523
ppo/returns/mean: 0.46369385719299316
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3391it [3:56:42,  3.13s/it]

objective/kl: -10.782027244567871
ppo/returns/mean: 0.4975457191467285
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3392it [3:56:46,  3.35s/it]

objective/kl: -9.044824600219727
ppo/returns/mean: 0.48699867725372314
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3393it [3:56:50,  3.38s/it]

objective/kl: -8.942586898803711
ppo/returns/mean: 0.4948132634162903
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3394it [3:56:53,  3.33s/it]

objective/kl: -11.188377380371094
ppo/returns/mean: 0.5266833305358887
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3395it [3:56:56,  3.24s/it]

objective/kl: -10.62343978881836
ppo/returns/mean: 0.5215744972229004
ppo/policy/advantages_mean: -1.210719347000122e-08
---------------------------------------------------------------------------------------------------


3396it [3:56:59,  3.19s/it]

objective/kl: -10.862065315246582
ppo/returns/mean: 0.5233402252197266
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3397it [3:57:02,  3.21s/it]

objective/kl: -9.493949890136719
ppo/returns/mean: 0.5168455839157104
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3398it [3:57:05,  3.15s/it]

objective/kl: -10.216456413269043
ppo/returns/mean: 0.5304574370384216
ppo/policy/advantages_mean: -9.313225746154785e-10
---------------------------------------------------------------------------------------------------


3399it [3:57:08,  3.13s/it]

objective/kl: -10.840532302856445
ppo/returns/mean: 0.5193459987640381
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3400it [3:57:12,  3.11s/it]

objective/kl: -12.9337797164917
ppo/returns/mean: 0.56053227186203
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


3401it [3:57:15,  3.11s/it]

objective/kl: -15.86747932434082
ppo/returns/mean: 0.6279682517051697
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


3402it [3:57:18,  3.09s/it]

objective/kl: -14.050355911254883
ppo/returns/mean: 0.5779323577880859
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3403it [3:57:21,  3.07s/it]

objective/kl: -16.574724197387695
ppo/returns/mean: 0.62253737449646
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3404it [3:57:24,  3.07s/it]

objective/kl: -15.918985366821289
ppo/returns/mean: 0.6157262325286865
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3405it [3:57:27,  3.15s/it]

objective/kl: -14.745616912841797
ppo/returns/mean: 0.5948466062545776
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3406it [3:57:30,  3.12s/it]

objective/kl: -18.326526641845703
ppo/returns/mean: 0.6569576859474182
ppo/policy/advantages_mean: 3.725290298461914e-08
---------------------------------------------------------------------------------------------------


3407it [3:57:33,  3.10s/it]

objective/kl: -16.628196716308594
ppo/returns/mean: 0.6061538457870483
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3408it [3:57:36,  3.10s/it]

objective/kl: -19.435903549194336
ppo/returns/mean: 0.6937527656555176
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3409it [3:57:39,  3.09s/it]

objective/kl: -16.93973731994629
ppo/returns/mean: 0.63307785987854
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


3410it [3:57:42,  3.08s/it]

objective/kl: -20.090450286865234
ppo/returns/mean: 0.6987752914428711
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


3411it [3:57:45,  3.06s/it]

objective/kl: -20.766094207763672
ppo/returns/mean: 0.7133557796478271
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3412it [3:57:49,  3.12s/it]

objective/kl: -19.10757064819336
ppo/returns/mean: 0.6987425684928894
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3413it [3:57:52,  3.12s/it]

objective/kl: -17.293859481811523
ppo/returns/mean: 0.6660712957382202
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3414it [3:57:55,  3.09s/it]

objective/kl: -18.937511444091797
ppo/returns/mean: 0.6543315649032593
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3415it [3:57:58,  3.08s/it]

objective/kl: -17.083778381347656
ppo/returns/mean: 0.6460551619529724
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3416it [3:58:01,  3.07s/it]

objective/kl: -16.755329132080078
ppo/returns/mean: 0.6470677852630615
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3417it [3:58:04,  3.06s/it]

objective/kl: -20.290803909301758
ppo/returns/mean: 0.6960150599479675
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3418it [3:58:07,  3.05s/it]

objective/kl: -19.937742233276367
ppo/returns/mean: 0.6975303888320923
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3419it [3:58:10,  3.05s/it]

objective/kl: -21.0134220123291
ppo/returns/mean: 0.7294741868972778
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3420it [3:58:14,  3.18s/it]

objective/kl: -24.314817428588867
ppo/returns/mean: 0.7830526828765869
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3421it [3:58:17,  3.13s/it]

objective/kl: -22.37596893310547
ppo/returns/mean: 0.754253625869751
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3422it [3:58:20,  3.10s/it]

objective/kl: -25.585494995117188
ppo/returns/mean: 0.8099477887153625
ppo/policy/advantages_mean: -2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


3423it [3:58:23,  3.10s/it]

objective/kl: -24.936878204345703
ppo/returns/mean: 0.7924483418464661
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3424it [3:58:26,  3.08s/it]

objective/kl: -24.56269073486328
ppo/returns/mean: 0.78987056016922
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3425it [3:58:29,  3.07s/it]

objective/kl: -24.48921775817871
ppo/returns/mean: 0.7701187133789062
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3426it [3:58:33,  3.29s/it]

objective/kl: -24.544315338134766
ppo/returns/mean: 0.7549576759338379
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3427it [3:58:36,  3.26s/it]

objective/kl: -21.956073760986328
ppo/returns/mean: 0.7330336570739746
ppo/policy/advantages_mean: -6.51925802230835e-09
---------------------------------------------------------------------------------------------------


3428it [3:58:39,  3.19s/it]

objective/kl: -23.20507049560547
ppo/returns/mean: 0.7568169832229614
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3429it [3:58:42,  3.15s/it]

objective/kl: -21.770427703857422
ppo/returns/mean: 0.7168451547622681
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


3430it [3:58:45,  3.13s/it]

objective/kl: -23.234365463256836
ppo/returns/mean: 0.7528963685035706
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3431it [3:58:48,  3.12s/it]

objective/kl: -24.37258529663086
ppo/returns/mean: 0.7772653102874756
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3432it [3:58:51,  3.10s/it]

objective/kl: -25.507457733154297
ppo/returns/mean: 0.7912793755531311
ppo/policy/advantages_mean: 2.7939677238464355e-08
---------------------------------------------------------------------------------------------------


3433it [3:58:54,  3.09s/it]

objective/kl: -24.59454917907715
ppo/returns/mean: 0.7675873041152954
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


3434it [3:58:57,  3.16s/it]

objective/kl: -26.28127098083496
ppo/returns/mean: 0.796227216720581
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3435it [3:59:01,  3.20s/it]

objective/kl: -24.49386215209961
ppo/returns/mean: 0.7874606847763062
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3436it [3:59:04,  3.16s/it]

objective/kl: -21.8525447845459
ppo/returns/mean: 0.7442585825920105
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3437it [3:59:07,  3.12s/it]

objective/kl: -22.37405014038086
ppo/returns/mean: 0.7614929676055908
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3438it [3:59:10,  3.09s/it]

objective/kl: -21.620222091674805
ppo/returns/mean: 0.7396317720413208
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


3439it [3:59:13,  3.08s/it]

objective/kl: -23.407005310058594
ppo/returns/mean: 0.7566732168197632
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3440it [3:59:17,  3.32s/it]

objective/kl: -25.23691177368164
ppo/returns/mean: 0.7896437048912048
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


3441it [3:59:20,  3.23s/it]

objective/kl: -26.893680572509766
ppo/returns/mean: 0.8129073977470398
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3442it [3:59:23,  3.24s/it]

objective/kl: -24.678314208984375
ppo/returns/mean: 0.7808443903923035
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3443it [3:59:26,  3.17s/it]

objective/kl: -28.833282470703125
ppo/returns/mean: 0.8592939376831055
ppo/policy/advantages_mean: 4.6566128730773926e-09
---------------------------------------------------------------------------------------------------


3444it [3:59:29,  3.11s/it]

objective/kl: -27.70187759399414
ppo/returns/mean: 0.8345766067504883
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3445it [3:59:32,  3.09s/it]

objective/kl: -29.656681060791016
ppo/returns/mean: 0.85907381772995
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3446it [3:59:35,  3.06s/it]

objective/kl: -26.821910858154297
ppo/returns/mean: 0.8164386749267578
ppo/policy/advantages_mean: 4.656612873077393e-10
---------------------------------------------------------------------------------------------------


3447it [3:59:38,  3.05s/it]

objective/kl: -28.60604476928711
ppo/returns/mean: 0.8390417098999023
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3448it [3:59:42,  3.41s/it]

objective/kl: -29.449260711669922
ppo/returns/mean: 0.8554117679595947
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3449it [3:59:45,  3.31s/it]

objective/kl: -29.83891487121582
ppo/returns/mean: 0.8551809787750244
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


3450it [3:59:49,  3.29s/it]

objective/kl: -29.916728973388672
ppo/returns/mean: 0.8749940395355225
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3451it [3:59:52,  3.27s/it]

objective/kl: -30.37445640563965
ppo/returns/mean: 0.8802396655082703
ppo/policy/advantages_mean: 9.313225746154785e-10
---------------------------------------------------------------------------------------------------


3452it [3:59:55,  3.23s/it]

objective/kl: -30.766693115234375
ppo/returns/mean: 0.8649163842201233
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3453it [3:59:58,  3.22s/it]

objective/kl: -29.966764450073242
ppo/returns/mean: 0.8594977855682373
ppo/policy/advantages_mean: -2.60770320892334e-08
---------------------------------------------------------------------------------------------------


3454it [4:00:02,  3.24s/it]

objective/kl: -28.904335021972656
ppo/returns/mean: 0.8450380563735962
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3455it [4:00:05,  3.18s/it]

objective/kl: -28.457839965820312
ppo/returns/mean: 0.8379166126251221
ppo/policy/advantages_mean: 1.210719347000122e-08
---------------------------------------------------------------------------------------------------


3456it [4:00:08,  3.20s/it]

objective/kl: -30.423480987548828
ppo/returns/mean: 0.8748294115066528
ppo/policy/advantages_mean: -1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


3457it [4:00:11,  3.29s/it]

objective/kl: -31.505430221557617
ppo/returns/mean: 0.8822149038314819
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3458it [4:00:14,  3.22s/it]

objective/kl: -31.706226348876953
ppo/returns/mean: 0.8917035460472107
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


3459it [4:00:18,  3.20s/it]

objective/kl: -29.705116271972656
ppo/returns/mean: 0.8604756593704224
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3460it [4:00:21,  3.16s/it]

objective/kl: -31.139612197875977
ppo/returns/mean: 0.8784507513046265
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3461it [4:00:24,  3.15s/it]

objective/kl: -31.797550201416016
ppo/returns/mean: 0.8844479322433472
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3462it [4:00:27,  3.20s/it]

objective/kl: -31.04494857788086
ppo/returns/mean: 0.8781439065933228
ppo/policy/advantages_mean: -1.30385160446167e-08
---------------------------------------------------------------------------------------------------


3463it [4:00:30,  3.15s/it]

objective/kl: -29.188819885253906
ppo/returns/mean: 0.8525936007499695
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3464it [4:00:33,  3.11s/it]

objective/kl: -30.532821655273438
ppo/returns/mean: 0.8636919260025024
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3465it [4:00:36,  3.09s/it]

objective/kl: -29.366500854492188
ppo/returns/mean: 0.8480337262153625
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


3466it [4:00:39,  3.08s/it]

objective/kl: -30.447601318359375
ppo/returns/mean: 0.8500505685806274
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3467it [4:00:42,  3.07s/it]

objective/kl: -32.03162384033203
ppo/returns/mean: 0.8798948526382446
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


3468it [4:00:45,  3.07s/it]

objective/kl: -30.33767318725586
ppo/returns/mean: 0.8565490245819092
ppo/policy/advantages_mean: 2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


3469it [4:00:48,  3.06s/it]

objective/kl: -33.18173599243164
ppo/returns/mean: 0.8993173837661743
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3470it [4:00:51,  3.07s/it]

objective/kl: -32.05335998535156
ppo/returns/mean: 0.8881860971450806
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3471it [4:00:54,  3.06s/it]

objective/kl: -37.04046630859375
ppo/returns/mean: 0.974134624004364
ppo/policy/advantages_mean: 1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


3472it [4:00:57,  3.05s/it]

objective/kl: -37.351829528808594
ppo/returns/mean: 0.9809515476226807
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3473it [4:01:01,  3.33s/it]

objective/kl: -34.93367004394531
ppo/returns/mean: 0.9373078346252441
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


3474it [4:01:04,  3.24s/it]

objective/kl: -34.58932113647461
ppo/returns/mean: 0.9296973943710327
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3475it [4:01:08,  3.19s/it]

objective/kl: -35.315460205078125
ppo/returns/mean: 0.9538003206253052
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3476it [4:01:11,  3.21s/it]

objective/kl: -35.95793151855469
ppo/returns/mean: 0.96343994140625
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3477it [4:01:14,  3.16s/it]

objective/kl: -36.23640441894531
ppo/returns/mean: 0.9553348422050476
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3478it [4:01:17,  3.27s/it]

objective/kl: -37.157752990722656
ppo/returns/mean: 0.9638566970825195
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3479it [4:01:20,  3.21s/it]

objective/kl: -36.587196350097656
ppo/returns/mean: 0.9616211652755737
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3480it [4:01:23,  3.15s/it]

objective/kl: -34.808250427246094
ppo/returns/mean: 0.9487888216972351
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3481it [4:01:27,  3.12s/it]

objective/kl: -37.448486328125
ppo/returns/mean: 0.9815671443939209
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3482it [4:01:30,  3.12s/it]

objective/kl: -40.318115234375
ppo/returns/mean: 1.0348131656646729
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


3483it [4:01:33,  3.08s/it]

objective/kl: -37.739253997802734
ppo/returns/mean: 1.0028622150421143
ppo/policy/advantages_mean: 4.6566128730773926e-09
---------------------------------------------------------------------------------------------------


3484it [4:01:36,  3.09s/it]

objective/kl: -39.13141632080078
ppo/returns/mean: 1.0050086975097656
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3485it [4:01:39,  3.08s/it]

objective/kl: -37.80280303955078
ppo/returns/mean: 1.000197410583496
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3486it [4:01:42,  3.08s/it]

objective/kl: -38.954139709472656
ppo/returns/mean: 1.0090312957763672
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3487it [4:01:45,  3.10s/it]

objective/kl: -38.28823471069336
ppo/returns/mean: 0.9925350546836853
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3488it [4:01:48,  3.08s/it]

objective/kl: -38.12896728515625
ppo/returns/mean: 0.9814133644104004
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3489it [4:01:52,  3.21s/it]

objective/kl: -38.23476791381836
ppo/returns/mean: 0.9863333702087402
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


3490it [4:01:55,  3.23s/it]

objective/kl: -37.4647102355957
ppo/returns/mean: 0.9835015535354614
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3491it [4:01:59,  3.45s/it]

objective/kl: -36.74152374267578
ppo/returns/mean: 0.9739328622817993
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3492it [4:02:02,  3.47s/it]

objective/kl: -36.086090087890625
ppo/returns/mean: 0.9552916884422302
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3493it [4:02:05,  3.34s/it]

objective/kl: -36.68156433105469
ppo/returns/mean: 0.9794154167175293
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3494it [4:02:08,  3.25s/it]

objective/kl: -37.66350555419922
ppo/returns/mean: 0.9893386363983154
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3495it [4:02:11,  3.19s/it]

objective/kl: -37.16203308105469
ppo/returns/mean: 0.9743005633354187
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


3496it [4:02:15,  3.18s/it]

objective/kl: -35.65859603881836
ppo/returns/mean: 0.9504387974739075
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3497it [4:02:18,  3.14s/it]

objective/kl: -36.024932861328125
ppo/returns/mean: 0.9491567611694336
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3498it [4:02:21,  3.11s/it]

objective/kl: -39.10712432861328
ppo/returns/mean: 0.9897249341011047
ppo/policy/advantages_mean: -8.381903171539307e-09
---------------------------------------------------------------------------------------------------


3499it [4:02:24,  3.09s/it]

objective/kl: -38.80330276489258
ppo/returns/mean: 0.9853901863098145
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3500it [4:02:27,  3.18s/it]

objective/kl: -38.7376823425293
ppo/returns/mean: 0.9842057228088379
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3501it [4:02:30,  3.23s/it]

objective/kl: -35.66600799560547
ppo/returns/mean: 0.9431605935096741
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3502it [4:02:33,  3.17s/it]

objective/kl: -36.78369903564453
ppo/returns/mean: 0.9500249028205872
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


3503it [4:02:37,  3.13s/it]

objective/kl: -39.367759704589844
ppo/returns/mean: 0.993027925491333
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3504it [4:02:40,  3.10s/it]

objective/kl: -39.829925537109375
ppo/returns/mean: 0.9988397359848022
ppo/policy/advantages_mean: 1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


3505it [4:02:43,  3.08s/it]

objective/kl: -41.85548400878906
ppo/returns/mean: 1.0273120403289795
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3506it [4:02:46,  3.08s/it]

objective/kl: -42.39613342285156
ppo/returns/mean: 1.0286777019500732
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3507it [4:02:49,  3.07s/it]

objective/kl: -42.38584899902344
ppo/returns/mean: 1.0140743255615234
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3508it [4:02:52,  3.06s/it]

objective/kl: -44.253211975097656
ppo/returns/mean: 1.0472179651260376
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3509it [4:02:56,  3.29s/it]

objective/kl: -45.08095169067383
ppo/returns/mean: 1.0479974746704102
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3510it [4:02:59,  3.24s/it]

objective/kl: -45.90924072265625
ppo/returns/mean: 1.0587244033813477
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3511it [4:03:02,  3.18s/it]

objective/kl: -43.381996154785156
ppo/returns/mean: 1.0177674293518066
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3512it [4:03:05,  3.13s/it]

objective/kl: -42.91197204589844
ppo/returns/mean: 1.016560673713684
ppo/policy/advantages_mean: -2.8870999813079834e-08
---------------------------------------------------------------------------------------------------


3513it [4:03:08,  3.20s/it]

objective/kl: -45.55698013305664
ppo/returns/mean: 1.0446093082427979
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3514it [4:03:11,  3.18s/it]

objective/kl: -45.170005798339844
ppo/returns/mean: 1.034752368927002
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3515it [4:03:14,  3.13s/it]

objective/kl: -44.606815338134766
ppo/returns/mean: 1.0219886302947998
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3516it [4:03:18,  3.26s/it]

objective/kl: -47.10753631591797
ppo/returns/mean: 1.035866141319275
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3517it [4:03:21,  3.18s/it]

objective/kl: -48.93587112426758
ppo/returns/mean: 1.0587559938430786
ppo/policy/advantages_mean: -2.7939677238464355e-08
---------------------------------------------------------------------------------------------------


3518it [4:03:24,  3.13s/it]

objective/kl: -47.161434173583984
ppo/returns/mean: 1.0247827768325806
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3519it [4:03:27,  3.09s/it]

objective/kl: -51.51765441894531
ppo/returns/mean: 1.0712616443634033
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3520it [4:03:30,  3.07s/it]

objective/kl: -52.338829040527344
ppo/returns/mean: 1.0846819877624512
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3521it [4:03:33,  3.06s/it]

objective/kl: -53.38722610473633
ppo/returns/mean: 1.0872935056686401
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


3522it [4:03:36,  3.05s/it]

objective/kl: -54.826499938964844
ppo/returns/mean: 1.0959327220916748
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3523it [4:03:39,  3.03s/it]

objective/kl: -56.53205108642578
ppo/returns/mean: 1.1276991367340088
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3524it [4:03:42,  3.03s/it]

objective/kl: -56.44646453857422
ppo/returns/mean: 1.1109111309051514
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3525it [4:03:45,  3.18s/it]

objective/kl: -58.469627380371094
ppo/returns/mean: 1.127244472503662
ppo/policy/advantages_mean: -1.5832483768463135e-08
---------------------------------------------------------------------------------------------------


3526it [4:03:48,  3.13s/it]

objective/kl: -57.618431091308594
ppo/returns/mean: 1.0917017459869385
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


3527it [4:03:52,  3.26s/it]

objective/kl: -71.63006591796875
ppo/returns/mean: 1.2292189598083496
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3528it [4:03:55,  3.21s/it]

objective/kl: -73.53218078613281
ppo/returns/mean: 1.260969638824463
ppo/policy/advantages_mean: -2.421438694000244e-08
---------------------------------------------------------------------------------------------------


3529it [4:03:58,  3.17s/it]

objective/kl: -71.42156219482422
ppo/returns/mean: 1.2116223573684692
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


3530it [4:04:01,  3.13s/it]

objective/kl: -74.35853576660156
ppo/returns/mean: 1.2730191946029663
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3531it [4:04:05,  3.32s/it]

objective/kl: -80.1358871459961
ppo/returns/mean: 1.3207032680511475
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3532it [4:04:08,  3.23s/it]

objective/kl: -79.92373657226562
ppo/returns/mean: 1.3276944160461426
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3533it [4:04:11,  3.18s/it]

objective/kl: -78.9083251953125
ppo/returns/mean: 1.2959930896759033
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3534it [4:04:14,  3.17s/it]

objective/kl: -79.86144256591797
ppo/returns/mean: 1.3018243312835693
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3535it [4:04:17,  3.19s/it]

objective/kl: -83.58956909179688
ppo/returns/mean: 1.3602046966552734
ppo/policy/advantages_mean: -1.30385160446167e-08
---------------------------------------------------------------------------------------------------


3536it [4:04:20,  3.14s/it]

objective/kl: -89.18768310546875
ppo/returns/mean: 1.4160560369491577
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


3537it [4:04:24,  3.12s/it]

objective/kl: -89.21249389648438
ppo/returns/mean: 1.395945429801941
ppo/policy/advantages_mean: -4.470348358154297e-08
---------------------------------------------------------------------------------------------------


3538it [4:04:27,  3.10s/it]

objective/kl: -91.89260864257812
ppo/returns/mean: 1.375449538230896
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3539it [4:04:30,  3.08s/it]

objective/kl: -88.43476867675781
ppo/returns/mean: 1.3796064853668213
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3540it [4:04:33,  3.08s/it]

objective/kl: -76.85177612304688
ppo/returns/mean: 1.2669661045074463
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3541it [4:04:36,  3.06s/it]

objective/kl: -85.07207489013672
ppo/returns/mean: 1.387412428855896
ppo/policy/advantages_mean: 1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


3542it [4:04:39,  3.15s/it]

objective/kl: -84.39656066894531
ppo/returns/mean: 1.398902177810669
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3543it [4:04:42,  3.12s/it]

objective/kl: -79.0457992553711
ppo/returns/mean: 1.347066879272461
ppo/policy/advantages_mean: 8.381903171539307e-09
---------------------------------------------------------------------------------------------------


3544it [4:04:45,  3.16s/it]

objective/kl: -67.78182983398438
ppo/returns/mean: 1.126373529434204
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3545it [4:04:48,  3.13s/it]

objective/kl: -76.6527099609375
ppo/returns/mean: 1.2216055393218994
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3546it [4:04:51,  3.10s/it]

objective/kl: -74.56114959716797
ppo/returns/mean: 1.180516242980957
ppo/policy/advantages_mean: -1.1641532182693481e-08
---------------------------------------------------------------------------------------------------


3547it [4:04:55,  3.08s/it]

objective/kl: -61.805721282958984
ppo/returns/mean: 0.9866661429405212
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3548it [4:04:58,  3.07s/it]

objective/kl: -50.25593948364258
ppo/returns/mean: 0.8141255974769592
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3549it [4:05:01,  3.06s/it]

objective/kl: -35.92280578613281
ppo/returns/mean: 0.5914745330810547
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3550it [4:05:04,  3.05s/it]

objective/kl: -20.01758575439453
ppo/returns/mean: 0.3590589463710785
ppo/policy/advantages_mean: 8.381903171539307e-09
---------------------------------------------------------------------------------------------------


3551it [4:05:07,  3.25s/it]

objective/kl: -26.09072494506836
ppo/returns/mean: 0.4661886692047119
ppo/policy/advantages_mean: 4.6566128730773926e-09
---------------------------------------------------------------------------------------------------


3552it [4:05:10,  3.17s/it]

objective/kl: -25.184104919433594
ppo/returns/mean: 0.4638015329837799
ppo/policy/advantages_mean: -2.60770320892334e-08
---------------------------------------------------------------------------------------------------


3553it [4:05:13,  3.13s/it]

objective/kl: -19.82406234741211
ppo/returns/mean: 0.4068107604980469
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3554it [4:05:16,  3.10s/it]

objective/kl: -22.41262435913086
ppo/returns/mean: 0.4411590099334717
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3555it [4:05:19,  3.07s/it]

objective/kl: -19.88318634033203
ppo/returns/mean: 0.42801979184150696
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3556it [4:05:22,  3.04s/it]

objective/kl: -21.587703704833984
ppo/returns/mean: 0.4617202579975128
ppo/policy/advantages_mean: 2.514570951461792e-08
---------------------------------------------------------------------------------------------------


3557it [4:05:25,  3.04s/it]

objective/kl: -22.469318389892578
ppo/returns/mean: 0.47173836827278137
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


3558it [4:05:28,  3.02s/it]

objective/kl: -17.006689071655273
ppo/returns/mean: 0.4132552444934845
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3559it [4:05:32,  3.10s/it]

objective/kl: -17.75446891784668
ppo/returns/mean: 0.42111459374427795
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


3560it [4:05:35,  3.08s/it]

objective/kl: -13.146158218383789
ppo/returns/mean: 0.35048094391822815
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3561it [4:05:38,  3.17s/it]

objective/kl: -11.166332244873047
ppo/returns/mean: 0.3183242082595825
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3562it [4:05:41,  3.14s/it]

objective/kl: -11.91115951538086
ppo/returns/mean: 0.3114885687828064
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3563it [4:05:45,  3.35s/it]

objective/kl: -11.08676528930664
ppo/returns/mean: 0.29856160283088684
ppo/policy/advantages_mean: -1.6298145055770874e-08
---------------------------------------------------------------------------------------------------


3564it [4:05:49,  3.45s/it]

objective/kl: -15.824307441711426
ppo/returns/mean: 0.3443794250488281
ppo/policy/advantages_mean: -2.60770320892334e-08
---------------------------------------------------------------------------------------------------


3565it [4:05:52,  3.39s/it]

objective/kl: -13.034480094909668
ppo/returns/mean: 0.314456582069397
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


3566it [4:05:55,  3.40s/it]

objective/kl: -11.31997299194336
ppo/returns/mean: 0.2784927189350128
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3567it [4:05:59,  3.33s/it]

objective/kl: -15.920735359191895
ppo/returns/mean: 0.35892271995544434
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3568it [4:06:02,  3.32s/it]

objective/kl: -13.453882217407227
ppo/returns/mean: 0.31508469581604004
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3569it [4:06:05,  3.24s/it]

objective/kl: -13.986421585083008
ppo/returns/mean: 0.3046771287918091
ppo/policy/advantages_mean: 1.210719347000122e-08
---------------------------------------------------------------------------------------------------


3570it [4:06:08,  3.23s/it]

objective/kl: -20.85598373413086
ppo/returns/mean: 0.37969261407852173
ppo/policy/advantages_mean: -2.7939677238464355e-08
---------------------------------------------------------------------------------------------------


3571it [4:06:12,  3.50s/it]

objective/kl: -19.058727264404297
ppo/returns/mean: 0.3558172583580017
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3572it [4:06:15,  3.36s/it]

objective/kl: -16.862947463989258
ppo/returns/mean: 0.33263784646987915
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3573it [4:06:18,  3.26s/it]

objective/kl: -15.447378158569336
ppo/returns/mean: 0.3311174809932709
ppo/policy/advantages_mean: 2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


3574it [4:06:21,  3.18s/it]

objective/kl: -14.402910232543945
ppo/returns/mean: 0.31411832571029663
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3575it [4:06:24,  3.15s/it]

objective/kl: -18.52241325378418
ppo/returns/mean: 0.3780663311481476
ppo/policy/advantages_mean: -3.725290298461914e-08
---------------------------------------------------------------------------------------------------


3576it [4:06:28,  3.26s/it]

objective/kl: -11.674704551696777
ppo/returns/mean: 0.26519501209259033
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


3577it [4:06:31,  3.32s/it]

objective/kl: -13.514382362365723
ppo/returns/mean: 0.30485910177230835
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3578it [4:06:34,  3.25s/it]

objective/kl: -11.688117980957031
ppo/returns/mean: 0.2860802114009857
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


3579it [4:06:37,  3.20s/it]

objective/kl: -8.046199798583984
ppo/returns/mean: 0.22531676292419434
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


3580it [4:06:41,  3.15s/it]

objective/kl: -8.541465759277344
ppo/returns/mean: 0.23133084177970886
ppo/policy/advantages_mean: -1.955777406692505e-08
---------------------------------------------------------------------------------------------------


3581it [4:06:44,  3.11s/it]

objective/kl: -5.286355018615723
ppo/returns/mean: 0.1862676739692688
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3582it [4:06:47,  3.09s/it]

objective/kl: -2.4992618560791016
ppo/returns/mean: 0.13357388973236084
ppo/policy/advantages_mean: -1.210719347000122e-08
---------------------------------------------------------------------------------------------------


3583it [4:06:50,  3.09s/it]

objective/kl: -5.713938236236572
ppo/returns/mean: 0.18139216303825378
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3584it [4:06:53,  3.07s/it]

objective/kl: -3.616631031036377
ppo/returns/mean: 0.1386502981185913
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3585it [4:06:56,  3.05s/it]

objective/kl: -0.35732945799827576
ppo/returns/mean: 0.0886523574590683
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3586it [4:06:59,  3.05s/it]

objective/kl: -0.8515216112136841
ppo/returns/mean: 0.09792481362819672
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3587it [4:07:02,  3.07s/it]

objective/kl: -4.695209503173828
ppo/returns/mean: 0.16314904391765594
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3588it [4:07:05,  3.05s/it]

objective/kl: -0.3203706741333008
ppo/returns/mean: 0.09629921615123749
ppo/policy/advantages_mean: 1.210719347000122e-08
---------------------------------------------------------------------------------------------------


3589it [4:07:08,  3.09s/it]

objective/kl: -6.204878807067871
ppo/returns/mean: 0.15357007086277008
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3590it [4:07:11,  3.07s/it]

objective/kl: -6.351589679718018
ppo/returns/mean: 0.1632373332977295
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3591it [4:07:14,  3.06s/it]

objective/kl: -4.986078262329102
ppo/returns/mean: 0.14041079580783844
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3592it [4:07:17,  3.06s/it]

objective/kl: -5.143918991088867
ppo/returns/mean: 0.1459779441356659
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


3593it [4:07:20,  3.05s/it]

objective/kl: -13.095515251159668
ppo/returns/mean: 0.2865961492061615
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3594it [4:07:23,  3.08s/it]

objective/kl: -21.82111167907715
ppo/returns/mean: 0.4425443410873413
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3595it [4:07:26,  3.06s/it]

objective/kl: -19.079448699951172
ppo/returns/mean: 0.4037206768989563
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3596it [4:07:29,  3.05s/it]

objective/kl: -21.03616714477539
ppo/returns/mean: 0.4266948103904724
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3597it [4:07:32,  3.04s/it]

objective/kl: -15.95366096496582
ppo/returns/mean: 0.3439076542854309
ppo/policy/advantages_mean: -3.5390257835388184e-08
---------------------------------------------------------------------------------------------------


3598it [4:07:35,  3.04s/it]

objective/kl: -17.489023208618164
ppo/returns/mean: 0.38408851623535156
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3599it [4:07:38,  3.03s/it]

objective/kl: -12.10485553741455
ppo/returns/mean: 0.2953845262527466
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3600it [4:07:42,  3.04s/it]

objective/kl: -12.068984985351562
ppo/returns/mean: 0.29121673107147217
ppo/policy/advantages_mean: -2.7939677238464355e-08
---------------------------------------------------------------------------------------------------


3601it [4:07:45,  3.04s/it]

objective/kl: -11.45686149597168
ppo/returns/mean: 0.2941700518131256
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3602it [4:07:48,  3.04s/it]

objective/kl: -10.361858367919922
ppo/returns/mean: 0.26496195793151855
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3603it [4:07:51,  3.03s/it]

objective/kl: -8.632718086242676
ppo/returns/mean: 0.2337929755449295
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


3604it [4:07:54,  3.06s/it]

objective/kl: -11.02420711517334
ppo/returns/mean: 0.27402806282043457
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3605it [4:07:57,  3.16s/it]

objective/kl: -6.6460466384887695
ppo/returns/mean: 0.20160752534866333
ppo/policy/advantages_mean: -2.60770320892334e-08
---------------------------------------------------------------------------------------------------


3606it [4:08:00,  3.11s/it]

objective/kl: -7.002841949462891
ppo/returns/mean: 0.2042241394519806
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3607it [4:08:03,  3.07s/it]

objective/kl: -4.492053031921387
ppo/returns/mean: 0.1571420431137085
ppo/policy/advantages_mean: 2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


3608it [4:08:06,  3.05s/it]

objective/kl: -3.36421537399292
ppo/returns/mean: 0.14893606305122375
ppo/policy/advantages_mean: -3.725290298461914e-08
---------------------------------------------------------------------------------------------------


3609it [4:08:09,  3.08s/it]

objective/kl: -4.5713677406311035
ppo/returns/mean: 0.15759249031543732
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3610it [4:08:12,  3.08s/it]

objective/kl: -4.092239856719971
ppo/returns/mean: 0.1616123616695404
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3611it [4:08:15,  3.06s/it]

objective/kl: -1.1711710691452026
ppo/returns/mean: 0.10711657255887985
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3612it [4:08:18,  3.04s/it]

objective/kl: -2.5859994888305664
ppo/returns/mean: 0.13380712270736694
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


3613it [4:08:22,  3.09s/it]

objective/kl: -0.11591506004333496
ppo/returns/mean: 0.08616195619106293
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3614it [4:08:25,  3.06s/it]

objective/kl: -1.3292638063430786
ppo/returns/mean: 0.11062036454677582
ppo/policy/advantages_mean: -3.166496753692627e-08
---------------------------------------------------------------------------------------------------


3615it [4:08:28,  3.15s/it]

objective/kl: -1.525050401687622
ppo/returns/mean: 0.10880349576473236
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3616it [4:08:31,  3.11s/it]

objective/kl: -0.5737839937210083
ppo/returns/mean: 0.08916620910167694
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


3617it [4:08:34,  3.07s/it]

objective/kl: 1.8180543184280396
ppo/returns/mean: 0.03315528854727745
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3618it [4:08:37,  3.04s/it]

objective/kl: 2.928126335144043
ppo/returns/mean: 0.02037486620247364
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3619it [4:08:40,  3.03s/it]

objective/kl: 1.7873609066009521
ppo/returns/mean: 0.032656993716955185
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3620it [4:08:43,  3.09s/it]

objective/kl: 4.881799221038818
ppo/returns/mean: -0.02234555035829544
ppo/policy/advantages_mean: -4.6566128730773926e-09
---------------------------------------------------------------------------------------------------


3621it [4:08:46,  3.06s/it]

objective/kl: 1.734871745109558
ppo/returns/mean: 0.028425373136997223
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3622it [4:08:49,  3.04s/it]

objective/kl: 4.759977340698242
ppo/returns/mean: -0.026274077594280243
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3623it [4:08:52,  3.03s/it]

objective/kl: 4.012351989746094
ppo/returns/mean: -0.031505174934864044
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3624it [4:08:55,  3.01s/it]

objective/kl: 2.9205970764160156
ppo/returns/mean: -0.0349603146314621
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3625it [4:08:58,  3.00s/it]

objective/kl: 3.8381006717681885
ppo/returns/mean: -0.038985006511211395
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3626it [4:09:01,  3.00s/it]

objective/kl: 1.1194887161254883
ppo/returns/mean: -0.021909136325120926
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3627it [4:09:04,  3.08s/it]

objective/kl: 1.7877106666564941
ppo/returns/mean: 0.0019437074661254883
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3628it [4:09:07,  3.06s/it]

objective/kl: 2.356034755706787
ppo/returns/mean: -0.0336722806096077
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3629it [4:09:10,  3.03s/it]

objective/kl: -0.2240256369113922
ppo/returns/mean: 0.017487064003944397
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3630it [4:09:13,  3.02s/it]

objective/kl: -5.8283281326293945
ppo/returns/mean: 0.11575253307819366
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3631it [4:09:16,  3.09s/it]

objective/kl: -3.851104259490967
ppo/returns/mean: 0.08594454824924469
ppo/policy/advantages_mean: -2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


3632it [4:09:20,  3.11s/it]

objective/kl: -5.245464324951172
ppo/returns/mean: 0.12855327129364014
ppo/policy/advantages_mean: 3.725290298461914e-08
---------------------------------------------------------------------------------------------------


3633it [4:09:23,  3.31s/it]

objective/kl: -4.072687149047852
ppo/returns/mean: 0.11265186965465546
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


3634it [4:09:27,  3.31s/it]

objective/kl: -6.174996376037598
ppo/returns/mean: 0.14688405394554138
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3635it [4:09:30,  3.37s/it]

objective/kl: -4.9732441902160645
ppo/returns/mean: 0.1322077363729477
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3636it [4:09:33,  3.31s/it]

objective/kl: -3.185911178588867
ppo/returns/mean: 0.10910610109567642
ppo/policy/advantages_mean: -8.381903171539307e-09
---------------------------------------------------------------------------------------------------


3637it [4:09:36,  3.22s/it]

objective/kl: -1.9496415853500366
ppo/returns/mean: 0.09205539524555206
ppo/policy/advantages_mean: -2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


3638it [4:09:39,  3.16s/it]

objective/kl: -4.154027938842773
ppo/returns/mean: 0.11237668991088867
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3639it [4:09:43,  3.17s/it]

objective/kl: -3.742393732070923
ppo/returns/mean: 0.11233653128147125
ppo/policy/advantages_mean: -1.30385160446167e-08
---------------------------------------------------------------------------------------------------


3640it [4:09:46,  3.13s/it]

objective/kl: -7.410442352294922
ppo/returns/mean: 0.19222551584243774
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


3641it [4:09:49,  3.10s/it]

objective/kl: -5.695747375488281
ppo/returns/mean: 0.16585685312747955
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3642it [4:09:52,  3.09s/it]

objective/kl: -5.345869064331055
ppo/returns/mean: 0.1556239128112793
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3643it [4:09:55,  3.27s/it]

objective/kl: -2.066600799560547
ppo/returns/mean: 0.11341274529695511
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3644it [4:09:59,  3.24s/it]

objective/kl: -2.6933369636535645
ppo/returns/mean: 0.11005760729312897
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3645it [4:10:02,  3.18s/it]

objective/kl: -8.081411361694336
ppo/returns/mean: 0.1746540516614914
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3646it [4:10:05,  3.13s/it]

objective/kl: -6.862617492675781
ppo/returns/mean: 0.14939777553081512
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3647it [4:10:08,  3.12s/it]

objective/kl: -3.515336275100708
ppo/returns/mean: 0.1123485416173935
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3648it [4:10:11,  3.10s/it]

objective/kl: -9.964202880859375
ppo/returns/mean: 0.18592101335525513
ppo/policy/advantages_mean: 4.0978193283081055e-08
---------------------------------------------------------------------------------------------------


3649it [4:10:14,  3.08s/it]

objective/kl: -16.618236541748047
ppo/returns/mean: 0.26644837856292725
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3650it [4:10:17,  3.08s/it]

objective/kl: -16.679277420043945
ppo/returns/mean: 0.2767640948295593
ppo/policy/advantages_mean: 2.7939677238464355e-08
---------------------------------------------------------------------------------------------------


3651it [4:10:20,  3.06s/it]

objective/kl: -15.294210433959961
ppo/returns/mean: 0.2634631395339966
ppo/policy/advantages_mean: -2.1420419216156006e-08
---------------------------------------------------------------------------------------------------


3652it [4:10:23,  3.10s/it]

objective/kl: -14.855566024780273
ppo/returns/mean: 0.2654830515384674
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3653it [4:10:26,  3.09s/it]

objective/kl: -14.763383865356445
ppo/returns/mean: 0.257276326417923
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3654it [4:10:29,  3.08s/it]

objective/kl: -12.656457901000977
ppo/returns/mean: 0.2355484813451767
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3655it [4:10:32,  3.06s/it]

objective/kl: -12.954448699951172
ppo/returns/mean: 0.244111105799675
ppo/policy/advantages_mean: -1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


3656it [4:10:35,  3.06s/it]

objective/kl: -10.892866134643555
ppo/returns/mean: 0.21419453620910645
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3657it [4:10:38,  3.06s/it]

objective/kl: -10.055012702941895
ppo/returns/mean: 0.20664450526237488
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


3658it [4:10:41,  3.05s/it]

objective/kl: -7.816063404083252
ppo/returns/mean: 0.17829692363739014
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


3659it [4:10:44,  3.04s/it]

objective/kl: -6.6598310470581055
ppo/returns/mean: 0.1692715883255005
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3660it [4:10:48,  3.07s/it]

objective/kl: -5.8863630294799805
ppo/returns/mean: 0.14430320262908936
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3661it [4:10:51,  3.05s/it]

objective/kl: -4.674827575683594
ppo/returns/mean: 0.11418852210044861
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3662it [4:10:54,  3.11s/it]

objective/kl: -4.019731044769287
ppo/returns/mean: 0.10881602764129639
ppo/policy/advantages_mean: -3.259629011154175e-08
---------------------------------------------------------------------------------------------------


3663it [4:10:57,  3.10s/it]

objective/kl: -3.026510000228882
ppo/returns/mean: 0.09344730526208878
ppo/policy/advantages_mean: -1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


3664it [4:11:00,  3.08s/it]

objective/kl: -1.9827882051467896
ppo/returns/mean: 0.08823215961456299
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3665it [4:11:03,  3.15s/it]

objective/kl: 1.0803247690200806
ppo/returns/mean: 0.04379189759492874
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3666it [4:11:06,  3.11s/it]

objective/kl: -1.1153907775878906
ppo/returns/mean: 0.06890866905450821
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3667it [4:11:09,  3.09s/it]

objective/kl: 0.32691091299057007
ppo/returns/mean: 0.04074463993310928
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


3668it [4:11:12,  3.07s/it]

objective/kl: 1.4930663108825684
ppo/returns/mean: 0.030461512506008148
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3669it [4:11:15,  3.08s/it]

objective/kl: 1.5817878246307373
ppo/returns/mean: 0.019554883241653442
ppo/policy/advantages_mean: 2.2817403078079224e-08
---------------------------------------------------------------------------------------------------


3670it [4:11:18,  3.06s/it]

objective/kl: 2.2859811782836914
ppo/returns/mean: 0.0065547022968530655
ppo/policy/advantages_mean: -1.210719347000122e-08
---------------------------------------------------------------------------------------------------


3671it [4:11:22,  3.06s/it]

objective/kl: 2.8401966094970703
ppo/returns/mean: 0.0026416387408971786
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3672it [4:11:25,  3.06s/it]

objective/kl: 2.67520809173584
ppo/returns/mean: -0.0036105834878981113
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


3673it [4:11:28,  3.05s/it]

objective/kl: -3.2073373794555664
ppo/returns/mean: 0.0731598436832428
ppo/policy/advantages_mean: -4.6566128730773926e-09
---------------------------------------------------------------------------------------------------


3674it [4:11:31,  3.05s/it]

objective/kl: -3.553556442260742
ppo/returns/mean: 0.07713583111763
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3675it [4:11:34,  3.05s/it]

objective/kl: -3.257260799407959
ppo/returns/mean: 0.07607005536556244
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3676it [4:11:37,  3.04s/it]

objective/kl: -3.8515539169311523
ppo/returns/mean: 0.08995506167411804
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3677it [4:11:40,  3.04s/it]

objective/kl: -3.89361834526062
ppo/returns/mean: 0.09713076055049896
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3678it [4:11:43,  3.03s/it]

objective/kl: -2.9047317504882812
ppo/returns/mean: 0.08092768490314484
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3679it [4:11:46,  3.03s/it]

objective/kl: -2.2751333713531494
ppo/returns/mean: 0.07623912394046783
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3680it [4:11:49,  3.10s/it]

objective/kl: -3.523029088973999
ppo/returns/mean: 0.07755935192108154
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3681it [4:11:52,  3.09s/it]

objective/kl: -3.3874545097351074
ppo/returns/mean: 0.0750066339969635
ppo/policy/advantages_mean: -6.51925802230835e-09
---------------------------------------------------------------------------------------------------


3682it [4:11:55,  3.07s/it]

objective/kl: -3.855278730392456
ppo/returns/mean: 0.08891875296831131
ppo/policy/advantages_mean: -1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


3683it [4:11:58,  3.06s/it]

objective/kl: -3.6738638877868652
ppo/returns/mean: 0.08791732788085938
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3684it [4:12:01,  3.06s/it]

objective/kl: -2.4225032329559326
ppo/returns/mean: 0.06623529642820358
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


3685it [4:12:04,  3.05s/it]

objective/kl: -1.3730525970458984
ppo/returns/mean: 0.05405560880899429
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3686it [4:12:07,  3.06s/it]

objective/kl: -0.7569001913070679
ppo/returns/mean: 0.04169987514615059
ppo/policy/advantages_mean: 3.166496753692627e-08
---------------------------------------------------------------------------------------------------


3687it [4:12:10,  3.05s/it]

objective/kl: -0.30047619342803955
ppo/returns/mean: 0.03078758344054222
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


3688it [4:12:13,  3.05s/it]

objective/kl: 1.773481845855713
ppo/returns/mean: 0.007238475140184164
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3689it [4:12:17,  3.06s/it]

objective/kl: -0.17541640996932983
ppo/returns/mean: 0.03168194741010666
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3690it [4:12:20,  3.04s/it]

objective/kl: 1.319982647895813
ppo/returns/mean: 0.019177231937646866
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


3691it [4:12:23,  3.05s/it]

objective/kl: -2.0357882976531982
ppo/returns/mean: 0.044388968497514725
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3692it [4:12:26,  3.04s/it]

objective/kl: 1.149820327758789
ppo/returns/mean: 0.009968209080398083
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3693it [4:12:29,  3.05s/it]

objective/kl: 1.3769264221191406
ppo/returns/mean: 0.008852461352944374
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3694it [4:12:32,  3.04s/it]

objective/kl: -0.9335035085678101
ppo/returns/mean: 0.037636298686265945
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3695it [4:12:35,  3.12s/it]

objective/kl: -3.7060940265655518
ppo/returns/mean: 0.064228355884552
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3696it [4:12:38,  3.10s/it]

objective/kl: -3.802732467651367
ppo/returns/mean: 0.06331640481948853
ppo/policy/advantages_mean: 1.5832483768463135e-08
---------------------------------------------------------------------------------------------------


3697it [4:12:41,  3.09s/it]

objective/kl: -3.087533712387085
ppo/returns/mean: 0.05765338987112045
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3698it [4:12:44,  3.07s/it]

objective/kl: -1.3344465494155884
ppo/returns/mean: 0.04091302305459976
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3699it [4:12:47,  3.07s/it]

objective/kl: -0.3891284465789795
ppo/returns/mean: 0.031090036034584045
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3700it [4:12:51,  3.15s/it]

objective/kl: -0.9392893314361572
ppo/returns/mean: 0.03197493776679039
ppo/policy/advantages_mean: -4.6566128730773926e-09
---------------------------------------------------------------------------------------------------


3701it [4:12:54,  3.15s/it]

objective/kl: -1.5887348651885986
ppo/returns/mean: 0.03500765562057495
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3702it [4:12:57,  3.17s/it]

objective/kl: -1.5946768522262573
ppo/returns/mean: 0.03171692043542862
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


3703it [4:13:00,  3.13s/it]

objective/kl: -0.3953900635242462
ppo/returns/mean: 0.020081300288438797
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3704it [4:13:03,  3.11s/it]

objective/kl: 1.80265212059021
ppo/returns/mean: -0.0011445148847997189
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3705it [4:13:06,  3.08s/it]

objective/kl: 2.4082963466644287
ppo/returns/mean: -0.009348929859697819
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3706it [4:13:09,  3.09s/it]

objective/kl: -0.7179854512214661
ppo/returns/mean: 0.012366359122097492
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3707it [4:13:12,  3.13s/it]

objective/kl: -3.4061484336853027
ppo/returns/mean: 0.05051887407898903
ppo/policy/advantages_mean: -1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


3708it [4:13:16,  3.19s/it]

objective/kl: -5.706776142120361
ppo/returns/mean: 0.094392329454422
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3709it [4:13:19,  3.17s/it]

objective/kl: -2.4631290435791016
ppo/returns/mean: 0.04644645005464554
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3710it [4:13:22,  3.12s/it]

objective/kl: -5.705299377441406
ppo/returns/mean: 0.09647015482187271
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3711it [4:13:25,  3.11s/it]

objective/kl: -28.6336669921875
ppo/returns/mean: 0.4167248606681824
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3712it [4:13:28,  3.10s/it]

objective/kl: -22.582876205444336
ppo/returns/mean: 0.3689268231391907
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3713it [4:13:31,  3.08s/it]

objective/kl: -25.3557186126709
ppo/returns/mean: 0.4409818649291992
ppo/policy/advantages_mean: -9.313225746154785e-10
---------------------------------------------------------------------------------------------------


3714it [4:13:34,  3.07s/it]

objective/kl: -18.957515716552734
ppo/returns/mean: 0.3516780138015747
ppo/policy/advantages_mean: 2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


3715it [4:13:37,  3.07s/it]

objective/kl: -26.204818725585938
ppo/returns/mean: 0.4461579918861389
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


3716it [4:13:40,  3.06s/it]

objective/kl: -34.86597442626953
ppo/returns/mean: 0.5372090339660645
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


3717it [4:13:43,  3.05s/it]

objective/kl: -36.31565856933594
ppo/returns/mean: 0.5567762851715088
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3718it [4:13:46,  3.05s/it]

objective/kl: -33.813926696777344
ppo/returns/mean: 0.5437784194946289
ppo/policy/advantages_mean: 3.725290298461914e-08
---------------------------------------------------------------------------------------------------


3719it [4:13:50,  3.29s/it]

objective/kl: -29.7858829498291
ppo/returns/mean: 0.4856142997741699
ppo/policy/advantages_mean: 1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


3720it [4:13:53,  3.22s/it]

objective/kl: -26.92603302001953
ppo/returns/mean: 0.45664703845977783
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


3721it [4:13:57,  3.29s/it]

objective/kl: -21.312896728515625
ppo/returns/mean: 0.36659523844718933
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3722it [4:14:00,  3.21s/it]

objective/kl: -21.255817413330078
ppo/returns/mean: 0.3787849247455597
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3723it [4:14:03,  3.30s/it]

objective/kl: -18.98480224609375
ppo/returns/mean: 0.3533713221549988
ppo/policy/advantages_mean: 3.725290298461914e-08
---------------------------------------------------------------------------------------------------


3724it [4:14:06,  3.21s/it]

objective/kl: -18.561079025268555
ppo/returns/mean: 0.3417428433895111
ppo/policy/advantages_mean: -4.7497451305389404e-08
---------------------------------------------------------------------------------------------------


3725it [4:14:09,  3.19s/it]

objective/kl: -14.067435264587402
ppo/returns/mean: 0.29727476835250854
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3726it [4:14:13,  3.27s/it]

objective/kl: -14.267337799072266
ppo/returns/mean: 0.2929565906524658
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


3727it [4:14:17,  3.42s/it]

objective/kl: -14.471419334411621
ppo/returns/mean: 0.30077266693115234
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3728it [4:14:20,  3.32s/it]

objective/kl: -15.58824634552002
ppo/returns/mean: 0.3182743787765503
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3729it [4:14:23,  3.24s/it]

objective/kl: -14.493220329284668
ppo/returns/mean: 0.30427277088165283
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3730it [4:14:26,  3.35s/it]

objective/kl: -14.445953369140625
ppo/returns/mean: 0.3094173073768616
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3731it [4:14:29,  3.25s/it]

objective/kl: -10.49608039855957
ppo/returns/mean: 0.2647116780281067
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3732it [4:14:33,  3.26s/it]

objective/kl: -12.788076400756836
ppo/returns/mean: 0.29432469606399536
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3733it [4:14:36,  3.19s/it]

objective/kl: -11.26833438873291
ppo/returns/mean: 0.2627181112766266
ppo/policy/advantages_mean: 2.421438694000244e-08
---------------------------------------------------------------------------------------------------


3734it [4:14:39,  3.14s/it]

objective/kl: -10.449078559875488
ppo/returns/mean: 0.2565847635269165
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


3735it [4:14:42,  3.17s/it]

objective/kl: -9.615314483642578
ppo/returns/mean: 0.2514418363571167
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


3736it [4:14:45,  3.14s/it]

objective/kl: -10.20467758178711
ppo/returns/mean: 0.24125787615776062
ppo/policy/advantages_mean: 6.51925802230835e-09
---------------------------------------------------------------------------------------------------


3737it [4:14:48,  3.23s/it]

objective/kl: -9.545500755310059
ppo/returns/mean: 0.24152860045433044
ppo/policy/advantages_mean: -3.3527612686157227e-08
---------------------------------------------------------------------------------------------------


3738it [4:14:51,  3.18s/it]

objective/kl: -6.461857795715332
ppo/returns/mean: 0.20417362451553345
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3739it [4:14:54,  3.13s/it]

objective/kl: -8.131670951843262
ppo/returns/mean: 0.22362741827964783
ppo/policy/advantages_mean: -4.0978193283081055e-08
---------------------------------------------------------------------------------------------------


3740it [4:14:58,  3.11s/it]

objective/kl: -5.3153557777404785
ppo/returns/mean: 0.1790618598461151
ppo/policy/advantages_mean: -3.725290298461914e-08
---------------------------------------------------------------------------------------------------


3741it [4:15:01,  3.09s/it]

objective/kl: -5.856290817260742
ppo/returns/mean: 0.18257099390029907
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


3742it [4:15:04,  3.21s/it]

objective/kl: -7.85575008392334
ppo/returns/mean: 0.2074684053659439
ppo/policy/advantages_mean: 3.91155481338501e-08
---------------------------------------------------------------------------------------------------


3743it [4:15:08,  3.31s/it]

objective/kl: -7.504660129547119
ppo/returns/mean: 0.19892160594463348
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3744it [4:15:11,  3.22s/it]

objective/kl: -7.067564964294434
ppo/returns/mean: 0.20362374186515808
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


3745it [4:15:14,  3.16s/it]

objective/kl: -2.443066120147705
ppo/returns/mean: 0.1384480595588684
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


3746it [4:15:17,  3.13s/it]

objective/kl: -1.6548302173614502
ppo/returns/mean: 0.12326554954051971
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3747it [4:15:20,  3.11s/it]

objective/kl: -3.9049415588378906
ppo/returns/mean: 0.15805207192897797
ppo/policy/advantages_mean: 3.725290298461914e-08
---------------------------------------------------------------------------------------------------


3748it [4:15:23,  3.09s/it]

objective/kl: -5.04434061050415
ppo/returns/mean: 0.15494567155838013
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3749it [4:15:26,  3.08s/it]

objective/kl: -2.3875980377197266
ppo/returns/mean: 0.12924346327781677
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


3750it [4:15:29,  3.07s/it]

objective/kl: -2.0496230125427246
ppo/returns/mean: 0.11896364390850067
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3751it [4:15:32,  3.08s/it]

objective/kl: -3.8469865322113037
ppo/returns/mean: 0.14095255732536316
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


3752it [4:15:35,  3.06s/it]

objective/kl: -2.8261799812316895
ppo/returns/mean: 0.1273263841867447
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


3753it [4:15:38,  3.06s/it]

objective/kl: -0.8604687452316284
ppo/returns/mean: 0.09171734750270844
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3754it [4:15:41,  3.06s/it]

objective/kl: -2.6179873943328857
ppo/returns/mean: 0.1134331077337265
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3755it [4:15:44,  3.05s/it]

objective/kl: -1.6769647598266602
ppo/returns/mean: 0.10050560534000397
ppo/policy/advantages_mean: 4.0046870708465576e-08
---------------------------------------------------------------------------------------------------


3756it [4:15:47,  3.09s/it]

objective/kl: -0.806549072265625
ppo/returns/mean: 0.07892866432666779
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3757it [4:15:50,  3.08s/it]

objective/kl: -3.0969090461730957
ppo/returns/mean: 0.1174224242568016
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3758it [4:15:53,  3.07s/it]

objective/kl: -2.4857993125915527
ppo/returns/mean: 0.10916479676961899
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3759it [4:15:56,  3.06s/it]

objective/kl: -0.16637036204338074
ppo/returns/mean: 0.07284598797559738
ppo/policy/advantages_mean: 3.166496753692627e-08
---------------------------------------------------------------------------------------------------


3760it [4:16:00,  3.06s/it]

objective/kl: -1.074815273284912
ppo/returns/mean: 0.07905356585979462
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3761it [4:16:03,  3.07s/it]

objective/kl: 2.9559171199798584
ppo/returns/mean: 0.02320886217057705
ppo/policy/advantages_mean: 1.3969838619232178e-08
---------------------------------------------------------------------------------------------------


3762it [4:16:06,  3.30s/it]

objective/kl: 1.6382700204849243
ppo/returns/mean: 0.04173283278942108
ppo/policy/advantages_mean: -4.842877388000488e-08
---------------------------------------------------------------------------------------------------


3763it [4:16:10,  3.27s/it]

objective/kl: 2.2420754432678223
ppo/returns/mean: 0.0207380298525095
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


3764it [4:16:13,  3.23s/it]

objective/kl: 1.1136932373046875
ppo/returns/mean: 0.0413244403898716
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3765it [4:16:16,  3.18s/it]

objective/kl: 0.3807591199874878
ppo/returns/mean: 0.04319917783141136
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


3766it [4:16:19,  3.16s/it]

objective/kl: 0.056140363216400146
ppo/returns/mean: 0.05263727158308029
ppo/policy/advantages_mean: -2.0489096641540527e-08
---------------------------------------------------------------------------------------------------


3767it [4:16:22,  3.12s/it]

objective/kl: 0.48001259565353394
ppo/returns/mean: 0.04368806630373001
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3768it [4:16:26,  3.43s/it]

objective/kl: 1.5195152759552002
ppo/returns/mean: 0.026678308844566345
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3769it [4:16:29,  3.32s/it]

objective/kl: -0.5346522927284241
ppo/returns/mean: 0.05696069821715355
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


3770it [4:16:32,  3.25s/it]

objective/kl: 1.8691589832305908
ppo/returns/mean: 0.018942466005682945
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3771it [4:16:35,  3.19s/it]

objective/kl: 2.1962811946868896
ppo/returns/mean: 0.009817888028919697
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3772it [4:16:38,  3.14s/it]

objective/kl: 2.8527369499206543
ppo/returns/mean: 0.006284954957664013
ppo/policy/advantages_mean: -2.3283064365386963e-08
---------------------------------------------------------------------------------------------------


3773it [4:16:41,  3.11s/it]

objective/kl: 1.309549331665039
ppo/returns/mean: 0.01634158380329609
ppo/policy/advantages_mean: 9.313225746154785e-09
---------------------------------------------------------------------------------------------------


3774it [4:16:45,  3.27s/it]

objective/kl: -7.465124607086182
ppo/returns/mean: 0.125206857919693
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3775it [4:16:48,  3.20s/it]

objective/kl: -4.52972936630249
ppo/returns/mean: 0.08752328902482986
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3776it [4:16:51,  3.24s/it]

objective/kl: -5.683577537536621
ppo/returns/mean: 0.11307114362716675
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3777it [4:16:54,  3.17s/it]

objective/kl: -3.880141258239746
ppo/returns/mean: 0.08963876217603683
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3778it [4:16:58,  3.14s/it]

objective/kl: -2.2150754928588867
ppo/returns/mean: 0.06386671215295792
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3779it [4:17:01,  3.19s/it]

objective/kl: -3.135998010635376
ppo/returns/mean: 0.07196707278490067
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3780it [4:17:04,  3.29s/it]

objective/kl: -0.6739367246627808
ppo/returns/mean: 0.03752284124493599
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3781it [4:17:07,  3.21s/it]

objective/kl: -3.671104907989502
ppo/returns/mean: 0.07890301942825317
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3782it [4:17:10,  3.16s/it]

objective/kl: -3.853114604949951
ppo/returns/mean: 0.06921018660068512
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3783it [4:17:14,  3.14s/it]

objective/kl: -0.9751176238059998
ppo/returns/mean: 0.03982194513082504
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


3784it [4:17:17,  3.13s/it]

objective/kl: -2.9436323642730713
ppo/returns/mean: 0.05965810641646385
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3785it [4:17:20,  3.11s/it]

objective/kl: -1.8303375244140625
ppo/returns/mean: 0.0498456135392189
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3786it [4:17:23,  3.10s/it]

objective/kl: 1.6088396310806274
ppo/returns/mean: -0.0014856024645268917
ppo/policy/advantages_mean: 2.7008354663848877e-08
---------------------------------------------------------------------------------------------------


3787it [4:17:26,  3.08s/it]

objective/kl: 0.6616331934928894
ppo/returns/mean: 0.014305580407381058
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3788it [4:17:29,  3.08s/it]

objective/kl: 1.0846631526947021
ppo/returns/mean: 0.015188734978437424
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3789it [4:17:32,  3.06s/it]

objective/kl: -0.766923189163208
ppo/returns/mean: 0.03374756500124931
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3790it [4:17:35,  3.06s/it]

objective/kl: 0.10236537456512451
ppo/returns/mean: 0.0221799835562706
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3791it [4:17:38,  3.05s/it]

objective/kl: -1.9123742580413818
ppo/returns/mean: 0.04506690055131912
ppo/policy/advantages_mean: 2.421438694000244e-08
---------------------------------------------------------------------------------------------------


3792it [4:17:41,  3.05s/it]

objective/kl: -3.0656495094299316
ppo/returns/mean: 0.06413541734218597
ppo/policy/advantages_mean: -1.30385160446167e-08
---------------------------------------------------------------------------------------------------


3793it [4:17:44,  3.09s/it]

objective/kl: -2.4077842235565186
ppo/returns/mean: 0.05993121862411499
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3794it [4:17:47,  3.07s/it]

objective/kl: -1.1186416149139404
ppo/returns/mean: 0.037216875702142715
ppo/policy/advantages_mean: 2.7939677238464355e-09
---------------------------------------------------------------------------------------------------


3795it [4:17:50,  3.06s/it]

objective/kl: -1.2220004796981812
ppo/returns/mean: 0.04184206947684288
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3796it [4:17:53,  3.07s/it]

objective/kl: -0.32083964347839355
ppo/returns/mean: 0.02699417434632778
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3797it [4:17:57,  3.16s/it]

objective/kl: 2.1391730308532715
ppo/returns/mean: -0.0043423655442893505
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3798it [4:18:00,  3.13s/it]

objective/kl: -1.8320939540863037
ppo/returns/mean: 0.0428430512547493
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3799it [4:18:03,  3.24s/it]

objective/kl: -2.167788028717041
ppo/returns/mean: 0.04942123964428902
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


3800it [4:18:07,  3.36s/it]

objective/kl: -4.665720462799072
ppo/returns/mean: 0.08683797717094421
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3801it [4:18:10,  3.27s/it]

objective/kl: -5.587112903594971
ppo/returns/mean: 0.1043723076581955
ppo/policy/advantages_mean: -9.778887033462524e-09
---------------------------------------------------------------------------------------------------


3802it [4:18:14,  3.42s/it]

objective/kl: -0.9751490354537964
ppo/returns/mean: 0.028790250420570374
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


3803it [4:18:17,  3.29s/it]

objective/kl: -2.803929090499878
ppo/returns/mean: 0.05896317958831787
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3804it [4:18:20,  3.21s/it]

objective/kl: -3.0541138648986816
ppo/returns/mean: 0.06299092620611191
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3805it [4:18:23,  3.15s/it]

objective/kl: -0.7889688611030579
ppo/returns/mean: 0.03410692512989044
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3806it [4:18:26,  3.10s/it]

objective/kl: -0.6604411005973816
ppo/returns/mean: 0.032305922359228134
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3807it [4:18:30,  3.30s/it]

objective/kl: -0.4658743739128113
ppo/returns/mean: 0.026653563603758812
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3808it [4:18:33,  3.21s/it]

objective/kl: -1.9092581272125244
ppo/returns/mean: 0.04311326518654823
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3809it [4:18:36,  3.16s/it]

objective/kl: -0.37931540608406067
ppo/returns/mean: 0.016611691564321518
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


3810it [4:18:39,  3.15s/it]

objective/kl: -2.0337460041046143
ppo/returns/mean: 0.042679522186517715
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3811it [4:18:42,  3.12s/it]

objective/kl: -2.224059581756592
ppo/returns/mean: 0.04580392315983772
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3812it [4:18:45,  3.15s/it]

objective/kl: -1.0295445919036865
ppo/returns/mean: 0.03486606106162071
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3813it [4:18:48,  3.11s/it]

objective/kl: -2.417009115219116
ppo/returns/mean: 0.046343542635440826
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3814it [4:18:52,  3.29s/it]

objective/kl: -3.2613441944122314
ppo/returns/mean: 0.06103619188070297
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3815it [4:18:55,  3.26s/it]

objective/kl: -2.9011058807373047
ppo/returns/mean: 0.05655134841799736
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3816it [4:18:59,  3.49s/it]

objective/kl: -0.27743035554885864
ppo/returns/mean: 0.02922707237303257
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3817it [4:19:02,  3.40s/it]

objective/kl: -1.95962655544281
ppo/returns/mean: 0.05160176381468773
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3818it [4:19:05,  3.37s/it]

objective/kl: -2.6701483726501465
ppo/returns/mean: 0.055537935346364975
ppo/policy/advantages_mean: -4.6566128730773926e-09
---------------------------------------------------------------------------------------------------


3819it [4:19:08,  3.28s/it]

objective/kl: -0.4541206955909729
ppo/returns/mean: 0.02143053337931633
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3820it [4:19:12,  3.20s/it]

objective/kl: -1.5005003213882446
ppo/returns/mean: 0.0393950417637825
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3821it [4:19:15,  3.25s/it]

objective/kl: -0.6084421873092651
ppo/returns/mean: 0.028823092579841614
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3822it [4:19:18,  3.34s/it]

objective/kl: -1.2165915966033936
ppo/returns/mean: 0.037381336092948914
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3823it [4:19:21,  3.25s/it]

objective/kl: -3.814113140106201
ppo/returns/mean: 0.059274330735206604
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3824it [4:19:25,  3.19s/it]

objective/kl: -4.146255016326904
ppo/returns/mean: 0.07370784878730774
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3825it [4:19:28,  3.16s/it]

objective/kl: -2.1087160110473633
ppo/returns/mean: 0.04301043599843979
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3826it [4:19:31,  3.12s/it]

objective/kl: -3.8791301250457764
ppo/returns/mean: 0.0662987232208252
ppo/policy/advantages_mean: 2.60770320892334e-08
---------------------------------------------------------------------------------------------------


3827it [4:19:34,  3.18s/it]

objective/kl: -5.045482635498047
ppo/returns/mean: 0.060181256383657455
ppo/policy/advantages_mean: 1.862645149230957e-09
---------------------------------------------------------------------------------------------------


3828it [4:19:37,  3.17s/it]

objective/kl: -4.937496185302734
ppo/returns/mean: 0.0663137435913086
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3829it [4:19:40,  3.16s/it]

objective/kl: -4.68544340133667
ppo/returns/mean: 0.06323805451393127
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3830it [4:19:43,  3.12s/it]

objective/kl: -7.9706573486328125
ppo/returns/mean: 0.11084200441837311
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3831it [4:19:46,  3.09s/it]

objective/kl: -17.27782440185547
ppo/returns/mean: 0.23054073750972748
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3832it [4:19:49,  3.09s/it]

objective/kl: -11.45022201538086
ppo/returns/mean: 0.1768864393234253
ppo/policy/advantages_mean: -1.30385160446167e-08
---------------------------------------------------------------------------------------------------


3833it [4:19:52,  3.07s/it]

objective/kl: -11.137413024902344
ppo/returns/mean: 0.1672961711883545
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3834it [4:19:55,  3.06s/it]

objective/kl: -10.333473205566406
ppo/returns/mean: 0.1651582568883896
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3835it [4:19:58,  3.06s/it]

objective/kl: -13.255339622497559
ppo/returns/mean: 0.20081406831741333
ppo/policy/advantages_mean: 2.1420419216156006e-08
---------------------------------------------------------------------------------------------------


3836it [4:20:02,  3.25s/it]

objective/kl: -10.055354118347168
ppo/returns/mean: 0.15701323747634888
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3837it [4:20:06,  3.40s/it]

objective/kl: -12.36716079711914
ppo/returns/mean: 0.18401844799518585
ppo/policy/advantages_mean: -1.30385160446167e-08
---------------------------------------------------------------------------------------------------


3838it [4:20:09,  3.29s/it]

objective/kl: -11.809228897094727
ppo/returns/mean: 0.19655287265777588
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3839it [4:20:12,  3.22s/it]

objective/kl: -7.38771915435791
ppo/returns/mean: 0.14053039252758026
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3840it [4:20:15,  3.16s/it]

objective/kl: -5.897004127502441
ppo/returns/mean: 0.1268807053565979
ppo/policy/advantages_mean: -2.3283064365386963e-09
---------------------------------------------------------------------------------------------------


3841it [4:20:18,  3.12s/it]

objective/kl: -7.408135414123535
ppo/returns/mean: 0.14805501699447632
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3842it [4:20:21,  3.09s/it]

objective/kl: -6.4435133934021
ppo/returns/mean: 0.13748763501644135
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3843it [4:20:24,  3.16s/it]

objective/kl: -5.710755825042725
ppo/returns/mean: 0.13007016479969025
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3844it [4:20:27,  3.11s/it]

objective/kl: -4.926183223724365
ppo/returns/mean: 0.10721534490585327
ppo/policy/advantages_mean: -2.7939677238464355e-09
---------------------------------------------------------------------------------------------------


3845it [4:20:31,  3.14s/it]

objective/kl: -7.044486999511719
ppo/returns/mean: 0.13617053627967834
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3846it [4:20:34,  3.11s/it]

objective/kl: -3.611290216445923
ppo/returns/mean: 0.09693937003612518
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3847it [4:20:37,  3.09s/it]

objective/kl: -8.125346183776855
ppo/returns/mean: 0.165075421333313
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3848it [4:20:40,  3.07s/it]

objective/kl: -7.702423095703125
ppo/returns/mean: 0.1648179441690445
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3849it [4:20:43,  3.06s/it]

objective/kl: -5.19052791595459
ppo/returns/mean: 0.13530874252319336
ppo/policy/advantages_mean: 1.5366822481155396e-08
---------------------------------------------------------------------------------------------------


3850it [4:20:46,  3.06s/it]

objective/kl: -6.306811332702637
ppo/returns/mean: 0.14294017851352692
ppo/policy/advantages_mean: -1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


3851it [4:20:49,  3.07s/it]

objective/kl: -4.374914169311523
ppo/returns/mean: 0.11301448941230774
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3852it [4:20:52,  3.09s/it]

objective/kl: -8.367971420288086
ppo/returns/mean: 0.1649688184261322
ppo/policy/advantages_mean: -4.470348358154297e-08
---------------------------------------------------------------------------------------------------


3853it [4:20:55,  3.06s/it]

objective/kl: -6.384415149688721
ppo/returns/mean: 0.14175879955291748
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3854it [4:20:58,  3.03s/it]

objective/kl: -2.762650489807129
ppo/returns/mean: 0.089774489402771
ppo/policy/advantages_mean: 1.3969838619232178e-08
---------------------------------------------------------------------------------------------------


3855it [4:21:01,  3.09s/it]

objective/kl: -2.8909168243408203
ppo/returns/mean: 0.09031720459461212
ppo/policy/advantages_mean: -2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


3856it [4:21:04,  3.07s/it]

objective/kl: -2.5431764125823975
ppo/returns/mean: 0.08847306668758392
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3857it [4:21:07,  3.08s/it]

objective/kl: 0.8501768112182617
ppo/returns/mean: 0.037583090364933014
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3858it [4:21:10,  3.05s/it]

objective/kl: -0.370426744222641
ppo/returns/mean: 0.05381812900304794
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3859it [4:21:14,  3.23s/it]

objective/kl: 0.39950239658355713
ppo/returns/mean: 0.04790191352367401
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3860it [4:21:17,  3.16s/it]

objective/kl: -0.6516332626342773
ppo/returns/mean: 0.06065259873867035
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3861it [4:21:20,  3.12s/it]

objective/kl: -1.3315324783325195
ppo/returns/mean: 0.058608539402484894
ppo/policy/advantages_mean: -1.7695128917694092e-08
---------------------------------------------------------------------------------------------------


3862it [4:21:23,  3.11s/it]

objective/kl: -2.177441358566284
ppo/returns/mean: 0.06384439021348953
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3863it [4:21:26,  3.07s/it]

objective/kl: -3.161160945892334
ppo/returns/mean: 0.07514812052249908
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3864it [4:21:29,  3.09s/it]

objective/kl: -4.1935014724731445
ppo/returns/mean: 0.09127514809370041
ppo/policy/advantages_mean: -4.6566128730773926e-09
---------------------------------------------------------------------------------------------------


3865it [4:21:32,  3.11s/it]

objective/kl: -4.241933822631836
ppo/returns/mean: 0.08754654228687286
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


3866it [4:21:36,  3.16s/it]

objective/kl: -7.612665176391602
ppo/returns/mean: 0.1406804323196411
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------


3867it [4:21:39,  3.11s/it]

objective/kl: -6.1572585105896
ppo/returns/mean: 0.11475999653339386
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3868it [4:21:42,  3.09s/it]

objective/kl: -2.831242084503174
ppo/returns/mean: 0.07750600576400757
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3869it [4:21:45,  3.07s/it]

objective/kl: -0.45153817534446716
ppo/returns/mean: 0.04660966247320175
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3870it [4:21:48,  3.04s/it]

objective/kl: 0.6503823399543762
ppo/returns/mean: 0.023057110607624054
ppo/policy/advantages_mean: -2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3871it [4:21:51,  3.03s/it]

objective/kl: -3.06441593170166
ppo/returns/mean: 0.07086549699306488
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3872it [4:21:54,  3.02s/it]

objective/kl: -4.345630645751953
ppo/returns/mean: 0.09601183980703354
ppo/policy/advantages_mean: -1.3969838619232178e-08
---------------------------------------------------------------------------------------------------


3873it [4:21:57,  3.13s/it]

objective/kl: -0.9905822277069092
ppo/returns/mean: 0.045361459255218506
ppo/policy/advantages_mean: -2.60770320892334e-08
---------------------------------------------------------------------------------------------------


3874it [4:22:00,  3.09s/it]

objective/kl: 2.3120458126068115
ppo/returns/mean: -0.005219088867306709
ppo/policy/advantages_mean: -3.91155481338501e-08
---------------------------------------------------------------------------------------------------


3875it [4:22:03,  3.07s/it]

objective/kl: 1.1510608196258545
ppo/returns/mean: 0.0021293829195201397
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3876it [4:22:06,  3.08s/it]

objective/kl: -0.30271321535110474
ppo/returns/mean: 0.02253454364836216
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


3877it [4:22:09,  3.05s/it]

objective/kl: -1.3742649555206299
ppo/returns/mean: 0.02283620648086071
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


3878it [4:22:13,  3.40s/it]

objective/kl: 0.8298298120498657
ppo/returns/mean: -0.006545280106365681
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3879it [4:22:16,  3.29s/it]

objective/kl: -0.5224226713180542
ppo/returns/mean: 0.02295326068997383
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3880it [4:22:19,  3.20s/it]

objective/kl: -3.150285482406616
ppo/returns/mean: 0.0511491522192955
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3881it [4:22:23,  3.23s/it]

objective/kl: -0.8516307473182678
ppo/returns/mean: 0.025826402008533478
ppo/policy/advantages_mean: -1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


3882it [4:22:26,  3.16s/it]

objective/kl: -5.10202693939209
ppo/returns/mean: 0.11934071779251099
ppo/policy/advantages_mean: 2.514570951461792e-08
---------------------------------------------------------------------------------------------------


3883it [4:22:29,  3.12s/it]

objective/kl: -6.958438396453857
ppo/returns/mean: 0.1525917947292328
ppo/policy/advantages_mean: 1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


3884it [4:22:32,  3.09s/it]

objective/kl: -2.7567782402038574
ppo/returns/mean: 0.08622020483016968
ppo/policy/advantages_mean: -2.7939677238464355e-09
---------------------------------------------------------------------------------------------------


3885it [4:22:35,  3.05s/it]

objective/kl: -3.366722822189331
ppo/returns/mean: 0.09228488057851791
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3886it [4:22:38,  3.03s/it]

objective/kl: -2.7673182487487793
ppo/returns/mean: 0.09109912812709808
ppo/policy/advantages_mean: 5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3887it [4:22:41,  3.08s/it]

objective/kl: -2.5717618465423584
ppo/returns/mean: 0.09434936195611954
ppo/policy/advantages_mean: 1.7695128917694092e-08
---------------------------------------------------------------------------------------------------


3888it [4:22:44,  3.06s/it]

objective/kl: -0.24731576442718506
ppo/returns/mean: 0.057755015790462494
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3889it [4:22:48,  3.34s/it]

objective/kl: -0.21883253753185272
ppo/returns/mean: 0.054623737931251526
ppo/policy/advantages_mean: -1.5832483768463135e-08
---------------------------------------------------------------------------------------------------


3890it [4:22:51,  3.23s/it]

objective/kl: -1.6593191623687744
ppo/returns/mean: 0.07393641769886017
ppo/policy/advantages_mean: 3.3527612686157227e-08
---------------------------------------------------------------------------------------------------


3891it [4:22:54,  3.26s/it]

objective/kl: -1.3726826906204224
ppo/returns/mean: 0.07763923704624176
ppo/policy/advantages_mean: 2.2351741790771484e-08
---------------------------------------------------------------------------------------------------


3892it [4:22:57,  3.18s/it]

objective/kl: -2.625753402709961
ppo/returns/mean: 0.08930351585149765
ppo/policy/advantages_mean: 1.30385160446167e-08
---------------------------------------------------------------------------------------------------


3893it [4:23:00,  3.13s/it]

objective/kl: -1.2195838689804077
ppo/returns/mean: 0.051758136600255966
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


3894it [4:23:03,  3.09s/it]

objective/kl: 0.38645389676094055
ppo/returns/mean: 0.031419143080711365
ppo/policy/advantages_mean: -1.862645149230957e-09
---------------------------------------------------------------------------------------------------


3895it [4:23:06,  3.07s/it]

objective/kl: -1.8985874652862549
ppo/returns/mean: 0.07994285225868225
ppo/policy/advantages_mean: 3.3527612686157227e-08
---------------------------------------------------------------------------------------------------


3896it [4:23:09,  3.04s/it]

objective/kl: -0.08553409576416016
ppo/returns/mean: 0.03877522423863411
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3897it [4:23:12,  3.03s/it]

objective/kl: -0.9136005640029907
ppo/returns/mean: 0.06270518153905869
ppo/policy/advantages_mean: 7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3898it [4:23:15,  3.03s/it]

objective/kl: -0.7746290564537048
ppo/returns/mean: 0.06816266477108002
ppo/policy/advantages_mean: 2.7008354663848877e-08
---------------------------------------------------------------------------------------------------


3899it [4:23:18,  3.08s/it]

objective/kl: 2.011669874191284
ppo/returns/mean: 0.012061910703778267
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3900it [4:23:21,  3.06s/it]

objective/kl: -1.2084417343139648
ppo/returns/mean: 0.05624314397573471
ppo/policy/advantages_mean: 2.421438694000244e-08
---------------------------------------------------------------------------------------------------


3901it [4:23:24,  3.04s/it]

objective/kl: 0.9741181135177612
ppo/returns/mean: 0.012732362374663353
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3902it [4:23:28,  3.05s/it]

objective/kl: 0.3249271512031555
ppo/returns/mean: 0.03239307180047035
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3903it [4:23:31,  3.04s/it]

objective/kl: 0.5990636348724365
ppo/returns/mean: 0.02856811136007309
ppo/policy/advantages_mean: 3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3904it [4:23:34,  3.02s/it]

objective/kl: 1.589786171913147
ppo/returns/mean: 0.012276710942387581
ppo/policy/advantages_mean: 2.9802322387695312e-08
---------------------------------------------------------------------------------------------------


3905it [4:23:37,  3.08s/it]

objective/kl: 2.4536001682281494
ppo/returns/mean: -0.015184327028691769
ppo/policy/advantages_mean: -5.587935447692871e-09
---------------------------------------------------------------------------------------------------


3906it [4:23:40,  3.05s/it]

objective/kl: 2.28665828704834
ppo/returns/mean: -0.01344649400562048
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3907it [4:23:43,  3.06s/it]

objective/kl: 1.1935808658599854
ppo/returns/mean: 0.014709152281284332
ppo/policy/advantages_mean: 1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3908it [4:23:46,  3.24s/it]

objective/kl: 1.4588816165924072
ppo/returns/mean: 0.015595928765833378
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3909it [4:23:50,  3.19s/it]

objective/kl: 2.5387182235717773
ppo/returns/mean: 0.010814301669597626
ppo/policy/advantages_mean: -1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3910it [4:23:53,  3.13s/it]

objective/kl: 2.6798129081726074
ppo/returns/mean: -0.013313516974449158
ppo/policy/advantages_mean: 1.862645149230957e-08
---------------------------------------------------------------------------------------------------


3911it [4:23:55,  3.08s/it]

objective/kl: 5.066709518432617
ppo/returns/mean: -0.04321790859103203
ppo/policy/advantages_mean: -7.450580596923828e-09
---------------------------------------------------------------------------------------------------


3912it [4:23:58,  3.06s/it]

objective/kl: 4.694790840148926
ppo/returns/mean: -0.029617449268698692
ppo/policy/advantages_mean: 1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3913it [4:24:02,  3.26s/it]

objective/kl: 2.624342441558838
ppo/returns/mean: -0.033063873648643494
ppo/policy/advantages_mean: -2.60770320892334e-08
---------------------------------------------------------------------------------------------------


3914it [4:24:05,  3.19s/it]

objective/kl: 2.7559823989868164
ppo/returns/mean: -0.033808253705501556
ppo/policy/advantages_mean: -3.725290298461914e-09
---------------------------------------------------------------------------------------------------


3915it [4:24:08,  3.17s/it]

objective/kl: 3.4805290699005127
ppo/returns/mean: -0.041983019560575485
ppo/policy/advantages_mean: -1.1175870895385742e-08
---------------------------------------------------------------------------------------------------


3916it [4:24:12,  3.32s/it]

objective/kl: 1.28335440158844
ppo/returns/mean: -0.004876744467765093
ppo/policy/advantages_mean: -1.4901161193847656e-08
---------------------------------------------------------------------------------------------------


3917it [4:24:15,  3.26s/it]

objective/kl: 4.576208591461182
ppo/returns/mean: -0.05052005499601364
ppo/policy/advantages_mean: 0.0
---------------------------------------------------------------------------------------------------


3918it [4:24:18,  3.21s/it]

objective/kl: 3.958761692047119
ppo/returns/mean: -0.040133409202098846
ppo/policy/advantages_mean: -1.6763806343078613e-08
---------------------------------------------------------------------------------------------------


3919it [4:24:22,  4.05s/it]

objective/kl: 2.7029216289520264
ppo/returns/mean: -0.023888345807790756
ppo/policy/advantages_mean: -9.313225746154785e-09
---------------------------------------------------------------------------------------------------





In [12]:
#### Qualitative check: generate summaries for one random batch with both the
#### frozen reference model and the PPO-tuned model, then score each against
#### the reference impression with the RadGraph F1 reward.
bs = 32
game_data = dict()

# View the dataset as pandas only for the duration of the sampling.
# `formatted_as` restores the previous format on exit, so cells that rely on
# the original format (e.g. the training loop) can still be re-run afterwards.
with tokenized_train_data.formatted_as("pandas"):
    df_batch = tokenized_train_data[:].sample(bs)

# NOTE: despite its name, the "query" column holds the reference impression
# (see build_dataset); it is used as the reward reference below.
game_data["query"] = df_batch["query"].tolist()
query_tensors = df_batch["input_ids"].tolist()


def generate_response(model, input_ids):
    """Generate one output sequence with `model` for a single un-batched input.

    Args:
        model: a model exposing `parameters()` and `generate()`.
        input_ids: token ids of one example (no batch dimension).

    Returns:
        The generated token ids with the batch dimension removed.
    """
    # Run on whatever device the model lives on instead of assuming "cuda".
    device = next(model.parameters()).device
    batch = torch.tensor(input_ids).unsqueeze(dim=0).to(device)
    # Index the single batch element explicitly; a bare squeeze() would also
    # collapse a length-1 sequence into a 0-dim tensor.
    return model.generate(batch, **generation_kwargs)[0]


#### get responses from the reference model and the PPO-tuned model
# Generation stays interleaved per example (reference first, then policy) so
# the order in which random numbers are consumed under sampling is unchanged.
response_tensors_ref, response_tensors = [], []
for input_ids in query_tensors:
    response_tensors_ref.append(generate_response(ref_model, input_ids))
    response_tensors.append(generate_response(ppo_model, input_ids))

#### decode responses
game_data["response (before)"] = [
    tokenizer.decode(tokens, skip_special_tokens=True) for tokens in response_tensors_ref
]
game_data["response (after)"] = [
    tokenizer.decode(tokens, skip_special_tokens=True) for tokens in response_tensors
]

#### RadGraph F1 reward of each response against its reference impression
# Element [1] of the scorer's return value is stored as the per-example reward
# column (presumably the per-sample score list; confirm against the radgraph API).
game_data["rewards (before)"] = f1radgraph_reward(hyps=game_data["response (before)"], refs=game_data["query"])[1]

game_data["rewards (after)"] = f1radgraph_reward(hyps=game_data["response (after)"], refs=game_data["query"])[1]

# store results in a dataframe
df_results = pd.DataFrame(game_data)
df_results



Unnamed: 0,query,response (before),response (after),rewards (before),rewards (after)
0,No acute cardiopulmonary abnormality.,No acute cardiopulmonary abnormality.,--1-,1.0,0.0
1,"Right greater than left perihilar opacities, r...","Bilateral perihilar airspace opacities, right ...",pneum pneum pneum-,0.571429,0.0
2,No acute cardiopulmonary process.,No acute cardiopulmonary process.,--- pneum,1.0,0.0
3,"Moderate-sized right pleural effusion, which a...","Moderate-sized right pleural effusion, slightl...",pneum pneum pneum-,0.818182,0.0
4,Interval resolution of wedge-shaped opacity at...,Interval resolution of wedge-shaped opacity at...,pneum pneum pneum-,1.0,0.0
5,"Improving left basilar pneumonia. However, it ...","Left basilar pneumonia has improved, but not c...",pneum pneum pneum-,0.266667,0.0
6,No acute intrathoracic process.,No acute intrathoracic process.,--- pneum,1.0,0.0
7,1. Faint opacity at the left base likely refle...,1. Ill-defined opacity at the left base on the...,pneum pneum pneum-,0.516129,0.0
8,No evidence of acute cardiopulmonary process. ...,Right IJ central venous line with the tip term...,pneum pneum pneum-,0.380952,0.0
9,No acute cardiopulmonary abnormality.,No acute cardiopulmonary abnormality.,--- pneum,1.0,0.0
