# Unity ML Agents
## Proximal Policy Optimization (PPO)
Contains an implementation of PPO as described in [Proximal Policy Optimization Algorithms (Schulman et al., 2017)](https://arxiv.org/abs/1707.06347).

In [1]:
import numpy as np
import os
import tensorflow as tf

import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"


from ppo.history import *
from ppo.models import *
from ppo.trainer import Trainer
from unityagents import *

### Hyperparameters

In [2]:
### General parameters
max_steps = 3.4e7 # Maximum number of steps to run the environment (a shorter run would use e.g. 5e5).
run_path = "ppo" # The sub-directory name for model and summary statistics
load_model = True # Whether to load a saved model.
train_model = True # Whether to train the model.
summary_freq = 1000 # Frequency at which to save training statistics.
save_freq = 50000 # Frequency at which to save model.
env_name = "wizim" # Name of the training environment file.
# Raw string: the Windows path contains backslashes, and sequences such as "\w" or "\p"
# are invalid escapes in a normal string literal (deprecated, and easy to break by renaming
# a folder to something like "\new"). Set to None (or the string "None") to train
# without a curriculum.
curriculum_file = r"E:\work\ml-agents\python\curricula\wizim.json"

### Algorithm-specific parameters for tuning
gamma = 0.99 # Reward discount rate.
lambd = 0.95 # Lambda parameter for GAE.
time_horizon = 2048 # How many steps to collect per agent before adding to buffer.
beta = 1e-3 # Strength of entropy regularization
num_epoch = 5 # Number of gradient descent steps per batch of experiences.
num_layers = 2 # Number of hidden layers between state/observation encoding and value/policy layers.
epsilon = 0.2 # Acceptable threshold around ratio of old and new policy probabilities.
buffer_size = 2048 # How large the experience buffer should be before gradient descent.
learning_rate = 3e-5 # Model learning rate.
hidden_units = 32 # Number of units in hidden layer.
batch_size = 64 # How many experiences per gradient descent update step.
normalize = False # Passed to the model builder and to the trainer's action step.

### Logging dictionary for hyperparameters
# Keys are the labels written next to each value in the summary; they mirror the
# variable names above (the previous 'buffe_size' / 'leaning_rate' keys were typos).
hyperparameter_dict = {
    'max_steps': max_steps,
    'run_path': run_path,
    'env_name': env_name,
    'curriculum_file': curriculum_file,
    'gamma': gamma,
    'lambd': lambd,
    'time_horizon': time_horizon,
    'beta': beta,
    'num_epoch': num_epoch,
    'epsilon': epsilon,
    'buffer_size': buffer_size,
    'learning_rate': learning_rate,
    'hidden_units': hidden_units,
    'batch_size': batch_size,
}

### Load the environment

In [3]:
# Create the Unity environment from the `env_name` file, handing it the curriculum file.
env = UnityEnvironment(curriculum=curriculum_file, file_name=env_name)
# Show the environment summary; print() applies str() to its argument.
print(env)
# Train the first brain that the environment reports as external.
brain_name = env.external_brain_names[0]

INFO:unityagents:
'WizimAcademy' started successfully!


Unity Academy name: WizimAcademy
        Number of brains: 1
        Reset Parameters :
		scale -> 1.0
Unity brain name: Brain
        Number of observations (per agent): 0
        State space type: continuous
        State space size (per agent): 6
        Action space type: continuous
        Action space size (per agent): 2
        Memory space size (per agent): 0
        Action descriptions: , 


### Train the Agent(s)

In [4]:
# Clear TensorFlow's default graph before the model is built below.
tf.reset_default_graph()

# The literal string "None" is accepted as a way of saying "no curriculum".
curriculum_file = None if curriculum_file == "None" else curriculum_file


def get_progress():
    """Return the value handed to `env.reset(progress=...)`.

    Reads the notebook globals `curriculum_file`, `env`, `steps`, `max_steps`
    and `last_reward`.

    Returns:
        None when no curriculum file is configured or the curriculum's
        measure type is unrecognised; `steps / max_steps` when the measure
        type is "progress"; `last_reward` when it is "reward".
    """
    # Without a curriculum there is nothing to measure.
    if curriculum_file is None:
        return None

    measure = env._curriculum.measure_type
    if measure == "progress":
        return steps / max_steps
    if measure == "reward":
        return last_reward
    return None

# Create the Tensorflow model graph
ppo_model = create_agent_model(env, lr=learning_rate,
                               h_size=hidden_units, epsilon=epsilon,
                               beta=beta, max_step=max_steps,
                               normalize=normalize, num_layers=num_layers)

# Properties of the brain being trained; they are passed to the Trainer below.
is_continuous = (env.brains[brain_name].action_space_type == "continuous")
use_observations = (env.brains[brain_name].number_observations > 0)
use_states = (env.brains[brain_name].state_space_size > 0)

# Checkpoints and summary statistics each live in a sub-directory named after the run.
model_path = './models/{}'.format(run_path)
summary_path = './summaries/{}'.format(run_path)

# exist_ok=True replaces the separate "exists?" check, so there is no window
# between checking and creating, and re-running the cell is harmless.
os.makedirs(model_path, exist_ok=True)
os.makedirs(summary_path, exist_ok=True)

# Built after the model so that they cover the variables it created.
init = tf.global_variables_initializer()
saver = tf.train.Saver()

# The try/finally guarantees the Unity environment is closed even when training
# is interrupted or raises; otherwise the environment would be left open.
try:
    with tf.Session() as sess:
        # Instantiate model parameters
        if load_model:
            print('Loading Model...')
            ckpt = tf.train.get_checkpoint_state(model_path)
            # get_checkpoint_state returns None when the directory holds no checkpoint;
            # fail with an actionable message instead of an AttributeError on None.
            if ckpt is None or not ckpt.model_checkpoint_path:
                raise FileNotFoundError(
                    "load_model is True but no checkpoint was found in '{}'. "
                    "Set load_model = False to train from scratch.".format(model_path))
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(init)
        # Read the step counter and last reward stored in the graph, so a restored
        # model continues from where its checkpoint left off.
        steps, last_reward = sess.run([ppo_model.global_step, ppo_model.last_reward])
        summary_writer = tf.summary.FileWriter(summary_path)
        info = env.reset(train_mode=train_model, progress=get_progress())[brain_name]
        trainer = Trainer(ppo_model, sess, info, is_continuous, use_observations, use_states, train_model)
        if train_model:
            trainer.write_text(summary_writer, 'Hyperparameters', hyperparameter_dict, steps)

        print(max_steps)
        while steps <= max_steps:
            if env.global_done:
                info = env.reset(train_mode=train_model, progress=get_progress())[brain_name]
            # Decide and take an action
            info = trainer.take_action(info, env, brain_name, steps, normalize)
            trainer.process_experiences(info, time_horizon, gamma, lambd)
            if len(trainer.training_buffer['actions']) > buffer_size and train_model:
                # Perform gradient descent with experience buffer
                trainer.update_model(batch_size, num_epoch)
            if steps % summary_freq == 0 and steps != 0 and train_model:
                # Write training statistics to tensorboard.
                trainer.write_summary(summary_writer, steps, env._curriculum.lesson_number)
            if steps % save_freq == 0 and steps != 0 and train_model:
                # Save Tensorflow model
                save_model(sess, model_path=model_path, steps=steps, saver=saver)
            # Keep the Python-side counter and the graph's global step in lock-step.
            steps += 1
            sess.run(ppo_model.increment_step)
            if len(trainer.stats['cumulative_reward']) > 0:
                # Store the mean cumulative reward in the graph and read it back;
                # get_progress() returns it for "reward"-measured curricula.
                mean_reward = np.mean(trainer.stats['cumulative_reward'])
                sess.run(ppo_model.update_reward, feed_dict={ppo_model.new_reward: mean_reward})
                last_reward = sess.run(ppo_model.last_reward)
        # Final save Tensorflow model
        if steps != 0 and train_model:
            save_model(sess, model_path=model_path, steps=steps, saver=saver)
finally:
    env.close()
# Only reached when training finished without an error.
export_graph(model_path, env_name)

Loading Model...
INFO:tensorflow:Restoring parameters from ./models/ppo\model-6750000.cptk


INFO:tensorflow:Restoring parameters from ./models/ppo\model-6750000.cptk


34000000.0
Saved Model


INFO:unityagents:
Lesson changed. Now in Lesson 1 : 	scale -> 0.8
INFO:unityagents:
Lesson changed. Now in Lesson 2 : 	scale -> 0.85
INFO:unityagents:
Lesson changed. Now in Lesson 3 : 	scale -> 0.87


Step: 6751000. Mean Reward: 0.8713074380165291. Std of Reward: 0.3300969875420636.


INFO:unityagents:
Lesson changed. Now in Lesson 4 : 	scale -> 0.9
INFO:unityagents:
Lesson changed. Now in Lesson 5 : 	scale -> 0.93


Step: 6752000. Mean Reward: 0.8624543918918919. Std of Reward: 0.3374309157737336.


INFO:unityagents:
Lesson changed. Now in Lesson 6 : 	scale -> 0.95
INFO:unityagents:
Lesson changed. Now in Lesson 7 : 	scale -> 1.0
INFO:unityagents:
Lesson changed. Now in Lesson 8 : 	scale -> 1.1


Step: 6753000. Mean Reward: 0.8127719869706841. Std of Reward: 0.40933270173984576.


INFO:unityagents:
Lesson changed. Now in Lesson 9 : 	scale -> 1.3
INFO:unityagents:
Lesson changed. Now in Lesson 10 : 	scale -> 1.5


Step: 6754000. Mean Reward: 0.7997079207920792. Std of Reward: 0.4349641601676285.
Step: 6755000. Mean Reward: 0.7863643533123028. Std of Reward: 0.4501468684871738.


INFO:unityagents:
Lesson changed. Now in Lesson 11 : 	scale -> 1.7
INFO:unityagents:
Lesson changed. Now in Lesson 12 : 	scale -> 2


Step: 6756000. Mean Reward: 0.7710431309904153. Std of Reward: 0.47383596708019443.
Step: 6757000. Mean Reward: 0.7517784810126582. Std of Reward: 0.49251334716114026.
Step: 6758000. Mean Reward: 0.7400950080515297. Std of Reward: 0.4958641400644046.
Step: 6759000. Mean Reward: 0.7502. Std of Reward: 0.4924620391461661.
Step: 6760000. Mean Reward: 0.7369. Std of Reward: 0.5029852315177853.
Step: 6761000. Mean Reward: 0.7599836601307189. Std of Reward: 0.4850282416085122.
Step: 6762000. Mean Reward: 0.7485862619808307. Std of Reward: 0.4987322808143491.
Step: 6763000. Mean Reward: 0.7421837060702876. Std of Reward: 0.4973503401233077.
Step: 6764000. Mean Reward: 0.7581126984126985. Std of Reward: 0.485857107150764.
Step: 6765000. Mean Reward: 0.7631076443057722. Std of Reward: 0.47845702596805373.
Step: 6766000. Mean Reward: 0.7727468944099378. Std of Reward: 0.4784001186404535.
Step: 6767000. Mean Reward: 0.7149969325153375. Std of Reward: 0.5304901310408091.
Step: 6768000. Mean Reward

INFO:unityagents:
Lesson changed. Now in Lesson 13 : 	scale -> 3


Step: 6785000. Mean Reward: 0.7448287037037037. Std of Reward: 0.4976752932479477.
Step: 6786000. Mean Reward: 0.6600087463556851. Std of Reward: 0.5792139087678145.
Step: 6787000. Mean Reward: 0.6615798192771084. Std of Reward: 0.5770587003320226.
Step: 6788000. Mean Reward: 0.6577492537313433. Std of Reward: 0.5761426930873553.
Step: 6789000. Mean Reward: 0.6583946587537092. Std of Reward: 0.5811037500137789.
Step: 6790000. Mean Reward: 0.6536251896813354. Std of Reward: 0.5751921740244341.
Step: 6791000. Mean Reward: 0.6466348484848485. Std of Reward: 0.5883892586499808.
Step: 6792000. Mean Reward: 0.7049184615384615. Std of Reward: 0.5389528032404229.
Step: 6793000. Mean Reward: 0.6424494047619047. Std of Reward: 0.5857869888019412.
Step: 6794000. Mean Reward: 0.6390389805097451. Std of Reward: 0.5959290593333079.
Step: 6795000. Mean Reward: 0.6784847094801223. Std of Reward: 0.559525942514947.
Step: 6796000. Mean Reward: 0.6782481315396114. Std of Reward: 0.5623472917577989.
Step:

Step: 6884000. Mean Reward: 0.6412003012048192. Std of Reward: 0.5834083232406059.
Step: 6885000. Mean Reward: 0.6866872037914692. Std of Reward: 0.5558412579633384.
Step: 6886000. Mean Reward: 0.6841219879518072. Std of Reward: 0.5620340012425178.
Step: 6887000. Mean Reward: 0.6566294642857142. Std of Reward: 0.5808922000853094.
Step: 6888000. Mean Reward: 0.6652895522388059. Std of Reward: 0.5762391939957004.
Step: 6889000. Mean Reward: 0.6811993817619784. Std of Reward: 0.5578843831623482.
Step: 6890000. Mean Reward: 0.6794451219512195. Std of Reward: 0.5593558032874304.
Step: 6891000. Mean Reward: 0.6721517027863777. Std of Reward: 0.5639569471924024.
Step: 6892000. Mean Reward: 0.6426018662519439. Std of Reward: 0.5834372569354334.
Step: 6893000. Mean Reward: 0.6566650868878356. Std of Reward: 0.5800399002977922.
Step: 6894000. Mean Reward: 0.6692930232558139. Std of Reward: 0.5625990236608959.
Step: 6895000. Mean Reward: 0.7017411003236246. Std of Reward: 0.5391814822812772.
Step

Step: 6983000. Mean Reward: 0.6338230884557721. Std of Reward: 0.5962630014542283.
Step: 6984000. Mean Reward: 0.6628882352941177. Std of Reward: 0.5715201060560315.
Step: 6985000. Mean Reward: 0.693301684532925. Std of Reward: 0.5545918433062166.
Step: 6986000. Mean Reward: 0.6596494688922611. Std of Reward: 0.5692744310845761.
Step: 6987000. Mean Reward: 0.6934495268138801. Std of Reward: 0.5509103319277078.
Step: 6988000. Mean Reward: 0.643042748091603. Std of Reward: 0.5835530926257342.
Step: 6989000. Mean Reward: 0.6861921259842519. Std of Reward: 0.5536521898625297.
Step: 6990000. Mean Reward: 0.6865758998435054. Std of Reward: 0.5590282593813524.
Step: 6991000. Mean Reward: 0.6139906542056075. Std of Reward: 0.5978235837568084.
Step: 6992000. Mean Reward: 0.6672646153846153. Std of Reward: 0.5726219540730084.
Step: 6993000. Mean Reward: 0.6465045454545454. Std of Reward: 0.5844183031097177.
Step: 6994000. Mean Reward: 0.6980314960629921. Std of Reward: 0.5478891165622549.
Step: 

Step: 7082000. Mean Reward: 0.6572232277526395. Std of Reward: 0.5795645072316572.
Step: 7083000. Mean Reward: 0.616920245398773. Std of Reward: 0.5994539819379712.
Step: 7084000. Mean Reward: 0.6685575757575758. Std of Reward: 0.5698211505856281.
Step: 7085000. Mean Reward: 0.6662167414050822. Std of Reward: 0.5717152013098948.
Step: 7086000. Mean Reward: 0.673009375. Std of Reward: 0.5659170239859457.
Step: 7087000. Mean Reward: 0.6697847328244275. Std of Reward: 0.5667062632215626.
Step: 7088000. Mean Reward: 0.68798. Std of Reward: 0.5538960229279359.
Step: 7089000. Mean Reward: 0.685856923076923. Std of Reward: 0.5560263575571079.
Step: 7090000. Mean Reward: 0.6670015082956259. Std of Reward: 0.5696072990322782.
Step: 7091000. Mean Reward: 0.6743495440729483. Std of Reward: 0.56680778331748.
Step: 7092000. Mean Reward: 0.6946099397590362. Std of Reward: 0.5538752849574924.
Step: 7093000. Mean Reward: 0.6735502248875562. Std of Reward: 0.5630322610097317.
Step: 7094000. Mean Reward

Step: 7182000. Mean Reward: 0.6627365930599369. Std of Reward: 0.5687507382198433.
Step: 7183000. Mean Reward: 0.6804456521739131. Std of Reward: 0.5639186638768698.
Step: 7184000. Mean Reward: 0.6687672281776417. Std of Reward: 0.5630037640313342.
Step: 7185000. Mean Reward: 0.653594427244582. Std of Reward: 0.5779073004337641.
Step: 7186000. Mean Reward: 0.6539062980030722. Std of Reward: 0.5799803604972203.
Step: 7187000. Mean Reward: 0.6791821086261981. Std of Reward: 0.5559688622415244.
Step: 7188000. Mean Reward: 0.6591853582554518. Std of Reward: 0.5786302589268417.
Step: 7189000. Mean Reward: 0.661671212121212. Std of Reward: 0.5703175081306455.
Step: 7190000. Mean Reward: 0.6489061538461538. Std of Reward: 0.5816687707666685.
Step: 7191000. Mean Reward: 0.6424985029940119. Std of Reward: 0.5895180348986898.
Step: 7192000. Mean Reward: 0.6215045731707317. Std of Reward: 0.5950493010757961.
Step: 7193000. Mean Reward: 0.6417124802527646. Std of Reward: 0.5891816796316698.
Step: 

Step: 7281000. Mean Reward: 0.6972756410256411. Std of Reward: 0.5390964374709035.
Step: 7282000. Mean Reward: 0.6902067307692307. Std of Reward: 0.5521114893619458.
Step: 7283000. Mean Reward: 0.6723490566037736. Std of Reward: 0.5561317664740074.
Step: 7284000. Mean Reward: 0.7030977564102564. Std of Reward: 0.5382344996947269.
Step: 7285000. Mean Reward: 0.6900901898734177. Std of Reward: 0.5451163501677574.
Step: 7286000. Mean Reward: 0.6343619047619048. Std of Reward: 0.5802651658300169.
Step: 7287000. Mean Reward: 0.6930205371248026. Std of Reward: 0.5535675117811318.
Step: 7288000. Mean Reward: 0.6617341576506955. Std of Reward: 0.5670606665056753.
Step: 7289000. Mean Reward: 0.69285303514377. Std of Reward: 0.5482073377022568.
Step: 7290000. Mean Reward: 0.6937142857142857. Std of Reward: 0.5477112505869501.
Step: 7291000. Mean Reward: 0.6839333333333333. Std of Reward: 0.5481933401558382.
Step: 7292000. Mean Reward: 0.6878205128205128. Std of Reward: 0.5534408277635781.
Step: 

Step: 7381000. Mean Reward: 0.6763535660091047. Std of Reward: 0.5623319005192647.
Step: 7382000. Mean Reward: 0.6551571648690293. Std of Reward: 0.5766462091194288.
Step: 7383000. Mean Reward: 0.6504150943396226. Std of Reward: 0.5711880815913749.
Step: 7384000. Mean Reward: 0.6958881578947369. Std of Reward: 0.541606835688978.
Step: 7385000. Mean Reward: 0.6846666666666666. Std of Reward: 0.546207645745216.
Step: 7386000. Mean Reward: 0.6994875. Std of Reward: 0.5415666855002715.
Step: 7387000. Mean Reward: 0.69266. Std of Reward: 0.550479449571008.
Step: 7388000. Mean Reward: 0.6881209677419354. Std of Reward: 0.5459127129912942.
Step: 7389000. Mean Reward: 0.6637751572327045. Std of Reward: 0.5701108146274406.
Step: 7390000. Mean Reward: 0.6934191522762951. Std of Reward: 0.5444134819987588.
Step: 7391000. Mean Reward: 0.7282397476340693. Std of Reward: 0.5169342768882891.
Step: 7392000. Mean Reward: 0.6773010920436817. Std of Reward: 0.5548366161799649.
Step: 7393000. Mean Reward:

Step: 7481000. Mean Reward: 0.67039586645469. Std of Reward: 0.5681012113822772.
Step: 7482000. Mean Reward: 0.6805696202531646. Std of Reward: 0.5529626462890553.
Step: 7483000. Mean Reward: 0.6965506329113924. Std of Reward: 0.5424671231648875.
Step: 7484000. Mean Reward: 0.6905928237129485. Std of Reward: 0.5507915290438312.
Step: 7485000. Mean Reward: 0.6457001522070016. Std of Reward: 0.5901005769470309.
Step: 7486000. Mean Reward: 0.6954186413902054. Std of Reward: 0.5493348470527459.
Step: 7487000. Mean Reward: 0.6881283676703644. Std of Reward: 0.5477077323417185.
Step: 7488000. Mean Reward: 0.6885189873417722. Std of Reward: 0.5517368917706579.
Step: 7489000. Mean Reward: 0.6949984101748807. Std of Reward: 0.5389092336930539.
Step: 7490000. Mean Reward: 0.7177636932707354. Std of Reward: 0.5253941024968518.
Step: 7491000. Mean Reward: 0.7020585443037975. Std of Reward: 0.5481380809155478.
Step: 7492000. Mean Reward: 0.707334375. Std of Reward: 0.5299254435232935.
Step: 7493000

Step: 7581000. Mean Reward: 0.7060031897926634. Std of Reward: 0.5297762851152615.
Step: 7582000. Mean Reward: 0.7182055641421947. Std of Reward: 0.5284578854774434.
Step: 7583000. Mean Reward: 0.6986766467065868. Std of Reward: 0.5505856410561925.
Step: 7584000. Mean Reward: 0.6967310664605872. Std of Reward: 0.550456701218056.
Step: 7585000. Mean Reward: 0.6782728758169934. Std of Reward: 0.5605075277020675.
Step: 7586000. Mean Reward: 0.6807169517884913. Std of Reward: 0.5563693047275016.
Step: 7587000. Mean Reward: 0.7060616332819724. Std of Reward: 0.5453250052753259.
Step: 7588000. Mean Reward: 0.7036348228043143. Std of Reward: 0.5368797377235681.
Step: 7589000. Mean Reward: 0.6755767441860464. Std of Reward: 0.5620706670944539.
Step: 7590000. Mean Reward: 0.6771437802907916. Std of Reward: 0.5621968854148878.
Step: 7591000. Mean Reward: 0.6942146263910971. Std of Reward: 0.5426985361628667.
Step: 7592000. Mean Reward: 0.6940695915279879. Std of Reward: 0.5515468397954387.
Step:

Step: 7680000. Mean Reward: 0.68214673046252. Std of Reward: 0.5492321746401644.
Step: 7681000. Mean Reward: 0.6866714060031596. Std of Reward: 0.5539251631211399.
Step: 7682000. Mean Reward: 0.7141711568938194. Std of Reward: 0.5279048555127513.
Step: 7683000. Mean Reward: 0.6826446540880504. Std of Reward: 0.5490193196527098.
Step: 7684000. Mean Reward: 0.6897452229299362. Std of Reward: 0.5481061244172304.
Step: 7685000. Mean Reward: 0.7111851851851851. Std of Reward: 0.5260157656137705.
Step: 7686000. Mean Reward: 0.6900549273021002. Std of Reward: 0.5465274576143898.
Step: 7687000. Mean Reward: 0.7303030769230769. Std of Reward: 0.5117055362576083.
Step: 7688000. Mean Reward: 0.7071559934318555. Std of Reward: 0.5269083545453559.
Step: 7689000. Mean Reward: 0.7086341853035144. Std of Reward: 0.5283447254654974.
Step: 7690000. Mean Reward: 0.7127993630573249. Std of Reward: 0.5223038209709634.
Step: 7691000. Mean Reward: 0.7075135566188198. Std of Reward: 0.5290957900790518.
Step: 

Step: 7780000. Mean Reward: 0.6874218009478673. Std of Reward: 0.5541251909967914.
Step: 7781000. Mean Reward: 0.7005368916797489. Std of Reward: 0.5446015471749184.
Step: 7782000. Mean Reward: 0.701275039745628. Std of Reward: 0.5322953735800231.
Step: 7783000. Mean Reward: 0.6750092735703246. Std of Reward: 0.5643176535724143.
Step: 7784000. Mean Reward: 0.6538688271604938. Std of Reward: 0.5728341844333442.
Step: 7785000. Mean Reward: 0.6960285261489698. Std of Reward: 0.5472343337044844.
Step: 7786000. Mean Reward: 0.6827034161490683. Std of Reward: 0.5576664756490783.
Step: 7787000. Mean Reward: 0.7061537267080745. Std of Reward: 0.5362994635153545.
Step: 7788000. Mean Reward: 0.6907285714285715. Std of Reward: 0.553691878365396.
Step: 7789000. Mean Reward: 0.6858925750394944. Std of Reward: 0.5506572214233564.
Step: 7790000. Mean Reward: 0.6741216429699843. Std of Reward: 0.5643318738791684.
Step: 7791000. Mean Reward: 0.6924908789386401. Std of Reward: 0.5485528507047509.
Step: 

Step: 7879000. Mean Reward: 0.7283233830845771. Std of Reward: 0.5142189845582299.
Step: 7880000. Mean Reward: 0.7098047619047619. Std of Reward: 0.5332579217305969.
Step: 7881000. Mean Reward: 0.6918454258675079. Std of Reward: 0.5432480463188386.
Step: 7882000. Mean Reward: 0.7062660256410257. Std of Reward: 0.5364371122360985.
Step: 7883000. Mean Reward: 0.6961619047619048. Std of Reward: 0.5375159967877431.
Step: 7884000. Mean Reward: 0.721321370309951. Std of Reward: 0.5214247730477307.
Step: 7885000. Mean Reward: 0.697008. Std of Reward: 0.5429134663424734.
Step: 7886000. Mean Reward: 0.7074205457463884. Std of Reward: 0.5248571246648174.
Step: 7887000. Mean Reward: 0.7123447154471545. Std of Reward: 0.5273269714605892.
Step: 7888000. Mean Reward: 0.6887981220657278. Std of Reward: 0.5429187118099781.
Step: 7889000. Mean Reward: 0.675636655948553. Std of Reward: 0.5607061921340792.
Step: 7890000. Mean Reward: 0.7184847512038524. Std of Reward: 0.5313281242416693.
Step: 7891000. M

Step: 7979000. Mean Reward: 0.6936848673946957. Std of Reward: 0.5497153969975019.
Step: 7980000. Mean Reward: 0.694392694063927. Std of Reward: 0.5426092335579644.
Step: 7981000. Mean Reward: 0.67698623853211. Std of Reward: 0.5645618485607908.
Step: 7982000. Mean Reward: 0.728843295638126. Std of Reward: 0.5170661953786528.
Step: 7983000. Mean Reward: 0.7300125000000001. Std of Reward: 0.5121375241024133.
Step: 7984000. Mean Reward: 0.6927699680511181. Std of Reward: 0.5483318951265721.
Step: 7985000. Mean Reward: 0.6800612903225806. Std of Reward: 0.5550134373133097.
Step: 7986000. Mean Reward: 0.7072669826224328. Std of Reward: 0.5327265760834717.
Step: 7987000. Mean Reward: 0.7203478964401294. Std of Reward: 0.5212330603865334.
Step: 7988000. Mean Reward: 0.7196479099678457. Std of Reward: 0.5221094917450073.
Step: 7989000. Mean Reward: 0.6952889245585874. Std of Reward: 0.5466286722141863.
Step: 7990000. Mean Reward: 0.6776666666666666. Std of Reward: 0.5549178484717333.
Step: 79

Step: 8079000. Mean Reward: 0.6714832000000001. Std of Reward: 0.5605915904807706.
Step: 8080000. Mean Reward: 0.69202034428795. Std of Reward: 0.549873419401127.
Step: 8081000. Mean Reward: 0.6986731707317073. Std of Reward: 0.5431543063436969.
Step: 8082000. Mean Reward: 0.6735071542130365. Std of Reward: 0.5562840360860116.
Step: 8083000. Mean Reward: 0.6711360759493671. Std of Reward: 0.5705069174652846.
Step: 8084000. Mean Reward: 0.6974149443561208. Std of Reward: 0.5439516231814387.
Step: 8085000. Mean Reward: 0.6605761904761905. Std of Reward: 0.5740625095143663.
Step: 8086000. Mean Reward: 0.713792937399679. Std of Reward: 0.5314933809060178.
Step: 8087000. Mean Reward: 0.6695363924050632. Std of Reward: 0.5607459249486161.
Step: 8088000. Mean Reward: 0.7379953560371517. Std of Reward: 0.5083109908765753.
Step: 8089000. Mean Reward: 0.7190354391371341. Std of Reward: 0.519242075630069.
Step: 8090000. Mean Reward: 0.7303402555910543. Std of Reward: 0.515567779273351.
Step: 8091

INFO:unityagents:
Lesson changed. Now in Lesson 14 : 	scale -> 4


Step: 8104000. Mean Reward: 0.6915434083601286. Std of Reward: 0.5414289274553856.
Step: 8105000. Mean Reward: 0.5868842592592592. Std of Reward: 0.6267100240116426.
Step: 8106000. Mean Reward: 0.5948204724409449. Std of Reward: 0.6175786902379301.
Step: 8107000. Mean Reward: 0.5334187116564417. Std of Reward: 0.6437874686120401.
Step: 8108000. Mean Reward: 0.5533604294478528. Std of Reward: 0.6388503073354802.
Step: 8109000. Mean Reward: 0.5624093023255814. Std of Reward: 0.6282766955052663.
Step: 8110000. Mean Reward: 0.6221255813953488. Std of Reward: 0.6002178116639121.
Step: 8111000. Mean Reward: 0.5512419106317411. Std of Reward: 0.6432365233662835.
Step: 8112000. Mean Reward: 0.5804138461538462. Std of Reward: 0.623524675565493.
Step: 8113000. Mean Reward: 0.557261217948718. Std of Reward: 0.6363541718930552.
Step: 8114000. Mean Reward: 0.5870014925373134. Std of Reward: 0.6212024461496026.
Step: 8115000. Mean Reward: 0.5890095846645368. Std of Reward: 0.6146192098129325.
Step: 

Step: 8204000. Mean Reward: 0.5950170542635659. Std of Reward: 0.6157227993185123.
Step: 8205000. Mean Reward: 0.5880936106983655. Std of Reward: 0.6213898015272613.
Step: 8206000. Mean Reward: 0.5907079510703364. Std of Reward: 0.6179060368412944.
Step: 8207000. Mean Reward: 0.6062738853503183. Std of Reward: 0.6071768030893019.
Step: 8208000. Mean Reward: 0.5830476947535771. Std of Reward: 0.6149299268171308.
Step: 8209000. Mean Reward: 0.6269701257861635. Std of Reward: 0.5959050703603018.
Step: 8210000. Mean Reward: 0.6233666666666666. Std of Reward: 0.5990611865561775.
Step: 8211000. Mean Reward: 0.6376514195583596. Std of Reward: 0.5843801887385458.
Step: 8212000. Mean Reward: 0.6189769357495882. Std of Reward: 0.6004975911391499.
Step: 8213000. Mean Reward: 0.6029446254071661. Std of Reward: 0.6010006749296559.
Step: 8214000. Mean Reward: 0.6026822580645161. Std of Reward: 0.6050471701217356.
Step: 8215000. Mean Reward: 0.6110355411954765. Std of Reward: 0.6054700936280989.
Step

Step: 8304000. Mean Reward: 0.5818121911037891. Std of Reward: 0.6143052445049902.
Step: 8305000. Mean Reward: 0.6302698675496688. Std of Reward: 0.5837348960162779.
Step: 8306000. Mean Reward: 0.5905177065767285. Std of Reward: 0.6056419921909534.
Step: 8307000. Mean Reward: 0.5853338735818477. Std of Reward: 0.6102256659686442.
Step: 8308000. Mean Reward: 0.5893424657534247. Std of Reward: 0.6091215147271594.
Step: 8309000. Mean Reward: 0.5876247960848288. Std of Reward: 0.6127695021873469.
Step: 8310000. Mean Reward: 0.571340872374798. Std of Reward: 0.6167855269291648.
Step: 8311000. Mean Reward: 0.5997328990228012. Std of Reward: 0.6078145640672384.
Step: 8312000. Mean Reward: 0.5895279106858055. Std of Reward: 0.6069473415192569.
Step: 8313000. Mean Reward: 0.5879230769230769. Std of Reward: 0.6146434722754544.
Step: 8314000. Mean Reward: 0.6141744749596122. Std of Reward: 0.594190092618448.
Step: 8315000. Mean Reward: 0.6064835526315789. Std of Reward: 0.6044343373357745.
Step: 

Step: 8403000. Mean Reward: 0.5867382875605817. Std of Reward: 0.6136728164834802.
Step: 8404000. Mean Reward: 0.6146677471636953. Std of Reward: 0.5962688320531847.
Step: 8405000. Mean Reward: 0.5824081300813009. Std of Reward: 0.612808614216118.
Step: 8406000. Mean Reward: 0.5959103214890017. Std of Reward: 0.6008552086863194.
Step: 8407000. Mean Reward: 0.5874629032258064. Std of Reward: 0.6093905656695509.
Step: 8408000. Mean Reward: 0.5981111111111111. Std of Reward: 0.6076941650764138.
Step: 8409000. Mean Reward: 0.5681214057507987. Std of Reward: 0.6195552702257302.
Step: 8410000. Mean Reward: 0.6142. Std of Reward: 0.5936801750470555.
Step: 8411000. Mean Reward: 0.6104322169059011. Std of Reward: 0.6026774892853205.
Step: 8412000. Mean Reward: 0.6093657718120805. Std of Reward: 0.5928608637740175.
Step: 8413000. Mean Reward: 0.6017004991680531. Std of Reward: 0.6006039113973319.
Step: 8414000. Mean Reward: 0.5936072013093289. Std of Reward: 0.6088305359347564.
Step: 8415000. Me

Step: 8502000. Mean Reward: 0.5903622448979593. Std of Reward: 0.6105807732670558.
Step: 8503000. Mean Reward: 0.5729102773246328. Std of Reward: 0.6184201943752963.
Step: 8504000. Mean Reward: 0.5688762711864407. Std of Reward: 0.6203023128019529.
Step: 8505000. Mean Reward: 0.5780737179487179. Std of Reward: 0.6150657388892732.
Step: 8506000. Mean Reward: 0.6384867549668874. Std of Reward: 0.5772621681278979.
Step: 8507000. Mean Reward: 0.614661101836394. Std of Reward: 0.5977871551069043.
Step: 8508000. Mean Reward: 0.5955504885993486. Std of Reward: 0.6077881562605846.
Step: 8509000. Mean Reward: 0.5992257001647446. Std of Reward: 0.6102980826763379.
Step: 8510000. Mean Reward: 0.5761786310517528. Std of Reward: 0.616963751172815.
Step: 8511000. Mean Reward: 0.5778482871125612. Std of Reward: 0.6155198188980457.
Step: 8512000. Mean Reward: 0.5811941747572815. Std of Reward: 0.6203175779963394.
Step: 8513000. Mean Reward: 0.6419119496855346. Std of Reward: 0.5867236675458584.
Step: 

Saved Model
Step: 8601000. Mean Reward: 0.6008781725888325. Std of Reward: 0.5995358938667162.
Step: 8602000. Mean Reward: 0.6291713810316139. Std of Reward: 0.5855794367845818.
Step: 8603000. Mean Reward: 0.6143053173241853. Std of Reward: 0.5943331470048367.
Step: 8604000. Mean Reward: 0.641187808896211. Std of Reward: 0.5770725481671105.
Step: 8605000. Mean Reward: 0.5755107438016528. Std of Reward: 0.6093895294841398.
Step: 8606000. Mean Reward: 0.6024160839160839. Std of Reward: 0.604136144192953.
Step: 8607000. Mean Reward: 0.606509060955519. Std of Reward: 0.596028829201366.
Step: 8608000. Mean Reward: 0.6274086378737542. Std of Reward: 0.5914962044894178.
Step: 8609000. Mean Reward: 0.6187714776632303. Std of Reward: 0.5906861008781811.
Step: 8610000. Mean Reward: 0.587501677852349. Std of Reward: 0.6094891950283001.
Step: 8611000. Mean Reward: 0.605717607973422. Std of Reward: 0.6034390505234177.
Step: 8612000. Mean Reward: 0.5967149917627678. Std of Reward: 0.6042202699229733

Step: 8700000. Mean Reward: 0.607631746031746. Std of Reward: 0.6049555415451269.
Saved Model
Step: 8701000. Mean Reward: 0.6214916387959866. Std of Reward: 0.5990054486580765.
Step: 8702000. Mean Reward: 0.583388178913738. Std of Reward: 0.6166279897569001.
Step: 8703000. Mean Reward: 0.5630922098569157. Std of Reward: 0.6318435622880195.
Step: 8704000. Mean Reward: 0.569262658227848. Std of Reward: 0.6220532258744278.
Step: 8705000. Mean Reward: 0.5812355769230769. Std of Reward: 0.6219744671389205.
Step: 8706000. Mean Reward: 0.5455395569620253. Std of Reward: 0.6397492969845366.
Step: 8707000. Mean Reward: 0.5771233974358975. Std of Reward: 0.6139187303947348.
Step: 8708000. Mean Reward: 0.6095582922824302. Std of Reward: 0.6052376593292246.
Step: 8709000. Mean Reward: 0.577848200312989. Std of Reward: 0.6204563672954113.
Step: 8710000. Mean Reward: 0.56507336523126. Std of Reward: 0.6285933901338251.
Step: 8711000. Mean Reward: 0.568031545741325. Std of Reward: 0.6190163604666447.

Step: 8799000. Mean Reward: 0.5676755407653911. Std of Reward: 0.6213967998892183.
Step: 8800000. Mean Reward: 0.6041639610389611. Std of Reward: 0.6068674041079533.
Saved Model
Step: 8801000. Mean Reward: 0.6016108452950558. Std of Reward: 0.6050805857282832.
Step: 8802000. Mean Reward: 0.6146761268781302. Std of Reward: 0.5993206604462702.
Step: 8803000. Mean Reward: 0.5612548076923076. Std of Reward: 0.6222305596544674.
Step: 8804000. Mean Reward: 0.5868716666666668. Std of Reward: 0.6106242231224445.
Step: 8805000. Mean Reward: 0.6032674230145867. Std of Reward: 0.6025972350372664.
Step: 8806000. Mean Reward: 0.5889806451612903. Std of Reward: 0.6011036325333138.
Step: 8807000. Mean Reward: 0.6082009884678747. Std of Reward: 0.6036074849166947.
Step: 8808000. Mean Reward: 0.5587516233766234. Std of Reward: 0.6238455012364432.
Step: 8809000. Mean Reward: 0.6541061806656101. Std of Reward: 0.5714050909373499.
Step: 8810000. Mean Reward: 0.6137698541329012. Std of Reward: 0.5936655866

Step: 8899000. Mean Reward: 0.6117499999999999. Std of Reward: 0.5968304809440206.
Step: 8900000. Mean Reward: 0.6107195512820512. Std of Reward: 0.594400484329878.
Saved Model
Step: 8901000. Mean Reward: 0.6148903436988543. Std of Reward: 0.5965891068764001.
Step: 8902000. Mean Reward: 0.589492561983471. Std of Reward: 0.6123200686516828.
Step: 8903000. Mean Reward: 0.6134577702702704. Std of Reward: 0.5946662545476504.
Step: 8904000. Mean Reward: 0.5963377483443708. Std of Reward: 0.6066926722063135.
Step: 8905000. Mean Reward: 0.6011698412698413. Std of Reward: 0.6042920885604909.
Step: 8906000. Mean Reward: 0.631115. Std of Reward: 0.5883653358600136.
Step: 8907000. Mean Reward: 0.6173943894389439. Std of Reward: 0.5918998398078523.
Step: 8908000. Mean Reward: 0.6051645161290322. Std of Reward: 0.6019646292515259.
Step: 8909000. Mean Reward: 0.5976551155115511. Std of Reward: 0.6062235682634386.
Step: 8910000. Mean Reward: 0.5741607142857142. Std of Reward: 0.6143790248567291.
Step

Step: 8999000. Mean Reward: 0.5894058919803601. Std of Reward: 0.6045061927325704.
Step: 9000000. Mean Reward: 0.6088888888888889. Std of Reward: 0.5964143965695881.
Saved Model
Step: 9001000. Mean Reward: 0.5932580128205128. Std of Reward: 0.6075304248210378.
Step: 9002000. Mean Reward: 0.5905317725752508. Std of Reward: 0.6001554131168971.
Step: 9003000. Mean Reward: 0.5790928689883913. Std of Reward: 0.6117953987642835.
Step: 9004000. Mean Reward: 0.6189689034369885. Std of Reward: 0.5886226292690999.
Step: 9005000. Mean Reward: 0.6206677740863787. Std of Reward: 0.592296080605828.
Step: 9006000. Mean Reward: 0.6418190789473684. Std of Reward: 0.5790128283772886.
Step: 9007000. Mean Reward: 0.599263247863248. Std of Reward: 0.5978341270963672.
Step: 9008000. Mean Reward: 0.6104703947368422. Std of Reward: 0.5975704971997265.
Step: 9009000. Mean Reward: 0.5789122516556292. Std of Reward: 0.6041493943095523.
Step: 9010000. Mean Reward: 0.6463056027164685. Std of Reward: 0.568650362068

Step: 9098000. Mean Reward: 0.6159240310077518. Std of Reward: 0.5956210169051288.
Step: 9099000. Mean Reward: 0.6102003231017771. Std of Reward: 0.6042337156547811.
Step: 9100000. Mean Reward: 0.6473117932148628. Std of Reward: 0.5828819115543914.
Saved Model
Step: 9101000. Mean Reward: 0.5646897106109324. Std of Reward: 0.6257866626326061.
Step: 9102000. Mean Reward: 0.5961693290734824. Std of Reward: 0.612404713917181.
Step: 9103000. Mean Reward: 0.58788125. Std of Reward: 0.6135734916441856.
Step: 9104000. Mean Reward: 0.6027524590163934. Std of Reward: 0.6069361832406601.
Step: 9105000. Mean Reward: 0.6022969004893963. Std of Reward: 0.605475801422194.
Step: 9106000. Mean Reward: 0.6281909385113268. Std of Reward: 0.5932468112653161.
Step: 9107000. Mean Reward: 0.6195031545741325. Std of Reward: 0.5998161501892922.
Step: 9108000. Mean Reward: 0.6184788732394366. Std of Reward: 0.5942647295629164.
Step: 9109000. Mean Reward: 0.6045815831987076. Std of Reward: 0.6063021456414683.
St

Step: 9197000. Mean Reward: 0.5932624584717608. Std of Reward: 0.6072227208995725.
Step: 9198000. Mean Reward: 0.6246140651801029. Std of Reward: 0.5873565605919783.
Step: 9199000. Mean Reward: 0.5842873949579831. Std of Reward: 0.6111896401508026.
Step: 9200000. Mean Reward: 0.5580339558573855. Std of Reward: 0.6174494794134857.
Saved Model
Step: 9201000. Mean Reward: 0.6155252525252526. Std of Reward: 0.5933302322601007.
Step: 9202000. Mean Reward: 0.6181374795417349. Std of Reward: 0.5916089942256111.
Step: 9203000. Mean Reward: 0.6396508196721312. Std of Reward: 0.5795768419959657.
Step: 9204000. Mean Reward: 0.605457429048414. Std of Reward: 0.6062656853581653.
Step: 9205000. Mean Reward: 0.5706710310965629. Std of Reward: 0.6165011072039659.
Step: 9206000. Mean Reward: 0.6089708265802268. Std of Reward: 0.6006265926922264.
Step: 9207000. Mean Reward: 0.6013043478260869. Std of Reward: 0.5970178742341049.
Step: 9208000. Mean Reward: 0.6315026086956521. Std of Reward: 0.58320420429

Step: 9296000. Mean Reward: 0.6267453310696094. Std of Reward: 0.590175899363551.
Step: 9297000. Mean Reward: 0.595610197368421. Std of Reward: 0.6039534889409187.
Step: 9298000. Mean Reward: 0.629897689768977. Std of Reward: 0.5837054527235609.
Step: 9299000. Mean Reward: 0.6206468646864686. Std of Reward: 0.5870126536059254.
Step: 9300000. Mean Reward: 0.6189123376623377. Std of Reward: 0.5941531860968859.
Saved Model
Step: 9301000. Mean Reward: 0.5835117845117845. Std of Reward: 0.6121582320279426.
Step: 9302000. Mean Reward: 0.6221687898089172. Std of Reward: 0.590476079255054.
Step: 9303000. Mean Reward: 0.6194233333333333. Std of Reward: 0.5875224172649831.
Step: 9304000. Mean Reward: 0.6037328881469115. Std of Reward: 0.5944024865050994.
Step: 9305000. Mean Reward: 0.6167416267942584. Std of Reward: 0.5976083195356426.
Step: 9306000. Mean Reward: 0.607349025974026. Std of Reward: 0.5940994412032486.
Step: 9307000. Mean Reward: 0.6384058441558442. Std of Reward: 0.580893328689954

Step: 9395000. Mean Reward: 0.6320872483221476. Std of Reward: 0.5869636913285743.
Step: 9396000. Mean Reward: 0.5986016666666666. Std of Reward: 0.5985529018646184.
Step: 9397000. Mean Reward: 0.5574685990338164. Std of Reward: 0.6330631163157605.
Step: 9398000. Mean Reward: 0.5616218487394957. Std of Reward: 0.6197189765876148.
Step: 9399000. Mean Reward: 0.6043011647254575. Std of Reward: 0.6057687522314169.
Step: 9400000. Mean Reward: 0.6361715210355987. Std of Reward: 0.5833576229137682.
Saved Model
Step: 9401000. Mean Reward: 0.6365073409461663. Std of Reward: 0.5778883329097048.
Step: 9402000. Mean Reward: 0.635499176276771. Std of Reward: 0.581974245694524.
Step: 9403000. Mean Reward: 0.6283092621664049. Std of Reward: 0.5844017967861638.
Step: 9404000. Mean Reward: 0.6081483979763912. Std of Reward: 0.5983496455942481.
Step: 9405000. Mean Reward: 0.6532010135135135. Std of Reward: 0.5726814581414111.
Step: 9406000. Mean Reward: 0.6230162601626016. Std of Reward: 0.584996195995

Step: 9494000. Mean Reward: 0.6487406199021207. Std of Reward: 0.5712926839758926.
Step: 9495000. Mean Reward: 0.5814870129870129. Std of Reward: 0.6107922121758873.
Step: 9496000. Mean Reward: 0.6142803278688524. Std of Reward: 0.5967886836156461.
Step: 9497000. Mean Reward: 0.6546911764705883. Std of Reward: 0.5631903122663262.
Step: 9498000. Mean Reward: 0.6566099173553719. Std of Reward: 0.5692191751189815.
Step: 9499000. Mean Reward: 0.6331094276094276. Std of Reward: 0.5786231566277552.
Step: 9500000. Mean Reward: 0.6592808988764044. Std of Reward: 0.5705994170796038.
Saved Model
Step: 9501000. Mean Reward: 0.6399528619528618. Std of Reward: 0.582750532357706.
Step: 9502000. Mean Reward: 0.6384605263157894. Std of Reward: 0.5696572536181059.
Step: 9503000. Mean Reward: 0.6383434343434343. Std of Reward: 0.5834017130764432.
Step: 9504000. Mean Reward: 0.6324474576271186. Std of Reward: 0.5781623751032765.
Step: 9505000. Mean Reward: 0.648107973421927. Std of Reward: 0.565344868701

Step: 9593000. Mean Reward: 0.605. Std of Reward: 0.600744768576993.
Step: 9594000. Mean Reward: 0.6429469320066336. Std of Reward: 0.571652476945319.
Step: 9595000. Mean Reward: 0.6625906821963394. Std of Reward: 0.5589191067586323.
Step: 9596000. Mean Reward: 0.6257626527050612. Std of Reward: 0.5788783999735738.
Step: 9597000. Mean Reward: 0.635158940397351. Std of Reward: 0.5785055002424763.
Step: 9598000. Mean Reward: 0.5985728813559322. Std of Reward: 0.6068323482854191.
Step: 9599000. Mean Reward: 0.6371708126036484. Std of Reward: 0.572615177615524.
Step: 9600000. Mean Reward: 0.6685889464594128. Std of Reward: 0.5557672599837764.
Saved Model
Step: 9601000. Mean Reward: 0.6266126279863481. Std of Reward: 0.584380946435848.
Step: 9602000. Mean Reward: 0.6768126036484245. Std of Reward: 0.5480138052276482.
Step: 9603000. Mean Reward: 0.6339795570698467. Std of Reward: 0.5824537500625989.
Step: 9604000. Mean Reward: 0.6532656514382403. Std of Reward: 0.5562621861897888.
Step: 9605

Step: 9693000. Mean Reward: 0.6296036484245439. Std of Reward: 0.5838613364083346.
Step: 9694000. Mean Reward: 0.6254557595993323. Std of Reward: 0.5876136746881547.
Step: 9695000. Mean Reward: 0.5798659966499161. Std of Reward: 0.6124749655154369.
Step: 9696000. Mean Reward: 0.5766957983193277. Std of Reward: 0.6171573433517049.
Step: 9697000. Mean Reward: 0.662044982698962. Std of Reward: 0.5705336463129697.
Step: 9698000. Mean Reward: 0.5787986906710311. Std of Reward: 0.6092848001204856.
Step: 9699000. Mean Reward: 0.6204292763157895. Std of Reward: 0.5927982884486377.
Step: 9700000. Mean Reward: 0.6561492537313434. Std of Reward: 0.5701289088865774.
Saved Model
Step: 9701000. Mean Reward: 0.615152027027027. Std of Reward: 0.5935316117824563.
Step: 9702000. Mean Reward: 0.6358856209150326. Std of Reward: 0.5821786470779423.
Step: 9703000. Mean Reward: 0.6422142857142856. Std of Reward: 0.5743890972450761.
Step: 9704000. Mean Reward: 0.6124186046511628. Std of Reward: 0.600529285528

Step: 9792000. Mean Reward: 0.6000229132569559. Std of Reward: 0.5957669329759075.
Step: 9793000. Mean Reward: 0.6283338983050847. Std of Reward: 0.5871140870595648.
Step: 9794000. Mean Reward: 0.6150253378378379. Std of Reward: 0.5923000897496584.
Step: 9795000. Mean Reward: 0.6247403508771929. Std of Reward: 0.5794754215333731.
Step: 9796000. Mean Reward: 0.633358361774744. Std of Reward: 0.5847231379271307.
Step: 9797000. Mean Reward: 0.6404087837837837. Std of Reward: 0.5756269541363144.
Step: 9798000. Mean Reward: 0.6519534482758621. Std of Reward: 0.5701290957002383.
Step: 9799000. Mean Reward: 0.6334636678200692. Std of Reward: 0.5796866946074286.
Step: 9800000. Mean Reward: 0.6119546218487395. Std of Reward: 0.5884129427690509.
Saved Model
Step: 9801000. Mean Reward: 0.6474822335025381. Std of Reward: 0.5665068843560165.
Step: 9802000. Mean Reward: 0.6433339011925042. Std of Reward: 0.5680416439783612.
Step: 9803000. Mean Reward: 0.6627762237762237. Std of Reward: 0.55615200064

Step: 9892000. Mean Reward: 0.6172845117845118. Std of Reward: 0.5930589493208435.
Step: 9893000. Mean Reward: 0.6602844974446337. Std of Reward: 0.5648158034297233.
Step: 9894000. Mean Reward: 0.641601652892562. Std of Reward: 0.569108636085141.
Step: 9895000. Mean Reward: 0.6409627118644068. Std of Reward: 0.5754723294819877.
Step: 9896000. Mean Reward: 0.6313101160862356. Std of Reward: 0.5863231227962321.
Step: 9897000. Mean Reward: 0.6274888888888889. Std of Reward: 0.5838816795280608.
Step: 9898000. Mean Reward: 0.618. Std of Reward: 0.590617118756543.
Step: 9899000. Mean Reward: 0.6325581787521078. Std of Reward: 0.572805901459445.
Step: 9900000. Mean Reward: 0.6357309027777778. Std of Reward: 0.5776862122972571.
Saved Model
Step: 9901000. Mean Reward: 0.6395966386554621. Std of Reward: 0.5785980291328622.
Step: 9902000. Mean Reward: 0.6259898132427844. Std of Reward: 0.5859032322144332.
Step: 9903000. Mean Reward: 0.6552412060301508. Std of Reward: 0.5707588783922131.
Step: 990

Step: 9991000. Mean Reward: 0.6355393835616437. Std of Reward: 0.5758850411676933.
Step: 9992000. Mean Reward: 0.6221819699499166. Std of Reward: 0.5948218166796865.
Step: 9993000. Mean Reward: 0.5867512355848435. Std of Reward: 0.6074073558507803.
Step: 9994000. Mean Reward: 0.5998341625207296. Std of Reward: 0.607425215204928.
Step: 9995000. Mean Reward: 0.6302802013422819. Std of Reward: 0.5873832772902442.
Step: 9996000. Mean Reward: 0.5815927750410509. Std of Reward: 0.6102923160380798.
Step: 9997000. Mean Reward: 0.5936133333333333. Std of Reward: 0.6116353383802767.
Step: 9998000. Mean Reward: 0.6399013377926421. Std of Reward: 0.5802620691214329.
Step: 9999000. Mean Reward: 0.6522516891891893. Std of Reward: 0.5745267728018125.
Step: 10000000. Mean Reward: 0.618471088435374. Std of Reward: 0.5953641692796776.
Saved Model
Step: 10001000. Mean Reward: 0.6263186991869918. Std of Reward: 0.5860600395655241.
Step: 10002000. Mean Reward: 0.639763458401305. Std of Reward: 0.5816393363

Step: 10089000. Mean Reward: 0.6036806722689075. Std of Reward: 0.6067819999874681.
Step: 10090000. Mean Reward: 0.6476369426751593. Std of Reward: 0.5793627171283082.
Step: 10091000. Mean Reward: 0.5941644518272425. Std of Reward: 0.6087277276233796.
Step: 10092000. Mean Reward: 0.6467675941080197. Std of Reward: 0.5751850808633083.
Step: 10093000. Mean Reward: 0.625664406779661. Std of Reward: 0.5839064667851022.
Step: 10094000. Mean Reward: 0.6511268533772652. Std of Reward: 0.5725211782623958.
Step: 10095000. Mean Reward: 0.6169216354344124. Std of Reward: 0.5886176228710751.
Step: 10096000. Mean Reward: 0.6167444253859349. Std of Reward: 0.5900641982100189.
Step: 10097000. Mean Reward: 0.5973556280587276. Std of Reward: 0.6023310705526933.
Step: 10098000. Mean Reward: 0.6333801652892561. Std of Reward: 0.5881284232641816.
Step: 10099000. Mean Reward: 0.6506638655462185. Std of Reward: 0.5720283749986053.
Step: 10100000. Mean Reward: 0.6299855072463768. Std of Reward: 0.58252551777

Step: 10187000. Mean Reward: 0.6523039999999999. Std of Reward: 0.5725102398944495.
Step: 10188000. Mean Reward: 0.5997582236842106. Std of Reward: 0.6081296599579147.
Step: 10189000. Mean Reward: 0.6332608695652174. Std of Reward: 0.5796967655367984.
Step: 10190000. Mean Reward: 0.6090086355785838. Std of Reward: 0.599169282374331.
Step: 10191000. Mean Reward: 0.6299523809523809. Std of Reward: 0.5809376708143296.
Step: 10192000. Mean Reward: 0.6093016393442623. Std of Reward: 0.5979319037087103.
Step: 10193000. Mean Reward: 0.6045140495867769. Std of Reward: 0.6080882998225846.
Step: 10194000. Mean Reward: 0.6645073170731708. Std of Reward: 0.5644715029022607.
Step: 10195000. Mean Reward: 0.6352599337748345. Std of Reward: 0.5790726089256361.
Step: 10196000. Mean Reward: 0.5925237288135593. Std of Reward: 0.6049149305197855.
Step: 10197000. Mean Reward: 0.6129870340356564. Std of Reward: 0.5911652557765842.
Step: 10198000. Mean Reward: 0.6054187898089172. Std of Reward: 0.60702270817

Step: 10285000. Mean Reward: 0.6550834782608695. Std of Reward: 0.568754841344774.
Step: 10286000. Mean Reward: 0.6267435008665511. Std of Reward: 0.5811428411321802.
Step: 10287000. Mean Reward: 0.6431962774957698. Std of Reward: 0.5734104892873182.
Step: 10288000. Mean Reward: 0.6283829059829059. Std of Reward: 0.5745004760422325.
Step: 10289000. Mean Reward: 0.6425225694444444. Std of Reward: 0.5684801003470747.
Step: 10290000. Mean Reward: 0.6346601769911504. Std of Reward: 0.5757915133285725.
Step: 10291000. Mean Reward: 0.6179560632688927. Std of Reward: 0.5862310974788747.
Step: 10292000. Mean Reward: 0.6488040540540541. Std of Reward: 0.5732616220198784.
Step: 10293000. Mean Reward: 0.6151867768595041. Std of Reward: 0.5864367507541337.
Step: 10294000. Mean Reward: 0.6425381944444444. Std of Reward: 0.573440068734365.
Step: 10295000. Mean Reward: 0.6543116438356165. Std of Reward: 0.5669677717895284.
Step: 10296000. Mean Reward: 0.5982652388797365. Std of Reward: 0.599852376535

Step: 10383000. Mean Reward: 0.6188780068728522. Std of Reward: 0.5894222669945691.
Step: 10384000. Mean Reward: 0.6530560271646859. Std of Reward: 0.5692645050038002.
Step: 10385000. Mean Reward: 0.604401680672269. Std of Reward: 0.5866134240203447.
Step: 10386000. Mean Reward: 0.632446735395189. Std of Reward: 0.5861858393275753.
Step: 10387000. Mean Reward: 0.615710569105691. Std of Reward: 0.5879145069219415.
Step: 10388000. Mean Reward: 0.63623322147651. Std of Reward: 0.5788117175149231.
Step: 10389000. Mean Reward: 0.6325899653979239. Std of Reward: 0.5838035999292144.
Step: 10390000. Mean Reward: 0.6205922671353251. Std of Reward: 0.5753239652898735.
Step: 10391000. Mean Reward: 0.6624307432432432. Std of Reward: 0.5626638907796424.
Step: 10392000. Mean Reward: 0.6079918962722852. Std of Reward: 0.6000714584128732.
Step: 10393000. Mean Reward: 0.6319549393414212. Std of Reward: 0.5855871734059572.
Step: 10394000. Mean Reward: 0.6171723549488055. Std of Reward: 0.586324136018697

Step: 10481000. Mean Reward: 0.6745289115646258. Std of Reward: 0.548798766305843.
Step: 10482000. Mean Reward: 0.6289262295081967. Std of Reward: 0.5767346005537967.
Step: 10483000. Mean Reward: 0.6205324232081911. Std of Reward: 0.5844559383129593.
Step: 10484000. Mean Reward: 0.6067080536912752. Std of Reward: 0.5946483440003733.
Step: 10485000. Mean Reward: 0.6476282722513089. Std of Reward: 0.5745885481710683.
Step: 10486000. Mean Reward: 0.6134819897084048. Std of Reward: 0.5938223590443655.
Step: 10487000. Mean Reward: 0.6591170568561873. Std of Reward: 0.5540221972027336.
Step: 10488000. Mean Reward: 0.6286301369863013. Std of Reward: 0.5823921649106351.
Step: 10489000. Mean Reward: 0.6727077464788732. Std of Reward: 0.5484577183459374.
Step: 10490000. Mean Reward: 0.6531398601398601. Std of Reward: 0.5649345942883625.
Step: 10491000. Mean Reward: 0.6194585448392556. Std of Reward: 0.5862384676952426.
Step: 10492000. Mean Reward: 0.6226841216216217. Std of Reward: 0.57966044046

Step: 10579000. Mean Reward: 0.6593650519031141. Std of Reward: 0.5563317168685024.
Step: 10580000. Mean Reward: 0.6528904593639576. Std of Reward: 0.5547364274156774.
Step: 10581000. Mean Reward: 0.7036817391304347. Std of Reward: 0.5231231142121923.
Step: 10582000. Mean Reward: 0.6632535460992908. Std of Reward: 0.5553017018167842.
Step: 10583000. Mean Reward: 0.661432387312187. Std of Reward: 0.5579587574165076.
Step: 10584000. Mean Reward: 0.657090909090909. Std of Reward: 0.5634889449221083.
Step: 10585000. Mean Reward: 0.6559092526690391. Std of Reward: 0.5656570996623077.
Step: 10586000. Mean Reward: 0.6366955074875208. Std of Reward: 0.5700557600041659.
Step: 10587000. Mean Reward: 0.6249522123893806. Std of Reward: 0.5773901821949394.
Step: 10588000. Mean Reward: 0.6318547008547009. Std of Reward: 0.5750331817249675.
Step: 10589000. Mean Reward: 0.6595389830508475. Std of Reward: 0.5617548270984926.
Step: 10590000. Mean Reward: 0.6482907801418439. Std of Reward: 0.568370506332

Step: 10677000. Mean Reward: 0.6274363327674023. Std of Reward: 0.5786572696853693.
Step: 10678000. Mean Reward: 0.6620980392156863. Std of Reward: 0.566417384785288.
Step: 10679000. Mean Reward: 0.5861748366013072. Std of Reward: 0.6121091740864454.
Step: 10680000. Mean Reward: 0.6509932998324958. Std of Reward: 0.563802922392649.
Step: 10681000. Mean Reward: 0.6518681506849315. Std of Reward: 0.5681730256425528.
Step: 10682000. Mean Reward: 0.6493596774193549. Std of Reward: 0.5679606398106206.
Step: 10683000. Mean Reward: 0.6559429967426711. Std of Reward: 0.5689246542583134.
Step: 10684000. Mean Reward: 0.6401945337620579. Std of Reward: 0.5843119369382905.
Step: 10685000. Mean Reward: 0.6487775891341256. Std of Reward: 0.5645166718304193.
Step: 10686000. Mean Reward: 0.6457893835616438. Std of Reward: 0.575400341713016.
Step: 10687000. Mean Reward: 0.6474713584288053. Std of Reward: 0.5754237124522487.
Step: 10688000. Mean Reward: 0.651175468483816. Std of Reward: 0.57373546149161

Step: 10775000. Mean Reward: 0.605424040066778. Std of Reward: 0.5942072598028197.
Step: 10776000. Mean Reward: 0.6581245614035087. Std of Reward: 0.5603586103592547.
Step: 10777000. Mean Reward: 0.6141756756756758. Std of Reward: 0.5915323060814658.
Step: 10778000. Mean Reward: 0.6110896785109983. Std of Reward: 0.5976084178297522.
Step: 10779000. Mean Reward: 0.6275159128978225. Std of Reward: 0.5769162710048399.
Step: 10780000. Mean Reward: 0.6369930795847751. Std of Reward: 0.5807856836499421.
Step: 10781000. Mean Reward: 0.6483062605752961. Std of Reward: 0.5773490464466763.
Step: 10782000. Mean Reward: 0.6084619289340102. Std of Reward: 0.5908764359784431.
Step: 10783000. Mean Reward: 0.6518048780487804. Std of Reward: 0.5728857329107789.
Step: 10784000. Mean Reward: 0.6402546689303905. Std of Reward: 0.5710428577002721.
Step: 10785000. Mean Reward: 0.648706390328152. Std of Reward: 0.5717651215384412.
Step: 10786000. Mean Reward: 0.6484229452054794. Std of Reward: 0.575136160966

Step: 10873000. Mean Reward: 0.6282683760683762. Std of Reward: 0.5748917056919775.
Step: 10874000. Mean Reward: 0.6053568904593639. Std of Reward: 0.591826963997681.
Step: 10875000. Mean Reward: 0.5899489436619718. Std of Reward: 0.5969715523918048.
Step: 10876000. Mean Reward: 0.5954756756756756. Std of Reward: 0.5904941043280323.
Step: 10877000. Mean Reward: 0.6108765880217786. Std of Reward: 0.5861467913368877.
Step: 10878000. Mean Reward: 0.6241492805755395. Std of Reward: 0.5739174210046041.
Step: 10879000. Mean Reward: 0.6483390410958904. Std of Reward: 0.5714723726676278.
Step: 10880000. Mean Reward: 0.6155492957746479. Std of Reward: 0.5914507358369243.
Step: 10881000. Mean Reward: 0.6646325503355704. Std of Reward: 0.552068283954557.
Step: 10882000. Mean Reward: 0.6443873873873873. Std of Reward: 0.5630318342042556.
Step: 10883000. Mean Reward: 0.6018738738738738. Std of Reward: 0.5908474463115847.
Step: 10884000. Mean Reward: 0.6220376344086022. Std of Reward: 0.576202165788

Step: 10971000. Mean Reward: 0.6455567010309279. Std of Reward: 0.5747612856175918.
Step: 10972000. Mean Reward: 0.6106113902847571. Std of Reward: 0.588998675951204.
Step: 10973000. Mean Reward: 0.6307169811320754. Std of Reward: 0.5768255416433006.
Step: 10974000. Mean Reward: 0.6434050847457626. Std of Reward: 0.5761706288361699.
Step: 10975000. Mean Reward: 0.6531340206185567. Std of Reward: 0.5554478387986604.
Step: 10976000. Mean Reward: 0.6681448040885861. Std of Reward: 0.5543364261979995.
Step: 10977000. Mean Reward: 0.6542852404643449. Std of Reward: 0.5622848362225658.
Step: 10978000. Mean Reward: 0.6372176870748298. Std of Reward: 0.5740029926799465.
Step: 10979000. Mean Reward: 0.6799216354344123. Std of Reward: 0.5551273905630788.
Step: 10980000. Mean Reward: 0.640885521885522. Std of Reward: 0.5714877333058023.
Step: 10981000. Mean Reward: 0.64380276816609. Std of Reward: 0.5703262732729545.
Step: 10982000. Mean Reward: 0.5834680134680135. Std of Reward: 0.60980409435215

Step: 11069000. Mean Reward: 0.6500033840947548. Std of Reward: 0.5597846969379765.
Step: 11070000. Mean Reward: 0.637459234608985. Std of Reward: 0.5816599878934596.
Step: 11071000. Mean Reward: 0.6145238907849829. Std of Reward: 0.5958193918653083.
Step: 11072000. Mean Reward: 0.6461317957166393. Std of Reward: 0.5713693016261568.
Step: 11073000. Mean Reward: 0.6533147208121828. Std of Reward: 0.5704666974475064.
Step: 11074000. Mean Reward: 0.63817089678511. Std of Reward: 0.5698715171776855.
Step: 11075000. Mean Reward: 0.6135795847750866. Std of Reward: 0.5939073868463774.
Step: 11076000. Mean Reward: 0.6896473684210526. Std of Reward: 0.5402402261649525.
Step: 11077000. Mean Reward: 0.6362755632582322. Std of Reward: 0.569673312665845.
Step: 11078000. Mean Reward: 0.6346621848739497. Std of Reward: 0.5800249184724806.
Step: 11079000. Mean Reward: 0.6551655737704918. Std of Reward: 0.5634668132503519.
Step: 11080000. Mean Reward: 0.679536013400335. Std of Reward: 0.553191372188842

Step: 11167000. Mean Reward: 0.6607719928186714. Std of Reward: 0.5545358128753983.
Step: 11168000. Mean Reward: 0.6524457831325302. Std of Reward: 0.5575811812988961.
Step: 11169000. Mean Reward: 0.6698375451263539. Std of Reward: 0.5495478406873071.
Step: 11170000. Mean Reward: 0.617859402460457. Std of Reward: 0.5841694700063641.
Step: 11171000. Mean Reward: 0.700536028119508. Std of Reward: 0.5220507478436544.
Step: 11172000. Mean Reward: 0.6725528756957329. Std of Reward: 0.5371655549968117.
Step: 11173000. Mean Reward: 0.627523725834798. Std of Reward: 0.5733214422561024.
Step: 11174000. Mean Reward: 0.6706575809199319. Std of Reward: 0.5551896681201243.
Step: 11175000. Mean Reward: 0.6436347517730496. Std of Reward: 0.5678635536199721.
Step: 11176000. Mean Reward: 0.6427200704225351. Std of Reward: 0.5584883087456969.
Step: 11177000. Mean Reward: 0.6601996466431096. Std of Reward: 0.5616927256888306.
Step: 11178000. Mean Reward: 0.6174504347826086. Std of Reward: 0.5801234176597

Step: 11265000. Mean Reward: 0.6213327645051194. Std of Reward: 0.5812950266767428.
Step: 11266000. Mean Reward: 0.6143842281879195. Std of Reward: 0.5956756963773204.
Step: 11267000. Mean Reward: 0.6365143824027072. Std of Reward: 0.58190564708658.
Step: 11268000. Mean Reward: 0.636764802631579. Std of Reward: 0.576356265699563.
Step: 11269000. Mean Reward: 0.6349252173913044. Std of Reward: 0.5819788428611441.
Step: 11270000. Mean Reward: 0.621874149659864. Std of Reward: 0.5777283991715406.
Step: 11271000. Mean Reward: 0.6593601398601399. Std of Reward: 0.562155348665782.
Step: 11272000. Mean Reward: 0.6109453924914676. Std of Reward: 0.5923225562730978.
Step: 11273000. Mean Reward: 0.6212879858657244. Std of Reward: 0.5852710375391579.
Step: 11274000. Mean Reward: 0.6456498257839721. Std of Reward: 0.574238709203806.
Step: 11275000. Mean Reward: 0.652288256227758. Std of Reward: 0.562408365493781.
Step: 11276000. Mean Reward: 0.6783057553956835. Std of Reward: 0.5364041950530145.
S

Step: 11363000. Mean Reward: 0.6343098827470687. Std of Reward: 0.5795059954651489.
Step: 11364000. Mean Reward: 0.6903852596314908. Std of Reward: 0.5315433005745972.
Step: 11365000. Mean Reward: 0.6198023255813954. Std of Reward: 0.5869757043831711.
Step: 11366000. Mean Reward: 0.6670479704797048. Std of Reward: 0.5481417598055676.
Step: 11367000. Mean Reward: 0.6433689655172413. Std of Reward: 0.5699165683667478.
Step: 11368000. Mean Reward: 0.6299964664310954. Std of Reward: 0.5742024334818594.
Step: 11369000. Mean Reward: 0.6953496621621621. Std of Reward: 0.534578911418763.
Step: 11370000. Mean Reward: 0.6595459272097055. Std of Reward: 0.5625189402286777.
Step: 11371000. Mean Reward: 0.6636783216783217. Std of Reward: 0.5609533013658687.
Step: 11372000. Mean Reward: 0.6880643478260869. Std of Reward: 0.5287804189940577.
Step: 11373000. Mean Reward: 0.6542787456445993. Std of Reward: 0.5622537918787657.
Step: 11374000. Mean Reward: 0.6628373702422146. Std of Reward: 0.55949414670

Step: 11461000. Mean Reward: 0.6013354735152489. Std of Reward: 0.6014725174296583.
Step: 11462000. Mean Reward: 0.6426984924623115. Std of Reward: 0.5749575466501754.
Step: 11463000. Mean Reward: 0.6471794019933554. Std of Reward: 0.5723724257234497.
Step: 11464000. Mean Reward: 0.6352666666666666. Std of Reward: 0.5886972640723646.
Step: 11465000. Mean Reward: 0.6778142620232173. Std of Reward: 0.5566293597254542.
Step: 11466000. Mean Reward: 0.6240284280936454. Std of Reward: 0.5850057463581969.
Step: 11467000. Mean Reward: 0.6527089041095889. Std of Reward: 0.5707418755890038.
Step: 11468000. Mean Reward: 0.6236006493506494. Std of Reward: 0.5906847333014648.
Step: 11469000. Mean Reward: 0.6298745762711864. Std of Reward: 0.5807932867174059.
Step: 11470000. Mean Reward: 0.6380236486486486. Std of Reward: 0.587779694303347.
Step: 11471000. Mean Reward: 0.6154144736842104. Std of Reward: 0.5998145266409107.
Step: 11472000. Mean Reward: 0.638212947189097. Std of Reward: 0.577760910893

Step: 11559000. Mean Reward: 0.6221746293245469. Std of Reward: 0.5913478356687107.
Step: 11560000. Mean Reward: 0.6297980456026059. Std of Reward: 0.5861127629908819.
Step: 11561000. Mean Reward: 0.6061437802907916. Std of Reward: 0.6059724293588447.
Step: 11562000. Mean Reward: 0.6239495934959349. Std of Reward: 0.5915494954686933.
Step: 11563000. Mean Reward: 0.6433677966101695. Std of Reward: 0.5717300316866635.
Step: 11564000. Mean Reward: 0.628. Std of Reward: 0.5859222578172283.
Step: 11565000. Mean Reward: 0.6238659127625202. Std of Reward: 0.5862059878671001.
Step: 11566000. Mean Reward: 0.6072472089314194. Std of Reward: 0.6050983400148287.
Step: 11567000. Mean Reward: 0.5988809135399674. Std of Reward: 0.6045886691806359.
Step: 11568000. Mean Reward: 0.6124230769230768. Std of Reward: 0.5998426604760513.
Step: 11569000. Mean Reward: 0.594418487394958. Std of Reward: 0.604983281658913.
Step: 11570000. Mean Reward: 0.6664194078947369. Std of Reward: 0.5584270231779966.
Step: 1

Step: 11658000. Mean Reward: 0.6775422297297298. Std of Reward: 0.5432957872152508.
Step: 11659000. Mean Reward: 0.6139052287581699. Std of Reward: 0.5944635337673758.
Step: 11660000. Mean Reward: 0.6508286189683861. Std of Reward: 0.5740890452358617.
Step: 11661000. Mean Reward: 0.6656314847942755. Std of Reward: 0.5518226622755145.
Step: 11662000. Mean Reward: 0.6306973244147156. Std of Reward: 0.5787514849200727.
Step: 11663000. Mean Reward: 0.655018302828619. Std of Reward: 0.568540035275965.
Step: 11664000. Mean Reward: 0.6183147540983607. Std of Reward: 0.592856385959096.
Step: 11665000. Mean Reward: 0.6448376623376624. Std of Reward: 0.5853265984843304.
Step: 11666000. Mean Reward: 0.616039408866995. Std of Reward: 0.5953455745420001.
Step: 11667000. Mean Reward: 0.6074659090909093. Std of Reward: 0.6001878430039699.
Step: 11668000. Mean Reward: 0.6666841216216216. Std of Reward: 0.5575049023095926.
Step: 11669000. Mean Reward: 0.6402966666666666. Std of Reward: 0.57412084267067

Step: 11756000. Mean Reward: 0.6432924836601307. Std of Reward: 0.5686637223871336.
Step: 11757000. Mean Reward: 0.6928827470686766. Std of Reward: 0.543853684802131.
Step: 11758000. Mean Reward: 0.6446188118811881. Std of Reward: 0.573032387454545.
Step: 11759000. Mean Reward: 0.6223963210702341. Std of Reward: 0.585905012836324.
Step: 11760000. Mean Reward: 0.6404564102564102. Std of Reward: 0.5841250380404406.
Step: 11761000. Mean Reward: 0.6542433774834437. Std of Reward: 0.5677537639353567.
Step: 11762000. Mean Reward: 0.6581388429752065. Std of Reward: 0.5662545179243479.
Step: 11763000. Mean Reward: 0.6656284779050736. Std of Reward: 0.5557381029388538.
Step: 11764000. Mean Reward: 0.6427772194304857. Std of Reward: 0.5747944376133057.
Step: 11765000. Mean Reward: 0.6376323777403036. Std of Reward: 0.5782737348424881.
Step: 11766000. Mean Reward: 0.672443143812709. Std of Reward: 0.5446875321107866.
Step: 11767000. Mean Reward: 0.67640625. Std of Reward: 0.5521847185798967.
Step

Step: 11854000. Mean Reward: 0.6169847972972973. Std of Reward: 0.5965185324582759.
Step: 11855000. Mean Reward: 0.612274873524452. Std of Reward: 0.5845394465555364.
Step: 11856000. Mean Reward: 0.6276256499133449. Std of Reward: 0.5862519837563024.
Step: 11857000. Mean Reward: 0.6077538726333908. Std of Reward: 0.5875777310481815.
Step: 11858000. Mean Reward: 0.6053056537102472. Std of Reward: 0.5915007715629415.
Step: 11859000. Mean Reward: 0.6594715302491103. Std of Reward: 0.5648466740662532.
Step: 11860000. Mean Reward: 0.6067504332755633. Std of Reward: 0.5925293081730273.
Step: 11861000. Mean Reward: 0.6167244897959184. Std of Reward: 0.5920604017600085.
Step: 11862000. Mean Reward: 0.6473368794326241. Std of Reward: 0.5666267768494041.
Step: 11863000. Mean Reward: 0.6113880597014926. Std of Reward: 0.582301613835994.
Step: 11864000. Mean Reward: 0.6295354609929078. Std of Reward: 0.5783817027989292.
Step: 11865000. Mean Reward: 0.5786323777403035. Std of Reward: 0.607461377558

Step: 11952000. Mean Reward: 0.6299775086505189. Std of Reward: 0.5805045960191267.
Step: 11953000. Mean Reward: 0.6300584192439863. Std of Reward: 0.5800191163068249.
Step: 11954000. Mean Reward: 0.6359487179487179. Std of Reward: 0.5664791099286326.
Step: 11955000. Mean Reward: 0.620522033898305. Std of Reward: 0.5850641616466701.
Step: 11956000. Mean Reward: 0.6119173553719008. Std of Reward: 0.5972118716302837.
Step: 11957000. Mean Reward: 0.6360380794701987. Std of Reward: 0.5772674327790412.
Step: 11958000. Mean Reward: 0.6611635111876076. Std of Reward: 0.5666510634006399.
Step: 11959000. Mean Reward: 0.6443145695364239. Std of Reward: 0.5728618209888567.
Step: 11960000. Mean Reward: 0.6386174496644295. Std of Reward: 0.5808710896427516.
Step: 11961000. Mean Reward: 0.6028239316239316. Std of Reward: 0.5930301110311943.
Step: 11962000. Mean Reward: 0.6746839378238341. Std of Reward: 0.5452158913247669.
Step: 11963000. Mean Reward: 0.6775716783216783. Std of Reward: 0.55428968919

Step: 12050000. Mean Reward: 0.6186400000000001. Std of Reward: 0.5924805963630089.
Saved Model
Step: 12051000. Mean Reward: 0.6600187074829932. Std of Reward: 0.5567696075654035.
Step: 12052000. Mean Reward: 0.6337517006802721. Std of Reward: 0.5769155666130955.
Step: 12053000. Mean Reward: 0.6610976027397261. Std of Reward: 0.5522772151039913.
Step: 12054000. Mean Reward: 0.6544023178807947. Std of Reward: 0.5699588855230528.
Step: 12055000. Mean Reward: 0.5993050847457627. Std of Reward: 0.6077287852445558.
Step: 12056000. Mean Reward: 0.6611273666092944. Std of Reward: 0.5625089612212117.
Step: 12057000. Mean Reward: 0.6715110732538331. Std of Reward: 0.5580264730246163.
Step: 12058000. Mean Reward: 0.6172357859531772. Std of Reward: 0.5880013419918773.
Step: 12059000. Mean Reward: 0.625749169435216. Std of Reward: 0.5915687729789005.
Step: 12060000. Mean Reward: 0.6231318101933216. Std of Reward: 0.5824511974310076.
Step: 12061000. Mean Reward: 0.6619999999999999. Std of Reward: 0

Step: 12148000. Mean Reward: 0.6237973640856672. Std of Reward: 0.5893714716467663.
Step: 12149000. Mean Reward: 0.6358788395904437. Std of Reward: 0.5783784417134913.
Step: 12150000. Mean Reward: 0.5772023411371237. Std of Reward: 0.6147686740581491.
Saved Model
Step: 12151000. Mean Reward: 0.6790610169491524. Std of Reward: 0.5538979007114263.
Step: 12152000. Mean Reward: 0.6589931740614334. Std of Reward: 0.5667059960510675.
Step: 12153000. Mean Reward: 0.6294748322147651. Std of Reward: 0.5816388392288367.
Step: 12154000. Mean Reward: 0.651297161936561. Std of Reward: 0.5712706002176907.
Step: 12155000. Mean Reward: 0.66861. Std of Reward: 0.5585954927912207.
Step: 12156000. Mean Reward: 0.6541339130434782. Std of Reward: 0.5740701194691262.
Step: 12157000. Mean Reward: 0.6129133448873484. Std of Reward: 0.5875185246052442.
Step: 12158000. Mean Reward: 0.6369589743589743. Std of Reward: 0.572419574488039.
Step: 12159000. Mean Reward: 0.681253807106599. Std of Reward: 0.543617465795

Step: 12246000. Mean Reward: 0.6383087971274686. Std of Reward: 0.5761084747356937.
Step: 12247000. Mean Reward: 0.6278472222222222. Std of Reward: 0.5837857361110568.
Step: 12248000. Mean Reward: 0.6499527145359019. Std of Reward: 0.5662669593201383.
Step: 12249000. Mean Reward: 0.637. Std of Reward: 0.568278600675963.
Step: 12250000. Mean Reward: 0.6607975567190226. Std of Reward: 0.5647716190676786.
Saved Model
Step: 12251000. Mean Reward: 0.6668487972508591. Std of Reward: 0.5514633900890602.
Step: 12252000. Mean Reward: 0.6589283276450512. Std of Reward: 0.5501156362921416.
Step: 12253000. Mean Reward: 0.6552883642495784. Std of Reward: 0.5652556491416355.
Step: 12254000. Mean Reward: 0.6546122807017544. Std of Reward: 0.5611382156315561.
Step: 12255000. Mean Reward: 0.6468896797153025. Std of Reward: 0.5654894758172249.
Step: 12256000. Mean Reward: 0.6667140381282495. Std of Reward: 0.5596542350117041.
Step: 12257000. Mean Reward: 0.6252521588946459. Std of Reward: 0.576882667202

Step: 12345000. Mean Reward: 0.6697535714285715. Std of Reward: 0.5509653443743359.
Step: 12346000. Mean Reward: 0.6473253012048192. Std of Reward: 0.5681763948755316.
Step: 12347000. Mean Reward: 0.6631794425087107. Std of Reward: 0.5610134690002453.
Step: 12348000. Mean Reward: 0.6561137931034483. Std of Reward: 0.5602014546658641.
Step: 12349000. Mean Reward: 0.666368330464716. Std of Reward: 0.560455831165338.
Step: 12350000. Mean Reward: 0.6601272727272726. Std of Reward: 0.559585565950226.
Saved Model
Step: 12351000. Mean Reward: 0.6816458333333333. Std of Reward: 0.5358004813510083.
Step: 12352000. Mean Reward: 0.673766323024055. Std of Reward: 0.5532647654799023.
Step: 12353000. Mean Reward: 0.6112307692307692. Std of Reward: 0.5870209097255087.
Step: 12354000. Mean Reward: 0.6533785211267605. Std of Reward: 0.5618568304461019.
Step: 12355000. Mean Reward: 0.6734675767918088. Std of Reward: 0.5492248895708953.
Step: 12356000. Mean Reward: 0.6625872193436961. Std of Reward: 0.55

Step: 12443000. Mean Reward: 0.6720921985815603. Std of Reward: 0.5464640936821181.
Step: 12444000. Mean Reward: 0.6310329289428076. Std of Reward: 0.5762603283124234.
Step: 12445000. Mean Reward: 0.6470807560137457. Std of Reward: 0.5696916498885103.
Step: 12446000. Mean Reward: 0.6980053956834532. Std of Reward: 0.5225703783516618.
Step: 12447000. Mean Reward: 0.6379266547406083. Std of Reward: 0.5729521985400581.
Step: 12448000. Mean Reward: 0.643704152249135. Std of Reward: 0.5701216139462186.
Step: 12449000. Mean Reward: 0.6871622574955908. Std of Reward: 0.5401131259653422.
Step: 12450000. Mean Reward: 0.6562385964912281. Std of Reward: 0.555755937773012.
Saved Model
Step: 12451000. Mean Reward: 0.645362831858407. Std of Reward: 0.5683101088819971.
Step: 12452000. Mean Reward: 0.7033747927031508. Std of Reward: 0.5259950174379671.
Step: 12453000. Mean Reward: 0.6534598930481283. Std of Reward: 0.5593729959246205.
Step: 12454000. Mean Reward: 0.6354765342960288. Std of Reward: 0.5

Step: 12541000. Mean Reward: 0.6967867647058824. Std of Reward: 0.5363352072451606.
Step: 12542000. Mean Reward: 0.6153734729493893. Std of Reward: 0.5797886238117743.
Step: 12543000. Mean Reward: 0.6781569343065693. Std of Reward: 0.5474739744900082.
Step: 12544000. Mean Reward: 0.6616089285714285. Std of Reward: 0.5453537831984409.
Step: 12545000. Mean Reward: 0.6402955390334574. Std of Reward: 0.5722888565517065.
Step: 12546000. Mean Reward: 0.6531473880597015. Std of Reward: 0.5593809267281176.
Step: 12547000. Mean Reward: 0.649406474820144. Std of Reward: 0.5608278761979174.
Step: 12548000. Mean Reward: 0.6594746376811593. Std of Reward: 0.5559702865276888.
Step: 12549000. Mean Reward: 0.6763787085514835. Std of Reward: 0.533357550966705.
Step: 12550000. Mean Reward: 0.606295652173913. Std of Reward: 0.5940709337519573.
Saved Model
Step: 12551000. Mean Reward: 0.667613475177305. Std of Reward: 0.5520149418164624.
Step: 12552000. Mean Reward: 0.6583996383363472. Std of Reward: 0.55

Step: 12639000. Mean Reward: 0.6719572953736654. Std of Reward: 0.5509811678148675.
Step: 12640000. Mean Reward: 0.6474179894179894. Std of Reward: 0.5662194530602278.
Step: 12641000. Mean Reward: 0.6708327645051194. Std of Reward: 0.5491468182599126.
Step: 12642000. Mean Reward: 0.6487305699481865. Std of Reward: 0.5736424327978521.
Step: 12643000. Mean Reward: 0.6528611111111111. Std of Reward: 0.5605260006248981.
Step: 12644000. Mean Reward: 0.6557069271758437. Std of Reward: 0.5595337830980677.
Step: 12645000. Mean Reward: 0.6529190647482015. Std of Reward: 0.5638604273922594.
Step: 12646000. Mean Reward: 0.678068376068376. Std of Reward: 0.5432558939071754.
Step: 12647000. Mean Reward: 0.6797754749568221. Std of Reward: 0.5503613019776957.
Step: 12648000. Mean Reward: 0.6596672384219553. Std of Reward: 0.561286853713303.
Step: 12649000. Mean Reward: 0.6442236135957066. Std of Reward: 0.5730909711966199.
Step: 12650000. Mean Reward: 0.6686306306306306. Std of Reward: 0.550018724135

Step: 12737000. Mean Reward: 0.672171875. Std of Reward: 0.5480665890966028.
Step: 12738000. Mean Reward: 0.6656464471403812. Std of Reward: 0.5540919248063064.
Step: 12739000. Mean Reward: 0.6826134301270418. Std of Reward: 0.5419664247484883.
Step: 12740000. Mean Reward: 0.6401258865248226. Std of Reward: 0.5663760780719445.
Step: 12741000. Mean Reward: 0.6850466786355475. Std of Reward: 0.5350573901438016.
Step: 12742000. Mean Reward: 0.6493417721518988. Std of Reward: 0.5621344436517981.
Step: 12743000. Mean Reward: 0.6577379679144385. Std of Reward: 0.5526994014008728.
Step: 12744000. Mean Reward: 0.6651116928446772. Std of Reward: 0.5531093401394432.
Step: 12745000. Mean Reward: 0.6266577540106952. Std of Reward: 0.5734375225908208.
Step: 12746000. Mean Reward: 0.646. Std of Reward: 0.5628439826048142.
Step: 12747000. Mean Reward: 0.6248909426987062. Std of Reward: 0.5695910401212059.
Step: 12748000. Mean Reward: 0.6618551236749116. Std of Reward: 0.5489076079357251.
Step: 127490

Step: 12835000. Mean Reward: 0.6272248628884826. Std of Reward: 0.5752428870075398.
Step: 12836000. Mean Reward: 0.6688540145985401. Std of Reward: 0.5454596506737048.
Step: 12837000. Mean Reward: 0.6622065637065636. Std of Reward: 0.5474903022671807.
Step: 12838000. Mean Reward: 0.7242121771217712. Std of Reward: 0.499489600377153.
Step: 12839000. Mean Reward: 0.6562371323529411. Std of Reward: 0.547865162286907.
Step: 12840000. Mean Reward: 0.6784377358490566. Std of Reward: 0.5389811255232834.
Step: 12841000. Mean Reward: 0.6738085501858736. Std of Reward: 0.5389791936185108.
Step: 12842000. Mean Reward: 0.6428315602836878. Std of Reward: 0.562956848361776.
Step: 12843000. Mean Reward: 0.6814075403949731. Std of Reward: 0.5312987043693095.
Step: 12844000. Mean Reward: 0.6456703296703297. Std of Reward: 0.5594328182459987.
Step: 12845000. Mean Reward: 0.6795683060109289. Std of Reward: 0.5445486971664288.
Step: 12846000. Mean Reward: 0.6658561020036429. Std of Reward: 0.5401265412552

Step: 12933000. Mean Reward: 0.6423664383561644. Std of Reward: 0.5809399144628313.
Step: 12934000. Mean Reward: 0.654704584040747. Std of Reward: 0.5671934908151972.
Step: 12935000. Mean Reward: 0.6663373287671233. Std of Reward: 0.5648934718130261.
Step: 12936000. Mean Reward: 0.6432599653379549. Std of Reward: 0.575174763593225.
Step: 12937000. Mean Reward: 0.6464634146341464. Std of Reward: 0.5720593015416552.
Step: 12938000. Mean Reward: 0.6582587412587413. Std of Reward: 0.5589882620493073.
Step: 12939000. Mean Reward: 0.6929139784946237. Std of Reward: 0.5340664851134619.
Step: 12940000. Mean Reward: 0.6593057996485061. Std of Reward: 0.5696590303683846.
Step: 12941000. Mean Reward: 0.622946218487395. Std of Reward: 0.5867530399415568.
Step: 12942000. Mean Reward: 0.6425485519591142. Std of Reward: 0.5783713952222753.
Step: 12943000. Mean Reward: 0.6435901639344263. Std of Reward: 0.5725326757673177.
Step: 12944000. Mean Reward: 0.682296108291032. Std of Reward: 0.55437699241051

Step: 13031000. Mean Reward: 0.6604474576271187. Std of Reward: 0.5663345299217345.
Step: 13032000. Mean Reward: 0.6294829931972789. Std of Reward: 0.592061208153823.
Step: 13033000. Mean Reward: 0.7115788617886178. Std of Reward: 0.5152664792064651.
Step: 13034000. Mean Reward: 0.6622909407665505. Std of Reward: 0.5625151432927026.
Step: 13035000. Mean Reward: 0.6740292598967298. Std of Reward: 0.5500439201718722.
Step: 13036000. Mean Reward: 0.6435181347150258. Std of Reward: 0.5732590593728087.
Step: 13037000. Mean Reward: 0.6338449197860963. Std of Reward: 0.5716874442231447.
Step: 13038000. Mean Reward: 0.6708455008488964. Std of Reward: 0.5551799795978878.
Step: 13039000. Mean Reward: 0.6841597845601436. Std of Reward: 0.5486376350021093.
Step: 13040000. Mean Reward: 0.6731782006920415. Std of Reward: 0.5400312893750037.
Step: 13041000. Mean Reward: 0.7019426086956522. Std of Reward: 0.5287162255322043.
Step: 13042000. Mean Reward: 0.669479930191972. Std of Reward: 0.555045213036

Step: 13129000. Mean Reward: 0.6993296903460838. Std of Reward: 0.5052789258851194.
Step: 13130000. Mean Reward: 0.6840924214417746. Std of Reward: 0.5362051421701403.
Step: 13131000. Mean Reward: 0.6628610603290677. Std of Reward: 0.5460816203543394.
Step: 13132000. Mean Reward: 0.7000055555555555. Std of Reward: 0.5095693079442339.
Step: 13133000. Mean Reward: 0.710476618705036. Std of Reward: 0.5105293428246507.
Step: 13134000. Mean Reward: 0.6184531548757171. Std of Reward: 0.5697219905068631.
Step: 13135000. Mean Reward: 0.6702105263157895. Std of Reward: 0.5383271122085722.
Step: 13136000. Mean Reward: 0.6902699275362318. Std of Reward: 0.5216359834602808.
Step: 13137000. Mean Reward: 0.6487438794726931. Std of Reward: 0.5453565656917883.
Step: 13138000. Mean Reward: 0.666984934086629. Std of Reward: 0.543043730450579.
Step: 13139000. Mean Reward: 0.657728624535316. Std of Reward: 0.5406780067882382.
Step: 13140000. Mean Reward: 0.6543590192644484. Std of Reward: 0.55576868306305

Step: 13227000. Mean Reward: 0.6854822335025381. Std of Reward: 0.539720579401849.
Step: 13228000. Mean Reward: 0.6810940594059407. Std of Reward: 0.5562345435070102.
Step: 13229000. Mean Reward: 0.6327719594594594. Std of Reward: 0.5672880158525687.
Step: 13230000. Mean Reward: 0.625659793814433. Std of Reward: 0.5869074969984173.
Step: 13231000. Mean Reward: 0.6352283333333333. Std of Reward: 0.5737235015207433.
Step: 13232000. Mean Reward: 0.6179587628865979. Std of Reward: 0.5928044348314694.
Step: 13233000. Mean Reward: 0.678639455782313. Std of Reward: 0.5510174194510364.
Step: 13234000. Mean Reward: 0.6428644366197183. Std of Reward: 0.5667488771238647.
Step: 13235000. Mean Reward: 0.6905526315789474. Std of Reward: 0.5385214340666629.
Step: 13236000. Mean Reward: 0.6358101694915254. Std of Reward: 0.5708560398223586.
Step: 13237000. Mean Reward: 0.6931761744966444. Std of Reward: 0.5398410039692925.
Step: 13238000. Mean Reward: 0.6390165289256199. Std of Reward: 0.5752040239326

Step: 13325000. Mean Reward: 0.6776295652173913. Std of Reward: 0.5405275524448174.
Step: 13326000. Mean Reward: 0.652. Std of Reward: 0.5627733446564704.
Step: 13327000. Mean Reward: 0.6952869875222816. Std of Reward: 0.5313530114787179.
Step: 13328000. Mean Reward: 0.6473529411764705. Std of Reward: 0.5712764224655625.
Step: 13329000. Mean Reward: 0.6745492227979274. Std of Reward: 0.550508719586258.
Step: 13330000. Mean Reward: 0.6887207678883072. Std of Reward: 0.5317561791566814.
Step: 13331000. Mean Reward: 0.6471356521739131. Std of Reward: 0.565332618397855.
Step: 13332000. Mean Reward: 0.6897931654676258. Std of Reward: 0.528572070618739.
Step: 13333000. Mean Reward: 0.6903653198653199. Std of Reward: 0.5347992664825606.
Step: 13334000. Mean Reward: 0.6550017889087656. Std of Reward: 0.5579996810070373.
Step: 13335000. Mean Reward: 0.6705454545454546. Std of Reward: 0.5406962550652414.
Step: 13336000. Mean Reward: 0.6828653136531365. Std of Reward: 0.5335501635858994.
Step: 13

Step: 13423000. Mean Reward: 0.6691606805293006. Std of Reward: 0.5351746836740894.
Step: 13424000. Mean Reward: 0.6672125693160813. Std of Reward: 0.5400524116321163.
Step: 13425000. Mean Reward: 0.627427797833935. Std of Reward: 0.5705614272449299.
Step: 13426000. Mean Reward: 0.6488802816901409. Std of Reward: 0.564301528033343.
Step: 13427000. Mean Reward: 0.636838768115942. Std of Reward: 0.5642422529541476.
Step: 13428000. Mean Reward: 0.6599092558983666. Std of Reward: 0.5527451529472744.
Step: 13429000. Mean Reward: 0.623239332096475. Std of Reward: 0.565260181323855.
Step: 13430000. Mean Reward: 0.6700790441176471. Std of Reward: 0.5424840767538152.
Step: 13431000. Mean Reward: 0.7091873873873875. Std of Reward: 0.5153361196707624.
Step: 13432000. Mean Reward: 0.6794376098418278. Std of Reward: 0.5332128009516284.
Step: 13433000. Mean Reward: 0.6408229166666667. Std of Reward: 0.5700678534441159.
Step: 13434000. Mean Reward: 0.6589012567324956. Std of Reward: 0.549921939244411

Step: 13521000. Mean Reward: 0.6177514018691589. Std of Reward: 0.5739978799929077.
Step: 13522000. Mean Reward: 0.6226872659176029. Std of Reward: 0.5724690508511464.
Step: 13523000. Mean Reward: 0.6469783783783783. Std of Reward: 0.5521216371735563.
Step: 13524000. Mean Reward: 0.6317363636363637. Std of Reward: 0.55985408614084.
Step: 13525000. Mean Reward: 0.6524874551971326. Std of Reward: 0.5477996249123372.
Step: 13526000. Mean Reward: 0.645919266055046. Std of Reward: 0.558308731217406.
Step: 13527000. Mean Reward: 0.6288266423357664. Std of Reward: 0.5714721808835165.
Step: 13528000. Mean Reward: 0.6282292418772563. Std of Reward: 0.5759841646412476.
Step: 13529000. Mean Reward: 0.627753448275862. Std of Reward: 0.5676888137106872.
Step: 13530000. Mean Reward: 0.6457904085257549. Std of Reward: 0.5595344316730825.
Step: 13531000. Mean Reward: 0.6299717314487632. Std of Reward: 0.5651942433976072.
Step: 13532000. Mean Reward: 0.6479260628465805. Std of Reward: 0.550054561303968

Step: 13619000. Mean Reward: 0.6693145009416195. Std of Reward: 0.5351813366025823.
Step: 13620000. Mean Reward: 0.6467140255009107. Std of Reward: 0.5501847348340838.
Step: 13621000. Mean Reward: 0.6492640144665461. Std of Reward: 0.5587248421930988.
Step: 13622000. Mean Reward: 0.6582247619047619. Std of Reward: 0.5513511152969848.
Step: 13623000. Mean Reward: 0.6605. Std of Reward: 0.5333660659117335.
Step: 13624000. Mean Reward: 0.690592936802974. Std of Reward: 0.5202944616893489.
Step: 13625000. Mean Reward: 0.6549573283858998. Std of Reward: 0.5463736752299915.
Step: 13626000. Mean Reward: 0.671. Std of Reward: 0.5329349594271963.
Step: 13627000. Mean Reward: 0.6304875239923224. Std of Reward: 0.5661151008637052.
Step: 13628000. Mean Reward: 0.6229798165137614. Std of Reward: 0.5604384600315498.
Step: 13629000. Mean Reward: 0.6623694029850746. Std of Reward: 0.5364982418810286.
Step: 13630000. Mean Reward: 0.6509012567324955. Std of Reward: 0.54608567528919.
Step: 13631000. Mean

Step: 13717000. Mean Reward: 0.6628153564899452. Std of Reward: 0.5483425088758207.
Step: 13718000. Mean Reward: 0.6845153583617747. Std of Reward: 0.5424935921701663.
Step: 13719000. Mean Reward: 0.6491684210526316. Std of Reward: 0.5592174793210677.
Step: 13720000. Mean Reward: 0.7080842293906809. Std of Reward: 0.5163426612255345.
Step: 13721000. Mean Reward: 0.6298720720720721. Std of Reward: 0.5785553520158265.
Step: 13722000. Mean Reward: 0.6494186046511627. Std of Reward: 0.5572090431726695.
Step: 13723000. Mean Reward: 0.6458957597173145. Std of Reward: 0.565921124200133.
Step: 13724000. Mean Reward: 0.6571064981949459. Std of Reward: 0.546149020136483.
Step: 13725000. Mean Reward: 0.6436274509803922. Std of Reward: 0.5632559331546947.
Step: 13726000. Mean Reward: 0.6370679785330948. Std of Reward: 0.5680165493961101.
Step: 13727000. Mean Reward: 0.6195242214532872. Std of Reward: 0.5804488183536944.
Step: 13728000. Mean Reward: 0.6700359712230215. Std of Reward: 0.547456527758

INFO:unityagents:
Lesson changed. Now in Lesson 15 : 	scale -> 5


Step: 13772000. Mean Reward: 0.5572344322344323. Std of Reward: 0.6221663205200944.
Step: 13773000. Mean Reward: 0.5057313167259787. Std of Reward: 0.6359326807363459.
Step: 13774000. Mean Reward: 0.45832167832167825. Std of Reward: 0.6595912895500744.
Step: 13775000. Mean Reward: 0.48931283905967454. Std of Reward: 0.6460735978303546.
Step: 13776000. Mean Reward: 0.4667074010327022. Std of Reward: 0.6468636770010864.
Step: 13777000. Mean Reward: 0.5192459893048128. Std of Reward: 0.6378498670206382.
Step: 13778000. Mean Reward: 0.5019964412811387. Std of Reward: 0.639495205161898.
Step: 13779000. Mean Reward: 0.5046395147313691. Std of Reward: 0.6382911260217943.
Step: 13780000. Mean Reward: 0.5535052264808362. Std of Reward: 0.6229400358784223.
Step: 13781000. Mean Reward: 0.5191500904159133. Std of Reward: 0.6308907084210092.
Step: 13782000. Mean Reward: 0.5422752136752137. Std of Reward: 0.6301302927140501.
Step: 13783000. Mean Reward: 0.5102581818181819. Std of Reward: 0.639254826

Step: 13870000. Mean Reward: 0.5074935543278085. Std of Reward: 0.6320792076379753.
Step: 13871000. Mean Reward: 0.5534911347517731. Std of Reward: 0.618230968217584.
Step: 13872000. Mean Reward: 0.4439417122040073. Std of Reward: 0.6594348937660139.
Step: 13873000. Mean Reward: 0.4792177858439201. Std of Reward: 0.6358973080035266.
Step: 13874000. Mean Reward: 0.5442078651685393. Std of Reward: 0.6185510765653566.
Step: 13875000. Mean Reward: 0.49651459854014607. Std of Reward: 0.6386433512367081.
Step: 13876000. Mean Reward: 0.48595169946332734. Std of Reward: 0.6416838142917004.
Step: 13877000. Mean Reward: 0.5449839285714286. Std of Reward: 0.6251504916751719.
Step: 13878000. Mean Reward: 0.49524809160305344. Std of Reward: 0.625610745482181.
Step: 13879000. Mean Reward: 0.542072380952381. Std of Reward: 0.611694883868971.
Step: 13880000. Mean Reward: 0.5259686924493554. Std of Reward: 0.613897335265724.
Step: 13881000. Mean Reward: 0.5281588447653429. Std of Reward: 0.617540045357

Step: 13968000. Mean Reward: 0.5471919385796545. Std of Reward: 0.6142310749607915.
Step: 13969000. Mean Reward: 0.5453400402414487. Std of Reward: 0.6012963787325345.
Step: 13970000. Mean Reward: 0.5593346303501945. Std of Reward: 0.5849135631436299.
Step: 13971000. Mean Reward: 0.51240206185567. Std of Reward: 0.6180877203915651.
Step: 13972000. Mean Reward: 0.473978886756238. Std of Reward: 0.6285441771774167.
Step: 13973000. Mean Reward: 0.5083078512396694. Std of Reward: 0.6119453621868495.
Step: 13974000. Mean Reward: 0.4876929460580913. Std of Reward: 0.6206050864702379.
Step: 13975000. Mean Reward: 0.5351597633136095. Std of Reward: 0.6054062617235357.
Step: 13976000. Mean Reward: 0.5512695167286246. Std of Reward: 0.611121620906323.
Step: 13977000. Mean Reward: 0.5519924528301886. Std of Reward: 0.5883968401108848.
Step: 13978000. Mean Reward: 0.5393147113594041. Std of Reward: 0.6120979580653306.
Step: 13979000. Mean Reward: 0.5078167330677291. Std of Reward: 0.60876396740186

Step: 14066000. Mean Reward: 0.495125. Std of Reward: 0.6119856092181838.
Step: 14067000. Mean Reward: 0.5132275449101795. Std of Reward: 0.6110638673818973.
Step: 14068000. Mean Reward: 0.5557430278884462. Std of Reward: 0.5971936759167626.
Step: 14069000. Mean Reward: 0.5227809523809525. Std of Reward: 0.5978092573794895.
Step: 14070000. Mean Reward: 0.5211005692599621. Std of Reward: 0.6162707820372834.
Step: 14071000. Mean Reward: 0.5199351145038168. Std of Reward: 0.6001017849318747.
Step: 14072000. Mean Reward: 0.539546845124283. Std of Reward: 0.6160132407790393.
Step: 14073000. Mean Reward: 0.5456158536585366. Std of Reward: 0.5994558684373238.
Step: 14074000. Mean Reward: 0.5202624521072797. Std of Reward: 0.6145341459670931.
Step: 14075000. Mean Reward: 0.5180853889943074. Std of Reward: 0.6216445642372509.
Step: 14076000. Mean Reward: 0.5144903474903474. Std of Reward: 0.6174191824182546.
Step: 14077000. Mean Reward: 0.5392714843749999. Std of Reward: 0.6123412952293101.
Ste

Step: 14165000. Mean Reward: 0.5285294117647058. Std of Reward: 0.6152239457339074.
Step: 14166000. Mean Reward: 0.5547658349328215. Std of Reward: 0.599572021757359.
Step: 14167000. Mean Reward: 0.5556210317460317. Std of Reward: 0.6046298727075632.
Step: 14168000. Mean Reward: 0.5640174418604651. Std of Reward: 0.5935482596364738.
Step: 14169000. Mean Reward: 0.5338704061895552. Std of Reward: 0.616684327357008.
Step: 14170000. Mean Reward: 0.5387374517374518. Std of Reward: 0.6128821151355013.
Step: 14171000. Mean Reward: 0.5261576923076924. Std of Reward: 0.6115753539791107.
Step: 14172000. Mean Reward: 0.5314476190476191. Std of Reward: 0.6119172241087824.
Step: 14173000. Mean Reward: 0.5397462121212121. Std of Reward: 0.5963215334010447.
Step: 14174000. Mean Reward: 0.5614455066921605. Std of Reward: 0.6031495494167642.
Step: 14175000. Mean Reward: 0.5942749529190208. Std of Reward: 0.58989696163187.
Step: 14176000. Mean Reward: 0.5441629213483146. Std of Reward: 0.60363816893873

Step: 14263000. Mean Reward: 0.54640036900369. Std of Reward: 0.6135111070307109.
Step: 14264000. Mean Reward: 0.5254586466165413. Std of Reward: 0.6192049232779905.
Step: 14265000. Mean Reward: 0.525317843866171. Std of Reward: 0.6150309035797928.
Step: 14266000. Mean Reward: 0.5692777777777778. Std of Reward: 0.603542020684746.
Step: 14267000. Mean Reward: 0.5553351449275361. Std of Reward: 0.6078750333383814.
Step: 14268000. Mean Reward: 0.5674798464491363. Std of Reward: 0.6028358352151555.
Step: 14269000. Mean Reward: 0.5584693877551021. Std of Reward: 0.6046799246006304.
Step: 14270000. Mean Reward: 0.5239618320610688. Std of Reward: 0.6161893812011373.
Step: 14271000. Mean Reward: 0.6044649805447472. Std of Reward: 0.5839134141572547.
Step: 14272000. Mean Reward: 0.5344963636363637. Std of Reward: 0.6250709101486404.
Step: 14273000. Mean Reward: 0.5393004032258064. Std of Reward: 0.6066940861697812.
Step: 14274000. Mean Reward: 0.5577749999999999. Std of Reward: 0.59626147574023

Step: 14361000. Mean Reward: 0.5664604051565377. Std of Reward: 0.6105634973889699.
Step: 14362000. Mean Reward: 0.5367830188679246. Std of Reward: 0.6159101901682993.
Step: 14363000. Mean Reward: 0.5540509803921569. Std of Reward: 0.6093882188905265.
Step: 14364000. Mean Reward: 0.4952215799614643. Std of Reward: 0.6289147142193968.
Step: 14365000. Mean Reward: 0.5276196078431373. Std of Reward: 0.6153047168009468.
Step: 14366000. Mean Reward: 0.49982387475538165. Std of Reward: 0.6236412614569693.
Step: 14367000. Mean Reward: 0.5634282982791587. Std of Reward: 0.5999555561314943.
Step: 14368000. Mean Reward: 0.5506201834862384. Std of Reward: 0.6146401235092688.
Step: 14369000. Mean Reward: 0.52218281535649. Std of Reward: 0.6247032022432636.
Step: 14370000. Mean Reward: 0.5620514285714285. Std of Reward: 0.6082938037172632.
Step: 14371000. Mean Reward: 0.5592747826086956. Std of Reward: 0.6088682795858414.
Step: 14372000. Mean Reward: 0.5501525423728812. Std of Reward: 0.62001577138

Step: 14459000. Mean Reward: 0.5553715415019762. Std of Reward: 0.6017321245404369.
Step: 14460000. Mean Reward: 0.5828345724907064. Std of Reward: 0.5972976633606095.
Step: 14461000. Mean Reward: 0.4973449612403101. Std of Reward: 0.6255468190808805.
Step: 14462000. Mean Reward: 0.53148031496063. Std of Reward: 0.6060716882884257.
Step: 14463000. Mean Reward: 0.5320583804143126. Std of Reward: 0.6132830721088184.
Step: 14464000. Mean Reward: 0.5284060150375939. Std of Reward: 0.6131276840563477.
Step: 14465000. Mean Reward: 0.5145606936416184. Std of Reward: 0.6185134142593729.
Step: 14466000. Mean Reward: 0.5260982658959537. Std of Reward: 0.6062487009913013.
Step: 14467000. Mean Reward: 0.5502104247104247. Std of Reward: 0.6157325926127438.
Step: 14468000. Mean Reward: 0.5375842696629214. Std of Reward: 0.609400927214289.
Step: 14469000. Mean Reward: 0.524364990689013. Std of Reward: 0.618972577049532.
Step: 14470000. Mean Reward: 0.5204487179487179. Std of Reward: 0.627827417118087

Step: 14557000. Mean Reward: 0.5171366223908919. Std of Reward: 0.6103942424691158.
Step: 14558000. Mean Reward: 0.5165888429752066. Std of Reward: 0.6095413415557664.
Step: 14559000. Mean Reward: 0.5538050847457626. Std of Reward: 0.5861900615664088.
Step: 14560000. Mean Reward: 0.5345152749490835. Std of Reward: 0.5986324514130538.
Step: 14561000. Mean Reward: 0.5757909836065573. Std of Reward: 0.5819207291381463.
Step: 14562000. Mean Reward: 0.49873347547974417. Std of Reward: 0.6033807343856722.
Step: 14563000. Mean Reward: 0.5385748987854251. Std of Reward: 0.6033060816831204.
Step: 14564000. Mean Reward: 0.5507231404958677. Std of Reward: 0.5948270199641182.
Step: 14565000. Mean Reward: 0.5329126819126819. Std of Reward: 0.5955194771600731.
Step: 14566000. Mean Reward: 0.5362983870967741. Std of Reward: 0.5981761119980807.
Step: 14567000. Mean Reward: 0.5429519038076153. Std of Reward: 0.5844136860171835.
Step: 14568000. Mean Reward: 0.5456860465116278. Std of Reward: 0.598972377

Step: 14655000. Mean Reward: 0.5752499999999999. Std of Reward: 0.5953300846775864.
Step: 14656000. Mean Reward: 0.5021781609195403. Std of Reward: 0.6322959386888718.
Step: 14657000. Mean Reward: 0.5542947558770344. Std of Reward: 0.6100455307983182.
Step: 14658000. Mean Reward: 0.5816819923371647. Std of Reward: 0.5958731209458624.
Step: 14659000. Mean Reward: 0.5156261343012705. Std of Reward: 0.629238295879398.
Step: 14660000. Mean Reward: 0.5583533834586466. Std of Reward: 0.6023773165642184.
Step: 14661000. Mean Reward: 0.5418984375. Std of Reward: 0.612433447785397.
Step: 14662000. Mean Reward: 0.5374228571428571. Std of Reward: 0.6090515251105244.
Step: 14663000. Mean Reward: 0.5231313559322034. Std of Reward: 0.6050669381934296.
Step: 14664000. Mean Reward: 0.5549877800407331. Std of Reward: 0.5933832780912264.
Step: 14665000. Mean Reward: 0.5731234567901233. Std of Reward: 0.5947184114130378.
Step: 14666000. Mean Reward: 0.5226306818181817. Std of Reward: 0.6162668200774102.


Step: 14754000. Mean Reward: 0.5506428571428571. Std of Reward: 0.6104280467235736.
Step: 14755000. Mean Reward: 0.48953254437869825. Std of Reward: 0.6408339706468019.
Step: 14756000. Mean Reward: 0.5599354243542436. Std of Reward: 0.6042396135558588.
Step: 14757000. Mean Reward: 0.5443307692307693. Std of Reward: 0.6046157156976976.
Step: 14758000. Mean Reward: 0.5271508379888269. Std of Reward: 0.6209479879113098.
Step: 14759000. Mean Reward: 0.5636751467710372. Std of Reward: 0.611839112798317.
Step: 14760000. Mean Reward: 0.5578048327137547. Std of Reward: 0.6039073830789286.
Step: 14761000. Mean Reward: 0.5603620689655173. Std of Reward: 0.6034559292618116.
Step: 14762000. Mean Reward: 0.5895315985130112. Std of Reward: 0.5850009594927874.
Step: 14763000. Mean Reward: 0.5326084452975048. Std of Reward: 0.6131835840279674.
Step: 14764000. Mean Reward: 0.5695753968253969. Std of Reward: 0.6021525658579285.
Step: 14765000. Mean Reward: 0.559894422310757. Std of Reward: 0.60562434842

Step: 14852000. Mean Reward: 0.6231056466302368. Std of Reward: 0.5673613419150334.
Step: 14853000. Mean Reward: 0.5515251937984496. Std of Reward: 0.6003742674308914.
Step: 14854000. Mean Reward: 0.5586899810964083. Std of Reward: 0.606444098438596.
Step: 14855000. Mean Reward: 0.5195063752276867. Std of Reward: 0.6205172576821668.
Step: 14856000. Mean Reward: 0.5563956442831215. Std of Reward: 0.6076753565255022.
Step: 14857000. Mean Reward: 0.527504743833017. Std of Reward: 0.6117589530042953.
Step: 14858000. Mean Reward: 0.5627703984819735. Std of Reward: 0.604783479718212.
Step: 14859000. Mean Reward: 0.6083369963369963. Std of Reward: 0.5848643936511619.
Step: 14860000. Mean Reward: 0.5203871595330738. Std of Reward: 0.6085527354466154.
Step: 14861000. Mean Reward: 0.5701007462686566. Std of Reward: 0.59445107715755.
Step: 14862000. Mean Reward: 0.5242111111111111. Std of Reward: 0.6203191441647468.
Step: 14863000. Mean Reward: 0.5547430683918669. Std of Reward: 0.609092335961772

Step: 14950000. Mean Reward: 0.5199829545454545. Std of Reward: 0.6181035616844707.
Saved Model
Step: 14951000. Mean Reward: 0.524279926335175. Std of Reward: 0.6209505210835082.
Step: 14952000. Mean Reward: 0.5269333333333333. Std of Reward: 0.6203320520614866.
Step: 14953000. Mean Reward: 0.5283386773547093. Std of Reward: 0.6067298490650903.
Step: 14954000. Mean Reward: 0.5207699619771863. Std of Reward: 0.617549264002752.
Step: 14955000. Mean Reward: 0.5605391621129325. Std of Reward: 0.6122108527686762.
Step: 14956000. Mean Reward: 0.5560477064220183. Std of Reward: 0.6098599549868595.
Step: 14957000. Mean Reward: 0.5394548944337811. Std of Reward: 0.6118516517447994.
Step: 14958000. Mean Reward: 0.5294377358490565. Std of Reward: 0.6195190502884842.
Step: 14959000. Mean Reward: 0.49746560846560844. Std of Reward: 0.6351512020949733.
Step: 14960000. Mean Reward: 0.519936329588015. Std of Reward: 0.6259473985558314.
Step: 14961000. Mean Reward: 0.5358195211786372. Std of Reward: 0.

Step: 15048000. Mean Reward: 0.48631135531135533. Std of Reward: 0.6214344287305912.
Step: 15049000. Mean Reward: 0.5206797752808989. Std of Reward: 0.612795134491845.
Step: 15050000. Mean Reward: 0.4916038095238095. Std of Reward: 0.6268495734983085.
Saved Model
Step: 15051000. Mean Reward: 0.542476618705036. Std of Reward: 0.6057340405413759.
Step: 15052000. Mean Reward: 0.5048386491557223. Std of Reward: 0.6206747918438813.
Step: 15053000. Mean Reward: 0.536209009009009. Std of Reward: 0.6129045540304544.
Step: 15054000. Mean Reward: 0.5656917562724014. Std of Reward: 0.5934955483554228.
Step: 15055000. Mean Reward: 0.5315571955719556. Std of Reward: 0.6126739616305167.
Step: 15056000. Mean Reward: 0.5298472222222222. Std of Reward: 0.6207951310331824.
Step: 15057000. Mean Reward: 0.5335134649910233. Std of Reward: 0.6207914435827191.
Step: 15058000. Mean Reward: 0.5345567010309279. Std of Reward: 0.6196137828616637.
Step: 15059000. Mean Reward: 0.5771773308957953. Std of Reward: 0.

Step: 15146000. Mean Reward: 0.5625867924528303. Std of Reward: 0.6003906723527624.
Step: 15147000. Mean Reward: 0.5463688212927756. Std of Reward: 0.6023240674436018.
Step: 15148000. Mean Reward: 0.5500076481835564. Std of Reward: 0.6076411244140514.
Step: 15149000. Mean Reward: 0.5659906191369606. Std of Reward: 0.6012330371953865.
Step: 15150000. Mean Reward: 0.4886423220973782. Std of Reward: 0.6179822317293472.
Saved Model
Step: 15151000. Mean Reward: 0.5254187866927592. Std of Reward: 0.6074000419156027.
Step: 15152000. Mean Reward: 0.5122718808193668. Std of Reward: 0.6122646126275618.
Step: 15153000. Mean Reward: 0.5197773654916511. Std of Reward: 0.6298466569628738.
Step: 15154000. Mean Reward: 0.5047609489051095. Std of Reward: 0.6289268024035717.
Step: 15155000. Mean Reward: 0.5055221238938054. Std of Reward: 0.6233645263378195.
Step: 15156000. Mean Reward: 0.5646464088397789. Std of Reward: 0.6009585653411564.
Step: 15157000. Mean Reward: 0.5466071428571428. Std of Reward: 

Step: 15244000. Mean Reward: 0.5721952554744525. Std of Reward: 0.5901449811161776.
Step: 15245000. Mean Reward: 0.5223105360443623. Std of Reward: 0.6168658032984242.
Step: 15246000. Mean Reward: 0.5600698529411765. Std of Reward: 0.6080504667546655.
Step: 15247000. Mean Reward: 0.568657894736842. Std of Reward: 0.5930572373079915.
Step: 15248000. Mean Reward: 0.5914806629834255. Std of Reward: 0.5887237617711399.
Step: 15249000. Mean Reward: 0.539870036101083. Std of Reward: 0.6008186242207111.
Step: 15250000. Mean Reward: 0.5779268292682926. Std of Reward: 0.5921859149566637.
Saved Model
Step: 15251000. Mean Reward: 0.5517953216374268. Std of Reward: 0.5959718380854739.
Step: 15252000. Mean Reward: 0.5478393881453154. Std of Reward: 0.5934805115648482.
Step: 15253000. Mean Reward: 0.5667769784172662. Std of Reward: 0.5972235285612323.
Step: 15254000. Mean Reward: 0.48612313432835824. Std of Reward: 0.6211605740213509.
Step: 15255000. Mean Reward: 0.5360058139534883. Std of Reward: 0

Step: 15342000. Mean Reward: 0.530649019607843. Std of Reward: 0.6064162748322243.
Step: 15343000. Mean Reward: 0.524929292929293. Std of Reward: 0.5916936699348018.
Step: 15344000. Mean Reward: 0.5709206680584551. Std of Reward: 0.584895113958689.
Step: 15345000. Mean Reward: 0.5788540856031128. Std of Reward: 0.578212992504671.
Step: 15346000. Mean Reward: 0.574272380952381. Std of Reward: 0.574154503927336.
Step: 15347000. Mean Reward: 0.5294301310043668. Std of Reward: 0.5933205689339197.
Step: 15348000. Mean Reward: 0.5551411290322581. Std of Reward: 0.5796168299822955.
Step: 15349000. Mean Reward: 0.5989939393939394. Std of Reward: 0.5667079075577838.
Step: 15350000. Mean Reward: 0.508475687103594. Std of Reward: 0.6027749185843936.
Saved Model
Step: 15351000. Mean Reward: 0.5619684418145957. Std of Reward: 0.5807275710904329.
Step: 15352000. Mean Reward: 0.5287190082644628. Std of Reward: 0.6054448773885541.
Step: 15353000. Mean Reward: 0.4897006109979633. Std of Reward: 0.60765

Step: 15440000. Mean Reward: 0.569169090909091. Std of Reward: 0.5983809325999396.
Step: 15441000. Mean Reward: 0.5191583969465648. Std of Reward: 0.6096816398823602.
Step: 15442000. Mean Reward: 0.5275063985374772. Std of Reward: 0.6084395582377338.
Step: 15443000. Mean Reward: 0.525777358490566. Std of Reward: 0.6100720274956434.
Step: 15444000. Mean Reward: 0.523852189781022. Std of Reward: 0.6093759477288117.
Step: 15445000. Mean Reward: 0.5249503676470588. Std of Reward: 0.6046398868394458.
Step: 15446000. Mean Reward: 0.5792818181818182. Std of Reward: 0.5858394153507532.
Step: 15447000. Mean Reward: 0.5967039711191336. Std of Reward: 0.5799507097368727.
Step: 15448000. Mean Reward: 0.6015623869801084. Std of Reward: 0.5813184710066788.
Step: 15449000. Mean Reward: 0.5430018148820326. Std of Reward: 0.603058482712932.
Step: 15450000. Mean Reward: 0.5192124542124542. Std of Reward: 0.6045080362180627.
Saved Model
Step: 15451000. Mean Reward: 0.4926041666666666. Std of Reward: 0.62

Step: 15538000. Mean Reward: 0.5010502692998204. Std of Reward: 0.6172594736382402.
Step: 15539000. Mean Reward: 0.523087431693989. Std of Reward: 0.6117582141453257.
Step: 15540000. Mean Reward: 0.5793812949640287. Std of Reward: 0.5861888264251215.
Step: 15541000. Mean Reward: 0.5380036036036036. Std of Reward: 0.6037164628818578.
Step: 15542000. Mean Reward: 0.5603129629629628. Std of Reward: 0.5943102228155533.
Step: 15543000. Mean Reward: 0.5338566243194192. Std of Reward: 0.6074523569904781.
Step: 15544000. Mean Reward: 0.4634498207885305. Std of Reward: 0.6329895786044442.
Step: 15545000. Mean Reward: 0.5636383363471971. Std of Reward: 0.6034229961124923.
Step: 15546000. Mean Reward: 0.5392326820603908. Std of Reward: 0.6113258550878129.
Step: 15547000. Mean Reward: 0.5248202443280977. Std of Reward: 0.6222991184460962.
Step: 15548000. Mean Reward: 0.5752660714285714. Std of Reward: 0.587959904116643.
Step: 15549000. Mean Reward: 0.502023381294964. Std of Reward: 0.6155267304740

Step: 15636000. Mean Reward: 0.5445055147058824. Std of Reward: 0.6127857976431221.
Step: 15637000. Mean Reward: 0.5047124773960217. Std of Reward: 0.629106256724075.
Step: 15638000. Mean Reward: 0.5330533807829182. Std of Reward: 0.6073916059568085.
Step: 15639000. Mean Reward: 0.5624058500914076. Std of Reward: 0.6022544784872217.
Step: 15640000. Mean Reward: 0.5725157699443414. Std of Reward: 0.5951580800840582.
Step: 15641000. Mean Reward: 0.5376982142857143. Std of Reward: 0.6040908605946245.
Step: 15642000. Mean Reward: 0.5551921641791044. Std of Reward: 0.6036680898472222.
Step: 15643000. Mean Reward: 0.5473659420289856. Std of Reward: 0.6046235475090045.
Step: 15644000. Mean Reward: 0.5675646630236794. Std of Reward: 0.5951765208673265.
Step: 15645000. Mean Reward: 0.5308156424581005. Std of Reward: 0.6152146719980915.
Step: 15646000. Mean Reward: 0.5413140186915887. Std of Reward: 0.6099947467904027.
Step: 15647000. Mean Reward: 0.5557814207650273. Std of Reward: 0.60715294534

Step: 15734000. Mean Reward: 0.5896312849162011. Std of Reward: 0.5864725263796785.
Step: 15735000. Mean Reward: 0.49837111517367455. Std of Reward: 0.6296492910600268.
Step: 15736000. Mean Reward: 0.5211147540983606. Std of Reward: 0.6228497229346177.
Step: 15737000. Mean Reward: 0.5263580470162749. Std of Reward: 0.6122864643928079.
Step: 15738000. Mean Reward: 0.5191827956989248. Std of Reward: 0.6258024373554879.
Step: 15739000. Mean Reward: 0.5343401109057301. Std of Reward: 0.6192725555060108.
Step: 15740000. Mean Reward: 0.5064626334519573. Std of Reward: 0.6325793435039911.
Step: 15741000. Mean Reward: 0.5371786339754817. Std of Reward: 0.6304648075182109.
Step: 15742000. Mean Reward: 0.5294511545293074. Std of Reward: 0.6193806845057482.
Step: 15743000. Mean Reward: 0.5249725274725274. Std of Reward: 0.6326285570432718.
Step: 15744000. Mean Reward: 0.4950833333333333. Std of Reward: 0.6372008649797314.
Step: 15745000. Mean Reward: 0.5271009009009009. Std of Reward: 0.623994199

Step: 15832000. Mean Reward: 0.49000555555555547. Std of Reward: 0.6312261539072531.
Step: 15833000. Mean Reward: 0.5449923518164437. Std of Reward: 0.6078473882153672.
Step: 15834000. Mean Reward: 0.5611622574955908. Std of Reward: 0.598430621075164.
Step: 15835000. Mean Reward: 0.6021836734693877. Std of Reward: 0.5811269053990817.
Step: 15836000. Mean Reward: 0.5333497164461246. Std of Reward: 0.6191511878276488.
Step: 15837000. Mean Reward: 0.48846346153846154. Std of Reward: 0.6322494658965552.
Step: 15838000. Mean Reward: 0.5635716911764705. Std of Reward: 0.6027512522311816.
Step: 15839000. Mean Reward: 0.5376844919786097. Std of Reward: 0.6178003403414324.
Step: 15840000. Mean Reward: 0.5011783783783784. Std of Reward: 0.6303804447459416.
Step: 15841000. Mean Reward: 0.5468625954198473. Std of Reward: 0.6113285808726245.
Step: 15842000. Mean Reward: 0.5456060606060606. Std of Reward: 0.6111656802792086.
Step: 15843000. Mean Reward: 0.5546118299445472. Std of Reward: 0.596641293

Step: 15930000. Mean Reward: 0.4983698884758364. Std of Reward: 0.6235971458860573.
Step: 15931000. Mean Reward: 0.5426513274336282. Std of Reward: 0.6166030583161152.
Step: 15932000. Mean Reward: 0.5260110294117647. Std of Reward: 0.6191102188735429.
Step: 15933000. Mean Reward: 0.5161942959001782. Std of Reward: 0.618997516001436.
Step: 15934000. Mean Reward: 0.5840954198473282. Std of Reward: 0.5883491998561642.
Step: 15935000. Mean Reward: 0.5230151228733458. Std of Reward: 0.6143987022827782.
Step: 15936000. Mean Reward: 0.507673624288425. Std of Reward: 0.6114224430909676.
Step: 15937000. Mean Reward: 0.5483935599284437. Std of Reward: 0.6110184749860416.
Step: 15938000. Mean Reward: 0.4956155268022181. Std of Reward: 0.6216687369805203.
Step: 15939000. Mean Reward: 0.5539894551845344. Std of Reward: 0.6116183262801292.
Step: 15940000. Mean Reward: 0.5202157996146435. Std of Reward: 0.6173642523641881.
Step: 15941000. Mean Reward: 0.546155722326454. Std of Reward: 0.6152843972071

Step: 16028000. Mean Reward: 0.5120643382352941. Std of Reward: 0.6163717794423625.
Step: 16029000. Mean Reward: 0.5492705882352941. Std of Reward: 0.6081715164726753.
Step: 16030000. Mean Reward: 0.5798533834586466. Std of Reward: 0.5801210279545281.
Step: 16031000. Mean Reward: 0.5596336996336997. Std of Reward: 0.6077207986573645.
Step: 16032000. Mean Reward: 0.5479465930018417. Std of Reward: 0.6033476552503765.
Step: 16033000. Mean Reward: 0.5675606060606061. Std of Reward: 0.5983392610609013.
Step: 16034000. Mean Reward: 0.5232809611829945. Std of Reward: 0.6090263908244886.
Step: 16035000. Mean Reward: 0.5533345794392523. Std of Reward: 0.5977985331091403.
Step: 16036000. Mean Reward: 0.5344171428571428. Std of Reward: 0.615168612806532.
Step: 16037000. Mean Reward: 0.5390761718749999. Std of Reward: 0.6064384211533521.
Step: 16038000. Mean Reward: 0.5206070110701106. Std of Reward: 0.6216280304895592.
Step: 16039000. Mean Reward: 0.5358324420677362. Std of Reward: 0.61426413557

Step: 16126000. Mean Reward: 0.5199819999999999. Std of Reward: 0.6106786075146239.
Step: 16127000. Mean Reward: 0.5278637200736648. Std of Reward: 0.6027796599457851.
Step: 16128000. Mean Reward: 0.5385711462450592. Std of Reward: 0.6069369541246318.
Step: 16129000. Mean Reward: 0.4727834274952919. Std of Reward: 0.6256271672034827.
Step: 16130000. Mean Reward: 0.5001745730550284. Std of Reward: 0.6257331152924419.
Step: 16131000. Mean Reward: 0.5516485981308411. Std of Reward: 0.6003417592659998.
Step: 16132000. Mean Reward: 0.5076796116504855. Std of Reward: 0.612022247415801.
Step: 16133000. Mean Reward: 0.5260652591170825. Std of Reward: 0.6147783674128039.
Step: 16134000. Mean Reward: 0.5182. Std of Reward: 0.6014532689639942.
Step: 16135000. Mean Reward: 0.5398049281314168. Std of Reward: 0.5917818887361146.
Step: 16136000. Mean Reward: 0.499001996007984. Std of Reward: 0.614099839415764.
Step: 16137000. Mean Reward: 0.51612375249501. Std of Reward: 0.607038901717422.
Step: 1613

Step: 16224000. Mean Reward: 0.5159821073558648. Std of Reward: 0.6019126751175204.
Step: 16225000. Mean Reward: 0.49786454183266926. Std of Reward: 0.6044035464734885.
Step: 16226000. Mean Reward: 0.49153831417624516. Std of Reward: 0.6017889864098609.
Step: 16227000. Mean Reward: 0.49801361867704275. Std of Reward: 0.6067362533568345.
Step: 16228000. Mean Reward: 0.4816621621621622. Std of Reward: 0.6050444539626826.
Step: 16229000. Mean Reward: 0.47285714285714286. Std of Reward: 0.6202903209042769.
Step: 16230000. Mean Reward: 0.5234979838709677. Std of Reward: 0.5930702645602902.
Step: 16231000. Mean Reward: 0.5227114337568058. Std of Reward: 0.6100454731763473.
Step: 16232000. Mean Reward: 0.5056888045540798. Std of Reward: 0.6111776155345101.
Step: 16233000. Mean Reward: 0.5230588235294117. Std of Reward: 0.5984619012728267.
Step: 16234000. Mean Reward: 0.49805905511811027. Std of Reward: 0.6127147224917664.
Step: 16235000. Mean Reward: 0.5505019379844962. Std of Reward: 0.59500

Step: 16322000. Mean Reward: 0.5107562862669245. Std of Reward: 0.6148786851649307.
Step: 16323000. Mean Reward: 0.5513384030418251. Std of Reward: 0.5983133005670797.
Step: 16324000. Mean Reward: 0.5820075046904315. Std of Reward: 0.5926809315498973.
Step: 16325000. Mean Reward: 0.552180198019802. Std of Reward: 0.5926295765935296.
Step: 16326000. Mean Reward: 0.5705782178217822. Std of Reward: 0.5904797498399765.
Step: 16327000. Mean Reward: 0.560405303030303. Std of Reward: 0.6046821248334451.
Step: 16328000. Mean Reward: 0.5508647619047619. Std of Reward: 0.604928125440331.
Step: 16329000. Mean Reward: 0.4801936758893281. Std of Reward: 0.6231129758490574.
Step: 16330000. Mean Reward: 0.5216779661016948. Std of Reward: 0.6187850801289682.
Step: 16331000. Mean Reward: 0.5236104651162791. Std of Reward: 0.6146879379435325.
Step: 16332000. Mean Reward: 0.5538498168498169. Std of Reward: 0.5986095942344144.
Step: 16333000. Mean Reward: 0.5538585657370518. Std of Reward: 0.6000724706534

Step: 16420000. Mean Reward: 0.5526612612612612. Std of Reward: 0.6264053724717503.
Step: 16421000. Mean Reward: 0.5241753371868979. Std of Reward: 0.6182246367091293.
Step: 16422000. Mean Reward: 0.561852994555354. Std of Reward: 0.6043023892584295.
Step: 16423000. Mean Reward: 0.5585996240601503. Std of Reward: 0.6125521087546009.
Step: 16424000. Mean Reward: 0.5184633699633698. Std of Reward: 0.6150429476165215.
Step: 16425000. Mean Reward: 0.513626204238921. Std of Reward: 0.6251854468361276.
Step: 16426000. Mean Reward: 0.49654820415879014. Std of Reward: 0.6410595568485284.
Step: 16427000. Mean Reward: 0.5083171641791044. Std of Reward: 0.6259483612083514.
Step: 16428000. Mean Reward: 0.5202685714285713. Std of Reward: 0.6331587736352491.
Step: 16429000. Mean Reward: 0.5645242537313434. Std of Reward: 0.6028417834139128.
Step: 16430000. Mean Reward: 0.5254887525562372. Std of Reward: 0.6153000401788116.
Step: 16431000. Mean Reward: 0.5393240740740741. Std of Reward: 0.62504663868

Step: 16518000. Mean Reward: 0.5478738049713193. Std of Reward: 0.6025110039554749.
Step: 16519000. Mean Reward: 0.5180158415841585. Std of Reward: 0.6184460082299658.
Step: 16520000. Mean Reward: 0.5340831758034026. Std of Reward: 0.6054974806974304.
Step: 16521000. Mean Reward: 0.5617070895522388. Std of Reward: 0.5975783171883153.
Step: 16522000. Mean Reward: 0.5668632478632479. Std of Reward: 0.6105629475515074.
Step: 16523000. Mean Reward: 0.5151936090225563. Std of Reward: 0.6202933229533499.
Step: 16524000. Mean Reward: 0.5492703213610586. Std of Reward: 0.606655880980521.
Step: 16525000. Mean Reward: 0.554765037593985. Std of Reward: 0.6058162418199252.
Step: 16526000. Mean Reward: 0.5328052434456928. Std of Reward: 0.6050174971087301.
Step: 16527000. Mean Reward: 0.523718574108818. Std of Reward: 0.6175332119743846.
Step: 16528000. Mean Reward: 0.5592518796992481. Std of Reward: 0.5948142539541172.
Step: 16529000. Mean Reward: 0.5499330922242315. Std of Reward: 0.6102418880154

Step: 16616000. Mean Reward: 0.511584440227704. Std of Reward: 0.6021149379209678.
Step: 16617000. Mean Reward: 0.5279072356215214. Std of Reward: 0.6036738195818777.
Step: 16618000. Mean Reward: 0.5289740259740259. Std of Reward: 0.6044378913055862.
Step: 16619000. Mean Reward: 0.5647047970479705. Std of Reward: 0.5918576215784762.
Step: 16620000. Mean Reward: 0.563060263653484. Std of Reward: 0.5841134349104361.
Step: 16621000. Mean Reward: 0.5541389396709324. Std of Reward: 0.59622267940594.
Step: 16622000. Mean Reward: 0.5507560975609757. Std of Reward: 0.5886423758981083.
Step: 16623000. Mean Reward: 0.5390462962962964. Std of Reward: 0.589608404009708.
Step: 16624000. Mean Reward: 0.5180396226415094. Std of Reward: 0.6109030744353596.
Step: 16625000. Mean Reward: 0.48147407407407405. Std of Reward: 0.6171893680180444.
Step: 16626000. Mean Reward: 0.5352708333333334. Std of Reward: 0.5959437293430018.
Step: 16627000. Mean Reward: 0.49696564885496186. Std of Reward: 0.6059641478159

Step: 16714000. Mean Reward: 0.49996317829457365. Std of Reward: 0.6182012139200505.
Step: 16715000. Mean Reward: 0.5472168905950097. Std of Reward: 0.6114345682650028.
Step: 16716000. Mean Reward: 0.515332075471698. Std of Reward: 0.6204595764046972.
Step: 16717000. Mean Reward: 0.5183624031007752. Std of Reward: 0.6073342394052184.
Step: 16718000. Mean Reward: 0.518624513618677. Std of Reward: 0.6187389593891827.
Step: 16719000. Mean Reward: 0.4865417515274949. Std of Reward: 0.6210222633009752.
Step: 16720000. Mean Reward: 0.5407939508506617. Std of Reward: 0.6081282021784296.
Step: 16721000. Mean Reward: 0.5249154135338345. Std of Reward: 0.618147838598443.
Step: 16722000. Mean Reward: 0.5280547169811322. Std of Reward: 0.6092299336871028.
Step: 16723000. Mean Reward: 0.5358871287128713. Std of Reward: 0.6118250746034669.
Step: 16724000. Mean Reward: 0.47566129032258064. Std of Reward: 0.6205339907393076.
Step: 16725000. Mean Reward: 0.5111838235294117. Std of Reward: 0.62675374189

Step: 16812000. Mean Reward: 0.5378926441351888. Std of Reward: 0.6110391374052246.
Step: 16813000. Mean Reward: 0.5184464944649447. Std of Reward: 0.6183362063726556.
Step: 16814000. Mean Reward: 0.5584019417475728. Std of Reward: 0.5961511729143064.
Step: 16815000. Mean Reward: 0.5884071294559099. Std of Reward: 0.5902099932412012.
Step: 16816000. Mean Reward: 0.5947254174397032. Std of Reward: 0.5930277697849764.
Step: 16817000. Mean Reward: 0.5826407766990291. Std of Reward: 0.5817850744908876.
Step: 16818000. Mean Reward: 0.5318375241779498. Std of Reward: 0.6059542634112645.
Step: 16819000. Mean Reward: 0.5688870056497175. Std of Reward: 0.5979932467864039.
Step: 16820000. Mean Reward: 0.5088593155893536. Std of Reward: 0.6207148883370963.
Step: 16821000. Mean Reward: 0.5136541501976284. Std of Reward: 0.6148254491781259.
Step: 16822000. Mean Reward: 0.5747577519379845. Std of Reward: 0.5911240060602196.
Step: 16823000. Mean Reward: 0.5353231357552583. Std of Reward: 0.6011426983

Step: 16910000. Mean Reward: 0.5969116541353384. Std of Reward: 0.5817474984642773.
Step: 16911000. Mean Reward: 0.5382876712328767. Std of Reward: 0.6049701917457083.
Step: 16912000. Mean Reward: 0.4621463878326996. Std of Reward: 0.6420210865141248.
Step: 16913000. Mean Reward: 0.5280280373831776. Std of Reward: 0.6084581626229558.
Step: 16914000. Mean Reward: 0.535518737672584. Std of Reward: 0.6091114500029032.
Step: 16915000. Mean Reward: 0.5527074569789675. Std of Reward: 0.5963783723313132.
Step: 16916000. Mean Reward: 0.5106986564299424. Std of Reward: 0.6199366968009083.
Step: 16917000. Mean Reward: 0.5462283609576427. Std of Reward: 0.6083652425091871.
Step: 16918000. Mean Reward: 0.5417760910815939. Std of Reward: 0.6040951858010954.
Step: 16919000. Mean Reward: 0.5499105058365759. Std of Reward: 0.6113496294451329.
Step: 16920000. Mean Reward: 0.5018683729433272. Std of Reward: 0.6257939316033186.
Step: 16921000. Mean Reward: 0.5363148854961832. Std of Reward: 0.61422916541

Step: 17008000. Mean Reward: 0.5441409001956948. Std of Reward: 0.5997153888819362.
Step: 17009000. Mean Reward: 0.5209579349904397. Std of Reward: 0.6112340572974683.
Step: 17010000. Mean Reward: 0.4943996212121212. Std of Reward: 0.6262869971529721.
Step: 17011000. Mean Reward: 0.4871573705179283. Std of Reward: 0.626404993810216.
Step: 17012000. Mean Reward: 0.518803370786517. Std of Reward: 0.6151508270242655.
Step: 17013000. Mean Reward: 0.5283886718749999. Std of Reward: 0.6054100909557493.
Step: 17014000. Mean Reward: 0.6002898832684824. Std of Reward: 0.5785932285629489.
Step: 17015000. Mean Reward: 0.517327619047619. Std of Reward: 0.6228496442420324.
Step: 17016000. Mean Reward: 0.5527554671968191. Std of Reward: 0.5923505438875424.
Step: 17017000. Mean Reward: 0.553708984375. Std of Reward: 0.5968745543824775.
Step: 17018000. Mean Reward: 0.549882470119522. Std of Reward: 0.6022308958562147.
Step: 17019000. Mean Reward: 0.5432875939849624. Std of Reward: 0.5887210251767713.


Step: 17107000. Mean Reward: 0.5203949416342413. Std of Reward: 0.6138643382158372.
Step: 17108000. Mean Reward: 0.5205749063670412. Std of Reward: 0.6155135526758229.
Step: 17109000. Mean Reward: 0.5196694214876033. Std of Reward: 0.605755639552847.
Step: 17110000. Mean Reward: 0.48424599999999995. Std of Reward: 0.6276499864446744.
Step: 17111000. Mean Reward: 0.47955705996131526. Std of Reward: 0.6290288015699352.
Step: 17112000. Mean Reward: 0.487625968992248. Std of Reward: 0.6281780422272528.
Step: 17113000. Mean Reward: 0.509488416988417. Std of Reward: 0.6176566304884107.
Step: 17114000. Mean Reward: 0.5395973782771535. Std of Reward: 0.6179947220426136.
Step: 17115000. Mean Reward: 0.5839480769230769. Std of Reward: 0.5881316238048917.
Step: 17116000. Mean Reward: 0.5316717850287908. Std of Reward: 0.6110403526940116.
Step: 17117000. Mean Reward: 0.532009671179884. Std of Reward: 0.6131986576281077.
Step: 17118000. Mean Reward: 0.548158964879852. Std of Reward: 0.6136250975248

Step: 17205000. Mean Reward: 0.5185402930402931. Std of Reward: 0.612164052465554.
Step: 17206000. Mean Reward: 0.48563565891472865. Std of Reward: 0.6307808552981077.
Step: 17207000. Mean Reward: 0.5227753623188406. Std of Reward: 0.6192021575033091.
Step: 17208000. Mean Reward: 0.5220956022944552. Std of Reward: 0.6158076218991392.
Step: 17209000. Mean Reward: 0.5049942084942085. Std of Reward: 0.6248189065558312.
Step: 17210000. Mean Reward: 0.5028905660377359. Std of Reward: 0.6222692520014725.
Step: 17211000. Mean Reward: 0.5029904580152672. Std of Reward: 0.6258556356113667.
Step: 17212000. Mean Reward: 0.5280616570327552. Std of Reward: 0.6115122920178866.
Step: 17213000. Mean Reward: 0.5358983050847458. Std of Reward: 0.607315665318918.
Step: 17214000. Mean Reward: 0.5511836734693878. Std of Reward: 0.6044915752881076.
Step: 17215000. Mean Reward: 0.4832082551594747. Std of Reward: 0.6290287755995171.
Step: 17216000. Mean Reward: 0.5533866666666667. Std of Reward: 0.59953789904

Step: 17303000. Mean Reward: 0.5726. Std of Reward: 0.5850195438330814.
Step: 17304000. Mean Reward: 0.5835526315789472. Std of Reward: 0.58008614463702.
Step: 17305000. Mean Reward: 0.5283001876172608. Std of Reward: 0.613768582092585.
Step: 17306000. Mean Reward: 0.4725343511450381. Std of Reward: 0.6354931312078559.
Step: 17307000. Mean Reward: 0.5998745247148288. Std of Reward: 0.5700405357453406.
Step: 17308000. Mean Reward: 0.5363705772811919. Std of Reward: 0.6097622262342305.
Step: 17309000. Mean Reward: 0.5281489757914338. Std of Reward: 0.6129009478269567.
Step: 17310000. Mean Reward: 0.5262662942271881. Std of Reward: 0.6112385066037386.
Step: 17311000. Mean Reward: 0.5521156862745098. Std of Reward: 0.5870742381730694.
Step: 17312000. Mean Reward: 0.532552783109405. Std of Reward: 0.6114940590537975.
Step: 17313000. Mean Reward: 0.5498831417624521. Std of Reward: 0.6105363383483158.
Step: 17314000. Mean Reward: 0.540229445506692. Std of Reward: 0.6081471098359309.
Step: 173

Step: 17401000. Mean Reward: 0.473556862745098. Std of Reward: 0.6108950294139793.
Step: 17402000. Mean Reward: 0.5526564885496182. Std of Reward: 0.5902880784732737.
Step: 17403000. Mean Reward: 0.5490888468809074. Std of Reward: 0.583329283164942.
Step: 17404000. Mean Reward: 0.5546500956022944. Std of Reward: 0.5809983445928956.
Step: 17405000. Mean Reward: 0.5488333333333333. Std of Reward: 0.5916563647661148.
Step: 17406000. Mean Reward: 0.5017741935483871. Std of Reward: 0.6074052258618095.
Step: 17407000. Mean Reward: 0.5908187134502925. Std of Reward: 0.5670317058501054.
Step: 17408000. Mean Reward: 0.5747343173431734. Std of Reward: 0.5788501258167897.
Step: 17409000. Mean Reward: 0.5503287401574803. Std of Reward: 0.5830597984290137.
Step: 17410000. Mean Reward: 0.5627570093457944. Std of Reward: 0.5906525021861158.
Step: 17411000. Mean Reward: 0.5150186567164179. Std of Reward: 0.5994455551028166.
Step: 17412000. Mean Reward: 0.5354474187380498. Std of Reward: 0.597061294905

Step: 17499000. Mean Reward: 0.5402392156862745. Std of Reward: 0.5891362750832719.
Step: 17500000. Mean Reward: 0.5464255319148935. Std of Reward: 0.5894535783674987.
Saved Model
Step: 17501000. Mean Reward: 0.5239252873563218. Std of Reward: 0.6093545641410072.
Step: 17502000. Mean Reward: 0.5347014925373135. Std of Reward: 0.6045844697331784.
Step: 17503000. Mean Reward: 0.554527724665392. Std of Reward: 0.5806187152026535.
Step: 17504000. Mean Reward: 0.5098562992125985. Std of Reward: 0.6005640172362166.
Step: 17505000. Mean Reward: 0.503773156899811. Std of Reward: 0.6094024169792.
Step: 17506000. Mean Reward: 0.5391787072243346. Std of Reward: 0.5902329004799505.
Step: 17507000. Mean Reward: 0.577878277153558. Std of Reward: 0.577802567734799.
Step: 17508000. Mean Reward: 0.5416395563770795. Std of Reward: 0.5937024758114506.
Step: 17509000. Mean Reward: 0.620516544117647. Std of Reward: 0.5629572550823211.
Step: 17510000. Mean Reward: 0.5416839186691313. Std of Reward: 0.599026

Step: 17597000. Mean Reward: 0.5412399267399267. Std of Reward: 0.5985336164779779.
Step: 17598000. Mean Reward: 0.5891518518518518. Std of Reward: 0.5679830976034739.
Step: 17599000. Mean Reward: 0.5018689788053949. Std of Reward: 0.6038258261332552.
Step: 17600000. Mean Reward: 0.5654829545454546. Std of Reward: 0.5878998668968477.
Saved Model
Step: 17601000. Mean Reward: 0.5703121387283236. Std of Reward: 0.5820717159688131.
Step: 17602000. Mean Reward: 0.5877834274952918. Std of Reward: 0.5673995860028116.
Step: 17603000. Mean Reward: 0.5305820610687022. Std of Reward: 0.6031443997336816.
Step: 17604000. Mean Reward: 0.5816425925925925. Std of Reward: 0.5863663278259533.
Step: 17605000. Mean Reward: 0.5299845857418112. Std of Reward: 0.6084251091486567.
Step: 17606000. Mean Reward: 0.5857684824902724. Std of Reward: 0.5724741671731368.
Step: 17607000. Mean Reward: 0.5055936920222635. Std of Reward: 0.6183637214913924.
Step: 17608000. Mean Reward: 0.54353861003861. Std of Reward: 0.

Step: 17695000. Mean Reward: 0.5587221135029354. Std of Reward: 0.594331215747559.
Step: 17696000. Mean Reward: 0.49795294117647054. Std of Reward: 0.6118388658670411.
Step: 17697000. Mean Reward: 0.5456309523809524. Std of Reward: 0.6053933414985988.
Step: 17698000. Mean Reward: 0.518203094777563. Std of Reward: 0.6052149604129404.
Step: 17699000. Mean Reward: 0.5374788732394367. Std of Reward: 0.6052229514668387.
Step: 17700000. Mean Reward: 0.4898775100401606. Std of Reward: 0.6200331244035721.
Saved Model
Step: 17701000. Mean Reward: 0.5450747663551402. Std of Reward: 0.6046224696937671.
Step: 17702000. Mean Reward: 0.5314783464566929. Std of Reward: 0.6102580788154062.
Step: 17703000. Mean Reward: 0.5267342256214148. Std of Reward: 0.6094411006624013.
Step: 17704000. Mean Reward: 0.5711932938856016. Std of Reward: 0.5851839796625052.
Step: 17705000. Mean Reward: 0.5489464285714286. Std of Reward: 0.5969149416180897.
Step: 17706000. Mean Reward: 0.5493862332695983. Std of Reward: 0

Step: 17793000. Mean Reward: 0.5592504854368933. Std of Reward: 0.5844674028755035.
Step: 17794000. Mean Reward: 0.5673227611940298. Std of Reward: 0.591649340798249.
Step: 17795000. Mean Reward: 0.5274981060606061. Std of Reward: 0.6029883378660599.
Step: 17796000. Mean Reward: 0.5613085714285715. Std of Reward: 0.5955516338599008.
Step: 17797000. Mean Reward: 0.5544346978557504. Std of Reward: 0.6014187969479958.
Step: 17798000. Mean Reward: 0.5673219696969697. Std of Reward: 0.5858693379790404.
Step: 17799000. Mean Reward: 0.5328944337811901. Std of Reward: 0.6113855307142245.
Step: 17800000. Mean Reward: 0.567467680608365. Std of Reward: 0.5902124914614482.
Saved Model
Step: 17801000. Mean Reward: 0.5805352941176471. Std of Reward: 0.5919808271681756.
Step: 17802000. Mean Reward: 0.5230307692307692. Std of Reward: 0.613119974311234.
Step: 17803000. Mean Reward: 0.5492840236686389. Std of Reward: 0.600929167293227.
Step: 17804000. Mean Reward: 0.58613671875. Std of Reward: 0.5789482

Step: 17892000. Mean Reward: 0.5411644359464627. Std of Reward: 0.5849042703431047.
Step: 17893000. Mean Reward: 0.5396146245059288. Std of Reward: 0.5956825026604753.
Step: 17894000. Mean Reward: 0.571094696969697. Std of Reward: 0.5771621589286243.
Step: 17895000. Mean Reward: 0.5347559523809524. Std of Reward: 0.597840065579573.
Step: 17896000. Mean Reward: 0.5921686046511628. Std of Reward: 0.5727257625529442.
Step: 17897000. Mean Reward: 0.5409299610894942. Std of Reward: 0.5895750818985809.
Step: 17898000. Mean Reward: 0.4971860465116279. Std of Reward: 0.6119201798208381.
Step: 17899000. Mean Reward: 0.5351906693711967. Std of Reward: 0.5984186769605806.
Step: 17900000. Mean Reward: 0.5621240458015268. Std of Reward: 0.5880348514510311.
Saved Model
Step: 17901000. Mean Reward: 0.5236921568627452. Std of Reward: 0.6033516073529165.
Step: 17902000. Mean Reward: 0.5095542857142857. Std of Reward: 0.6025253011132666.
Step: 17903000. Mean Reward: 0.5123415637860083. Std of Reward: 0.

Step: 17990000. Mean Reward: 0.5565869980879541. Std of Reward: 0.5934661847251385.
Step: 17991000. Mean Reward: 0.519894. Std of Reward: 0.6033572397543598.
Step: 17992000. Mean Reward: 0.5292573099415205. Std of Reward: 0.5986713652557378.
Step: 17993000. Mean Reward: 0.5282278978388997. Std of Reward: 0.5895134945387778.
Step: 17994000. Mean Reward: 0.6019308300395256. Std of Reward: 0.5594361195841384.
Step: 17995000. Mean Reward: 0.4816213991769547. Std of Reward: 0.6069076687518554.
Step: 17996000. Mean Reward: 0.5190306122448979. Std of Reward: 0.5784611227139314.
Step: 17997000. Mean Reward: 0.5267292490118577. Std of Reward: 0.5953305155468962.
Step: 17998000. Mean Reward: 0.5357170542635659. Std of Reward: 0.5780101335749691.
Step: 17999000. Mean Reward: 0.5940199600798403. Std of Reward: 0.5670226892642987.
Step: 18000000. Mean Reward: 0.5207063492063493. Std of Reward: 0.5959369482662523.
Saved Model
Step: 18001000. Mean Reward: 0.528074297188755. Std of Reward: 0.597193708

Step: 18088000. Mean Reward: 0.5607302504816956. Std of Reward: 0.5778806075647107.
Step: 18089000. Mean Reward: 0.5664173076923078. Std of Reward: 0.585744957042264.
Step: 18090000. Mean Reward: 0.5420509803921569. Std of Reward: 0.5785660683450209.
Step: 18091000. Mean Reward: 0.5825325670498084. Std of Reward: 0.5752290230995091.
Step: 18092000. Mean Reward: 0.5425266272189349. Std of Reward: 0.5876841998207873.
Step: 18093000. Mean Reward: 0.5545834932821497. Std of Reward: 0.5790418961535001.
Step: 18094000. Mean Reward: 0.5860522243713734. Std of Reward: 0.5648497439579877.
Step: 18095000. Mean Reward: 0.545251923076923. Std of Reward: 0.5842273957940536.
Step: 18096000. Mean Reward: 0.5715467196819086. Std of Reward: 0.5778585349251665.
Step: 18097000. Mean Reward: 0.5546757894736842. Std of Reward: 0.5864603330626512.
Step: 18098000. Mean Reward: 0.5326564299424185. Std of Reward: 0.5947950751173043.
Step: 18099000. Mean Reward: 0.5289768339768339. Std of Reward: 0.599441840964

Step: 18186000. Mean Reward: 0.5398085539714867. Std of Reward: 0.5962018207002217.
Step: 18187000. Mean Reward: 0.5532834951456311. Std of Reward: 0.5848496954196387.
Step: 18188000. Mean Reward: 0.5784217311233887. Std of Reward: 0.5964617163313476.
Step: 18189000. Mean Reward: 0.5795933456561922. Std of Reward: 0.5771097765171312.
Step: 18190000. Mean Reward: 0.5406641509433963. Std of Reward: 0.5994243122140908.
Step: 18191000. Mean Reward: 0.5871007905138339. Std of Reward: 0.5722675026457079.
Step: 18192000. Mean Reward: 0.5668738049713193. Std of Reward: 0.5858010639803195.
Step: 18193000. Mean Reward: 0.5936448598130841. Std of Reward: 0.5781676834077895.
Step: 18194000. Mean Reward: 0.5485836575875486. Std of Reward: 0.5908156416166044.
Step: 18195000. Mean Reward: 0.5713542435424354. Std of Reward: 0.5884309833047446.
Step: 18196000. Mean Reward: 0.5774903474903474. Std of Reward: 0.5890957494587625.
Step: 18197000. Mean Reward: 0.5543793103448276. Std of Reward: 0.5879356034

Step: 18284000. Mean Reward: 0.5339014598540146. Std of Reward: 0.6168916943456229.
Step: 18285000. Mean Reward: 0.5491941923774954. Std of Reward: 0.6155196471060295.
Step: 18286000. Mean Reward: 0.5185605468750001. Std of Reward: 0.614340573366334.
Step: 18287000. Mean Reward: 0.5722470355731225. Std of Reward: 0.5855312680386044.
Step: 18288000. Mean Reward: 0.5577762376237624. Std of Reward: 0.6006367375747661.
Step: 18289000. Mean Reward: 0.5015703125. Std of Reward: 0.6208627431545578.
Step: 18290000. Mean Reward: 0.5542611218568665. Std of Reward: 0.6089375339145716.
Step: 18291000. Mean Reward: 0.5467816764132554. Std of Reward: 0.6018227115326306.
Step: 18292000. Mean Reward: 0.5612175572519085. Std of Reward: 0.6069476806282295.
Step: 18293000. Mean Reward: 0.49814885496183203. Std of Reward: 0.6254272316720864.
Step: 18294000. Mean Reward: 0.5952744360902256. Std of Reward: 0.5840674623917018.
Step: 18295000. Mean Reward: 0.5445500945179584. Std of Reward: 0.6142817880788893

Step: 18382000. Mean Reward: 0.5257737226277372. Std of Reward: 0.6250988592495167.
Step: 18383000. Mean Reward: 0.5388007181328546. Std of Reward: 0.6097643493375151.
Step: 18384000. Mean Reward: 0.585011320754717. Std of Reward: 0.5897721113006844.
Step: 18385000. Mean Reward: 0.5182367424242424. Std of Reward: 0.6254963898582884.
Step: 18386000. Mean Reward: 0.5177732342007435. Std of Reward: 0.6313629505625679.
Step: 18387000. Mean Reward: 0.5570389380530973. Std of Reward: 0.6179736987033815.
Step: 18388000. Mean Reward: 0.5896328413284133. Std of Reward: 0.5859461014711134.
Step: 18389000. Mean Reward: 0.5567478260869565. Std of Reward: 0.6120831805290671.
Step: 18390000. Mean Reward: 0.5707254901960783. Std of Reward: 0.6008499679642634.
Step: 18391000. Mean Reward: 0.5803202979515829. Std of Reward: 0.5999048448890262.
Step: 18392000. Mean Reward: 0.548232645403377. Std of Reward: 0.6128561975367719.
Step: 18393000. Mean Reward: 0.5167303370786517. Std of Reward: 0.612900813086

Step: 18480000. Mean Reward: 0.5293901192504258. Std of Reward: 0.626010876761743.
Step: 18481000. Mean Reward: 0.5301062717770034. Std of Reward: 0.6236358248340244.
Step: 18482000. Mean Reward: 0.5646505376344086. Std of Reward: 0.6110433213775185.
Step: 18483000. Mean Reward: 0.4965091575091575. Std of Reward: 0.6262891561084203.
Step: 18484000. Mean Reward: 0.5747736842105263. Std of Reward: 0.5985595073748998.
Step: 18485000. Mean Reward: 0.5720644567219152. Std of Reward: 0.6047501390259954.
Step: 18486000. Mean Reward: 0.5515671641791045. Std of Reward: 0.6113160924529618.
Step: 18487000. Mean Reward: 0.5288301526717557. Std of Reward: 0.6124804684276227.
Step: 18488000. Mean Reward: 0.5577527881040891. Std of Reward: 0.6114709155159928.
Step: 18489000. Mean Reward: 0.5506783088235293. Std of Reward: 0.6098626453121555.
Step: 18490000. Mean Reward: 0.5354055555555556. Std of Reward: 0.6156174921457337.
Step: 18491000. Mean Reward: 0.5304452296819787. Std of Reward: 0.61992658547

Step: 18578000. Mean Reward: 0.5550924528301886. Std of Reward: 0.6022222564547539.
Step: 18579000. Mean Reward: 0.5589965277777778. Std of Reward: 0.6110407674062248.
Step: 18580000. Mean Reward: 0.5166447876447876. Std of Reward: 0.6156881585455558.
Step: 18581000. Mean Reward: 0.5199204545454545. Std of Reward: 0.6087086814101108.
Step: 18582000. Mean Reward: 0.5274735849056604. Std of Reward: 0.6174710139559814.
Step: 18583000. Mean Reward: 0.5139539594843461. Std of Reward: 0.6273501550435174.
Step: 18584000. Mean Reward: 0.5384731800766283. Std of Reward: 0.6047654561427322.
Step: 18585000. Mean Reward: 0.5745801526717557. Std of Reward: 0.5918132855467128.
Step: 18586000. Mean Reward: 0.5078181818181818. Std of Reward: 0.6144456598580816.
Step: 18587000. Mean Reward: 0.5689416195856872. Std of Reward: 0.5946429227959207.
Step: 18588000. Mean Reward: 0.5985732368896927. Std of Reward: 0.5819023197546265.
Step: 18589000. Mean Reward: 0.5334143126177024. Std of Reward: 0.6243859761

Step: 18676000. Mean Reward: 0.5218867924528302. Std of Reward: 0.6172800337077492.
Step: 18677000. Mean Reward: 0.5217846441947567. Std of Reward: 0.6116807847732577.
Step: 18678000. Mean Reward: 0.5378250950570341. Std of Reward: 0.6088680264152244.
Step: 18679000. Mean Reward: 0.5205684007707129. Std of Reward: 0.6154627710798629.
Step: 18680000. Mean Reward: 0.5445407407407408. Std of Reward: 0.6147280731313217.
Step: 18681000. Mean Reward: 0.5791771428571428. Std of Reward: 0.5918407484091908.
Step: 18682000. Mean Reward: 0.585907942238267. Std of Reward: 0.5838612173524258.
Step: 18683000. Mean Reward: 0.5458957952468008. Std of Reward: 0.5932160528022206.
Step: 18684000. Mean Reward: 0.581585046728972. Std of Reward: 0.594632738111806.
Step: 18685000. Mean Reward: 0.49544029850746274. Std of Reward: 0.6306504787015372.
Step: 18686000. Mean Reward: 0.5295882352941177. Std of Reward: 0.6199654786971631.
Step: 18687000. Mean Reward: 0.5669811676082863. Std of Reward: 0.597942936093

Step: 18774000. Mean Reward: 0.5719406474820145. Std of Reward: 0.588693418276341.
Step: 18775000. Mean Reward: 0.5586290018832392. Std of Reward: 0.5926930747158611.
Step: 18776000. Mean Reward: 0.4974075471698113. Std of Reward: 0.6200280151458334.
Step: 18777000. Mean Reward: 0.5394755639097744. Std of Reward: 0.5957485445641021.
Step: 18778000. Mean Reward: 0.5273496376811594. Std of Reward: 0.6203502247152141.
Step: 18779000. Mean Reward: 0.5413395872420262. Std of Reward: 0.5943781014280729.
Step: 18780000. Mean Reward: 0.5672007233273056. Std of Reward: 0.5969149116831576.
Step: 18781000. Mean Reward: 0.5513445065176908. Std of Reward: 0.5985365505615258.
Step: 18782000. Mean Reward: 0.5371273062730627. Std of Reward: 0.6059166978048399.
Step: 18783000. Mean Reward: 0.5152247619047619. Std of Reward: 0.6158664813215763.
Step: 18784000. Mean Reward: 0.5613618677042801. Std of Reward: 0.5879534919876577.
Step: 18785000. Mean Reward: 0.5478759541984732. Std of Reward: 0.60521636238

Step: 18872000. Mean Reward: 0.5665066413662239. Std of Reward: 0.6000079976170893.
Step: 18873000. Mean Reward: 0.5043614931237721. Std of Reward: 0.619224265142118.
Step: 18874000. Mean Reward: 0.5843176691729324. Std of Reward: 0.5710504687049132.
Step: 18875000. Mean Reward: 0.539279296875. Std of Reward: 0.5998009485259511.
Step: 18876000. Mean Reward: 0.5337779850746269. Std of Reward: 0.6030752047423643.
Step: 18877000. Mean Reward: 0.5344418145956608. Std of Reward: 0.6051669419916685.
Step: 18878000. Mean Reward: 0.5448648111332008. Std of Reward: 0.5954864473357482.
Step: 18879000. Mean Reward: 0.47045756457564575. Std of Reward: 0.6256866641576836.
Step: 18880000. Mean Reward: 0.5174825581395349. Std of Reward: 0.6108152336801871.
Step: 18881000. Mean Reward: 0.5132146892655367. Std of Reward: 0.5963415134072512.
Step: 18882000. Mean Reward: 0.5378926553672317. Std of Reward: 0.6017257492961546.
Step: 18883000. Mean Reward: 0.5482739726027397. Std of Reward: 0.59837452625174

Step: 18970000. Mean Reward: 0.5396468253968253. Std of Reward: 0.5883961910555269.
Step: 18971000. Mean Reward: 0.5467410358565736. Std of Reward: 0.5900709025103384.
Step: 18972000. Mean Reward: 0.4821137724550898. Std of Reward: 0.6177580166179542.
Step: 18973000. Mean Reward: 0.512609375. Std of Reward: 0.5953405240697205.
Step: 18974000. Mean Reward: 0.5944999999999999. Std of Reward: 0.57024184293505.
Step: 18975000. Mean Reward: 0.5542859813084112. Std of Reward: 0.5828888982880026.
Step: 18976000. Mean Reward: 0.5608783783783784. Std of Reward: 0.5900132276146905.
Step: 18977000. Mean Reward: 0.5581594488188977. Std of Reward: 0.5867711261619253.
Step: 18978000. Mean Reward: 0.5308857677902622. Std of Reward: 0.587867198231238.
Step: 18979000. Mean Reward: 0.5301770623742456. Std of Reward: 0.5840750661812486.
Step: 18980000. Mean Reward: 0.4876907020872866. Std of Reward: 0.612850085188414.
Step: 18981000. Mean Reward: 0.545476370510397. Std of Reward: 0.5991103264667338.
Step

Step: 19068000. Mean Reward: 0.5618301158301159. Std of Reward: 0.5840050825038546.
Step: 19069000. Mean Reward: 0.5271517509727626. Std of Reward: 0.5947900230536584.
Step: 19070000. Mean Reward: 0.47830632411067187. Std of Reward: 0.6097515004750272.
Step: 19071000. Mean Reward: 0.5294371584699454. Std of Reward: 0.6130343163388553.
Step: 19072000. Mean Reward: 0.5360220994475138. Std of Reward: 0.6099703850173143.
Step: 19073000. Mean Reward: 0.5484858223062382. Std of Reward: 0.595275650821213.
Step: 19074000. Mean Reward: 0.5165674676524953. Std of Reward: 0.6008137469898309.
Step: 19075000. Mean Reward: 0.5443454894433782. Std of Reward: 0.6008575602423458.
Step: 19076000. Mean Reward: 0.5554232283464566. Std of Reward: 0.5830474819029384.
Step: 19077000. Mean Reward: 0.5520970873786408. Std of Reward: 0.5893819654974629.
Step: 19078000. Mean Reward: 0.49823135755258136. Std of Reward: 0.6197277219406533.
Step: 19079000. Mean Reward: 0.524816091954023. Std of Reward: 0.5909599864

Step: 19166000. Mean Reward: 0.5274811320754716. Std of Reward: 0.6032772858365095.
Step: 19167000. Mean Reward: 0.5673250000000001. Std of Reward: 0.5946467084215118.
Step: 19168000. Mean Reward: 0.5485920792079207. Std of Reward: 0.5995708398650396.
Step: 19169000. Mean Reward: 0.5613834586466167. Std of Reward: 0.584097111503073.
Step: 19170000. Mean Reward: 0.5152240325865581. Std of Reward: 0.6136388665714042.
Step: 19171000. Mean Reward: 0.541284023668639. Std of Reward: 0.6020027670695148.
Step: 19172000. Mean Reward: 0.5866519230769232. Std of Reward: 0.5797774374010933.
Step: 19173000. Mean Reward: 0.5458152380952381. Std of Reward: 0.6004720192174141.
Step: 19174000. Mean Reward: 0.5317576923076923. Std of Reward: 0.5953576066101118.
Step: 19175000. Mean Reward: 0.5735515267175573. Std of Reward: 0.5848537065215713.
Step: 19176000. Mean Reward: 0.591392857142857. Std of Reward: 0.5766360589968735.
Step: 19177000. Mean Reward: 0.5757972440944882. Std of Reward: 0.5797448497225

Step: 19264000. Mean Reward: 0.5415311355311355. Std of Reward: 0.6076191059934732.
Step: 19265000. Mean Reward: 0.5540167910447761. Std of Reward: 0.5916126311321249.
Step: 19266000. Mean Reward: 0.4724803149606299. Std of Reward: 0.6334033756041031.
Step: 19267000. Mean Reward: 0.5428379446640317. Std of Reward: 0.5962770609556118.
Step: 19268000. Mean Reward: 0.506550284629981. Std of Reward: 0.6203292056422818.
Step: 19269000. Mean Reward: 0.5113480662983426. Std of Reward: 0.6244930101561025.
Step: 19270000. Mean Reward: 0.5434204545454545. Std of Reward: 0.60085727686146.
Step: 19271000. Mean Reward: 0.5636915887850468. Std of Reward: 0.6030534300225492.
Step: 19272000. Mean Reward: 0.5321608775137111. Std of Reward: 0.623063898235797.
Step: 19273000. Mean Reward: 0.502996336996337. Std of Reward: 0.6260413800304193.
Step: 19274000. Mean Reward: 0.5400632911392405. Std of Reward: 0.6240122303491613.
Step: 19275000. Mean Reward: 0.5454800724637682. Std of Reward: 0.601355711398735

Step: 19362000. Mean Reward: 0.5688197026022305. Std of Reward: 0.5941026213895365.
Step: 19363000. Mean Reward: 0.49830623818525516. Std of Reward: 0.6207766347292198.
Step: 19364000. Mean Reward: 0.5318218181818182. Std of Reward: 0.6083256641847236.
Step: 19365000. Mean Reward: 0.5455573440643863. Std of Reward: 0.6120501955580804.
Step: 19366000. Mean Reward: 0.5612222222222222. Std of Reward: 0.6052007703855039.
Step: 19367000. Mean Reward: 0.5297071823204419. Std of Reward: 0.6126310412563546.
Step: 19368000. Mean Reward: 0.48276583493282155. Std of Reward: 0.6317229576821556.
Step: 19369000. Mean Reward: 0.5560342960288809. Std of Reward: 0.5951028979198396.
Step: 19370000. Mean Reward: 0.49661948529411754. Std of Reward: 0.6275109124804826.
Step: 19371000. Mean Reward: 0.5388301158301156. Std of Reward: 0.6082617537711531.
Step: 19372000. Mean Reward: 0.5498634686346863. Std of Reward: 0.5928565035130425.
Step: 19373000. Mean Reward: 0.5434466911764706. Std of Reward: 0.6061312

Step: 19460000. Mean Reward: 0.514653024911032. Std of Reward: 0.6258019073917244.
Step: 19461000. Mean Reward: 0.5236387832699619. Std of Reward: 0.6046783473409696.
Step: 19462000. Mean Reward: 0.5304800759013283. Std of Reward: 0.6055694381982059.
Step: 19463000. Mean Reward: 0.5579347014925373. Std of Reward: 0.5978216843327891.
Step: 19464000. Mean Reward: 0.5511230769230768. Std of Reward: 0.6008533324861928.
Step: 19465000. Mean Reward: 0.5235925196850394. Std of Reward: 0.5990017493887781.
Step: 19466000. Mean Reward: 0.5413339552238805. Std of Reward: 0.6111186788123166.
Step: 19467000. Mean Reward: 0.5787550274223034. Std of Reward: 0.5876882656436774.
Step: 19468000. Mean Reward: 0.5524365671641791. Std of Reward: 0.601604362857444.
Step: 19469000. Mean Reward: 0.507825831702544. Std of Reward: 0.6098487725972712.
Step: 19470000. Mean Reward: 0.5103698884758363. Std of Reward: 0.6213374311364271.
Step: 19471000. Mean Reward: 0.5369838129496403. Std of Reward: 0.6026958424570

Step: 19558000. Mean Reward: 0.5547005870841487. Std of Reward: 0.586264994867587.
Step: 19559000. Mean Reward: 0.5772569169960474. Std of Reward: 0.5729804103060135.
Step: 19560000. Mean Reward: 0.5142715105162524. Std of Reward: 0.6016145158724362.
Step: 19561000. Mean Reward: 0.5686847195357834. Std of Reward: 0.5804183637692297.
Step: 19562000. Mean Reward: 0.547320235756385. Std of Reward: 0.6052773233130717.
Step: 19563000. Mean Reward: 0.5088219178082192. Std of Reward: 0.606104995642423.
Step: 19564000. Mean Reward: 0.5685127737226277. Std of Reward: 0.5861746316089954.
Step: 19565000. Mean Reward: 0.5284152046783626. Std of Reward: 0.6115049415107082.
Step: 19566000. Mean Reward: 0.5349529190207156. Std of Reward: 0.6052684884986542.
Step: 19567000. Mean Reward: 0.5606924603174603. Std of Reward: 0.5904609702626166.
Step: 19568000. Mean Reward: 0.5057069943289225. Std of Reward: 0.6090048276665889.
Step: 19569000. Mean Reward: 0.5379459459459458. Std of Reward: 0.5990368209954

Step: 19656000. Mean Reward: 0.49372452830188673. Std of Reward: 0.6336045814423832.
Step: 19657000. Mean Reward: 0.5064225092250922. Std of Reward: 0.6266178002669697.
Step: 19658000. Mean Reward: 0.5144490566037736. Std of Reward: 0.6311178568948616.
Step: 19659000. Mean Reward: 0.5282485768500949. Std of Reward: 0.6038943296301553.
Step: 19660000. Mean Reward: 0.5259744990892532. Std of Reward: 0.6238565053282797.
Step: 19661000. Mean Reward: 0.47956052141527. Std of Reward: 0.6326669104050849.
Step: 19662000. Mean Reward: 0.5198014981273408. Std of Reward: 0.5998107175823008.
Step: 19663000. Mean Reward: 0.5407158671586716. Std of Reward: 0.6098732646815447.
Step: 19664000. Mean Reward: 0.5443907156673113. Std of Reward: 0.5919266988237132.
Step: 19665000. Mean Reward: 0.5182941176470588. Std of Reward: 0.6197844600120422.
Step: 19666000. Mean Reward: 0.5302170542635659. Std of Reward: 0.6168545361581769.
Step: 19667000. Mean Reward: 0.5379631067961166. Std of Reward: 0.60906813159

Step: 19754000. Mean Reward: 0.5380340909090909. Std of Reward: 0.618325058222363.
Step: 19755000. Mean Reward: 0.5670237659963437. Std of Reward: 0.6039368961839382.
Step: 19756000. Mean Reward: 0.544062271062271. Std of Reward: 0.609026271835065.
Step: 19757000. Mean Reward: 0.584810408921933. Std of Reward: 0.596893473807354.
Step: 19758000. Mean Reward: 0.5567428571428571. Std of Reward: 0.6106349753848573.
Step: 19759000. Mean Reward: 0.5367108655616942. Std of Reward: 0.607108461773576.
Step: 19760000. Mean Reward: 0.5378021978021977. Std of Reward: 0.6149748984385098.
Step: 19761000. Mean Reward: 0.4846751824817518. Std of Reward: 0.6364914409067842.
Step: 19762000. Mean Reward: 0.535203036053131. Std of Reward: 0.6182174032149541.
Step: 19763000. Mean Reward: 0.5893722222222222. Std of Reward: 0.5990236843511116.
Step: 19764000. Mean Reward: 0.5009800724637681. Std of Reward: 0.6271901867854716.
Step: 19765000. Mean Reward: 0.5455923913043479. Std of Reward: 0.6199910899430526.

Step: 19852000. Mean Reward: 0.568756227758007. Std of Reward: 0.6115120772693442.
Step: 19853000. Mean Reward: 0.5428185117967331. Std of Reward: 0.6169928505163961.
Step: 19854000. Mean Reward: 0.5610617977528091. Std of Reward: 0.6043917036053938.
Step: 19855000. Mean Reward: 0.47220577617328513. Std of Reward: 0.6427223189813905.
Step: 19856000. Mean Reward: 0.537171270718232. Std of Reward: 0.6292247986830772.
Step: 19857000. Mean Reward: 0.557329650092081. Std of Reward: 0.6047634394256912.
Step: 19858000. Mean Reward: 0.5084808043875686. Std of Reward: 0.6382564179982754.
Step: 19859000. Mean Reward: 0.5348838951310861. Std of Reward: 0.6129617586814057.
Step: 19860000. Mean Reward: 0.5201575091575091. Std of Reward: 0.6223434478905175.
Step: 19861000. Mean Reward: 0.5345072463768116. Std of Reward: 0.6220945102294626.
Step: 19862000. Mean Reward: 0.516556621880998. Std of Reward: 0.6269755611501673.
Step: 19863000. Mean Reward: 0.5328840579710145. Std of Reward: 0.6170263109799

Step: 19950000. Mean Reward: 0.556438077634011. Std of Reward: 0.6002489088995677.
Saved Model
Step: 19951000. Mean Reward: 0.5970461811722912. Std of Reward: 0.5912966556559043.
Step: 19952000. Mean Reward: 0.5271600719424461. Std of Reward: 0.6176294795565226.
Step: 19953000. Mean Reward: 0.5469912126537785. Std of Reward: 0.6156492350280481.
Step: 19954000. Mean Reward: 0.5670593692022264. Std of Reward: 0.5992219911018039.
Step: 19955000. Mean Reward: 0.5594606946983546. Std of Reward: 0.6008818137723668.
Step: 19956000. Mean Reward: 0.5710371024734983. Std of Reward: 0.6017247944911239.
Step: 19957000. Mean Reward: 0.5506498194945849. Std of Reward: 0.6064679807455976.
Step: 19958000. Mean Reward: 0.5746315789473684. Std of Reward: 0.6032630872755226.
Step: 19959000. Mean Reward: 0.5118330308529945. Std of Reward: 0.614347225074021.
Step: 19960000. Mean Reward: 0.6038608058608059. Std of Reward: 0.5808125851023461.
Step: 19961000. Mean Reward: 0.5487953667953668. Std of Reward: 0.

Step: 20048000. Mean Reward: 0.5100490797546012. Std of Reward: 0.6182647410492657.
Step: 20049000. Mean Reward: 0.5312149362477231. Std of Reward: 0.6016065659362746.
Step: 20050000. Mean Reward: 0.5605360623781678. Std of Reward: 0.5983451050053893.
Saved Model
Step: 20051000. Mean Reward: 0.502890977443609. Std of Reward: 0.6105527381060698.
Step: 20052000. Mean Reward: 0.5217570621468927. Std of Reward: 0.6233365051144671.
Step: 20053000. Mean Reward: 0.5533307086614173. Std of Reward: 0.6014811122778977.
Step: 20054000. Mean Reward: 0.5298721374045802. Std of Reward: 0.6063134509696525.
Step: 20055000. Mean Reward: 0.5472563600782778. Std of Reward: 0.6065582694840704.
Step: 20056000. Mean Reward: 0.5190191938579655. Std of Reward: 0.6134023703587476.
Step: 20057000. Mean Reward: 0.5642887189292543. Std of Reward: 0.599521972422755.
Step: 20058000. Mean Reward: 0.5914893203883496. Std of Reward: 0.5841403819359486.
Step: 20059000. Mean Reward: 0.5297504835589942. Std of Reward: 0.

Step: 20146000. Mean Reward: 0.4876735537190082. Std of Reward: 0.6142721057309266.
Step: 20147000. Mean Reward: 0.5135094339622641. Std of Reward: 0.6163269783569619.
Step: 20148000. Mean Reward: 0.567349609375. Std of Reward: 0.5923329346808769.
Step: 20149000. Mean Reward: 0.5277103174603175. Std of Reward: 0.6046669844763911.
Step: 20150000. Mean Reward: 0.5337552986512524. Std of Reward: 0.6031287242706361.
Saved Model
Step: 20151000. Mean Reward: 0.5155752895752895. Std of Reward: 0.6173625377513108.
Step: 20152000. Mean Reward: 0.5126963249516441. Std of Reward: 0.6212446689745037.
Step: 20153000. Mean Reward: 0.486831983805668. Std of Reward: 0.6217049221552753.
Step: 20154000. Mean Reward: 0.5300754352030947. Std of Reward: 0.6114121461758413.
Step: 20155000. Mean Reward: 0.5018988326848249. Std of Reward: 0.611370252053462.
Step: 20156000. Mean Reward: 0.5467753036437246. Std of Reward: 0.5944985606209432.
Step: 20157000. Mean Reward: 0.5221187739463601. Std of Reward: 0.6092

Step: 20244000. Mean Reward: 0.5469542857142857. Std of Reward: 0.6011494337681171.
Step: 20245000. Mean Reward: 0.5141908396946564. Std of Reward: 0.6052624011676538.
Step: 20246000. Mean Reward: 0.5238929292929293. Std of Reward: 0.611041011576795.
Step: 20247000. Mean Reward: 0.499524500907441. Std of Reward: 0.6167572822638391.
Step: 20248000. Mean Reward: 0.5020577689243028. Std of Reward: 0.6172106405032246.
Step: 20249000. Mean Reward: 0.5434438305709024. Std of Reward: 0.6039309749761168.
Step: 20250000. Mean Reward: 0.5485342205323194. Std of Reward: 0.5952563956696384.
Saved Model
Step: 20251000. Mean Reward: 0.5643684210526315. Std of Reward: 0.5827608843097849.
Step: 20252000. Mean Reward: 0.5360721062618595. Std of Reward: 0.5998655814958094.
Step: 20253000. Mean Reward: 0.5682519685039371. Std of Reward: 0.5907280169885123.
Step: 20254000. Mean Reward: 0.5043234200743495. Std of Reward: 0.6139314374566567.
Step: 20255000. Mean Reward: 0.5504189189189188. Std of Reward: 0.

Step: 20342000. Mean Reward: 0.5573394833948339. Std of Reward: 0.5865708520339394.
Step: 20343000. Mean Reward: 0.5542414448669202. Std of Reward: 0.6017054948568096.
Step: 20344000. Mean Reward: 0.5529083665338645. Std of Reward: 0.5805825446953271.
Step: 20345000. Mean Reward: 0.508638671875. Std of Reward: 0.5999739189551168.
Step: 20346000. Mean Reward: 0.5280037243947858. Std of Reward: 0.5921874168465736.
Step: 20347000. Mean Reward: 0.5491324376199617. Std of Reward: 0.5888761635616953.
Step: 20348000. Mean Reward: 0.554335238095238. Std of Reward: 0.5854578525728636.
Step: 20349000. Mean Reward: 0.5190380952380953. Std of Reward: 0.6059423848740473.
Step: 20350000. Mean Reward: 0.5464933837429111. Std of Reward: 0.5911297476025451.
Saved Model
Step: 20351000. Mean Reward: 0.5428847583643123. Std of Reward: 0.5920227829796325.
Step: 20352000. Mean Reward: 0.5424906015037594. Std of Reward: 0.600534140304335.
Step: 20353000. Mean Reward: 0.5236254901960785. Std of Reward: 0.5955

Step: 20441000. Mean Reward: 0.4945187969924812. Std of Reward: 0.6075262499542841.
Step: 20442000. Mean Reward: 0.5306079545454544. Std of Reward: 0.5899166926396532.
Step: 20443000. Mean Reward: 0.5463425742574258. Std of Reward: 0.5831451048718042.
Step: 20444000. Mean Reward: 0.5402739463601534. Std of Reward: 0.5879051840332434.
Step: 20445000. Mean Reward: 0.5500094339622641. Std of Reward: 0.5770556183795919.
Step: 20446000. Mean Reward: 0.5159429133858267. Std of Reward: 0.5943851109566467.
Step: 20447000. Mean Reward: 0.525982. Std of Reward: 0.599891104848205.
Step: 20448000. Mean Reward: 0.5357890772128061. Std of Reward: 0.590794412950987.
Step: 20449000. Mean Reward: 0.554969696969697. Std of Reward: 0.5825531571452583.
Step: 20450000. Mean Reward: 0.5581048689138577. Std of Reward: 0.5903818990749292.
Saved Model
Step: 20451000. Mean Reward: 0.5183757225433525. Std of Reward: 0.6069726478588269.
Step: 20452000. Mean Reward: 0.5114675572519084. Std of Reward: 0.59634372044

Step: 20540000. Mean Reward: 0.5551791590493601. Std of Reward: 0.5852100209611045.
Step: 20541000. Mean Reward: 0.5221724137931034. Std of Reward: 0.6045911865389155.
Step: 20542000. Mean Reward: 0.5375850091407678. Std of Reward: 0.6044310888166721.
Step: 20543000. Mean Reward: 0.580616122840691. Std of Reward: 0.5717885184541776.
Step: 20544000. Mean Reward: 0.540183953033268. Std of Reward: 0.6025264660465349.
Step: 20545000. Mean Reward: 0.5963377358490566. Std of Reward: 0.5657441360225753.
Step: 20546000. Mean Reward: 0.5261760154738878. Std of Reward: 0.6013073007919253.
Step: 20547000. Mean Reward: 0.5413377110694184. Std of Reward: 0.5893481806804096.
Step: 20548000. Mean Reward: 0.5389715370018976. Std of Reward: 0.6006152705140599.
Step: 20549000. Mean Reward: 0.5343763837638376. Std of Reward: 0.606816675991573.
Step: 20550000. Mean Reward: 0.5504402985074628. Std of Reward: 0.5865531240486698.
Saved Model
Step: 20551000. Mean Reward: 0.5233242718446601. Std of Reward: 0.5

Step: 20638000. Mean Reward: 0.5289309701492537. Std of Reward: 0.6048279088628081.
Step: 20639000. Mean Reward: 0.5564885496183205. Std of Reward: 0.5911928246991491.
Step: 20640000. Mean Reward: 0.5271786407766991. Std of Reward: 0.5965436556833177.
Step: 20641000. Mean Reward: 0.5244440078585462. Std of Reward: 0.599249887452173.
Step: 20642000. Mean Reward: 0.5667702702702703. Std of Reward: 0.5938456562445056.
Step: 20643000. Mean Reward: 0.5546698113207548. Std of Reward: 0.5829877587261098.
Step: 20644000. Mean Reward: 0.5266477272727274. Std of Reward: 0.5955679147296644.
Step: 20645000. Mean Reward: 0.5457180952380951. Std of Reward: 0.5863075193073783.
Step: 20646000. Mean Reward: 0.5501870078740158. Std of Reward: 0.5885735579673039.
Step: 20647000. Mean Reward: 0.5651201550387597. Std of Reward: 0.5796443953819017.
Step: 20648000. Mean Reward: 0.5631412639405204. Std of Reward: 0.5877990945752916.
Step: 20649000. Mean Reward: 0.5327892720306513. Std of Reward: 0.59084751262

Step: 20736000. Mean Reward: 0.5546374045801526. Std of Reward: 0.577485618450302.
Step: 20737000. Mean Reward: 0.5343824091778202. Std of Reward: 0.594311317170834.
Step: 20738000. Mean Reward: 0.5479923224568138. Std of Reward: 0.5767007393296445.
Step: 20739000. Mean Reward: 0.5211115384615385. Std of Reward: 0.5971784869295605.
Step: 20740000. Mean Reward: 0.5388291262135921. Std of Reward: 0.5866991669925243.
Step: 20741000. Mean Reward: 0.5478654205607477. Std of Reward: 0.5804819014949053.
Step: 20742000. Mean Reward: 0.5676120218579236. Std of Reward: 0.5859119654504634.
Step: 20743000. Mean Reward: 0.5486920077972709. Std of Reward: 0.5647732304447183.
Step: 20744000. Mean Reward: 0.5577619961612283. Std of Reward: 0.587898783654553.
Step: 20745000. Mean Reward: 0.5593275862068964. Std of Reward: 0.5754279250278321.
Step: 20746000. Mean Reward: 0.5526412213740457. Std of Reward: 0.5779462265155353.
Step: 20747000. Mean Reward: 0.4952969348659003. Std of Reward: 0.6102210656757

Step: 20834000. Mean Reward: 0.5614214559386973. Std of Reward: 0.5727744892000167.
Step: 20835000. Mean Reward: 0.5125153846153847. Std of Reward: 0.5813938554439055.
Step: 20836000. Mean Reward: 0.5027866666666666. Std of Reward: 0.6071009802357221.
Step: 20837000. Mean Reward: 0.5758007380073801. Std of Reward: 0.5685537917991551.
Step: 20838000. Mean Reward: 0.5217291666666666. Std of Reward: 0.594906139577397.
Step: 20839000. Mean Reward: 0.5390777988614801. Std of Reward: 0.5849357735875619.
Step: 20840000. Mean Reward: 0.513801094890511. Std of Reward: 0.6026109967105359.
Step: 20841000. Mean Reward: 0.565124521072797. Std of Reward: 0.5725542335839335.
Step: 20842000. Mean Reward: 0.5650885122410546. Std of Reward: 0.575379178735112.
Step: 20843000. Mean Reward: 0.5380938628158846. Std of Reward: 0.6010105495916505.
Step: 20844000. Mean Reward: 0.5388825757575757. Std of Reward: 0.5905728002741648.
Step: 20845000. Mean Reward: 0.49906213592233006. Std of Reward: 0.6040488632129

Step: 20932000. Mean Reward: 0.5382246093749999. Std of Reward: 0.593079659703276.
Step: 20933000. Mean Reward: 0.5272354085603114. Std of Reward: 0.5850084812906831.
Step: 20934000. Mean Reward: 0.5171736641221375. Std of Reward: 0.59918091546635.
Step: 20935000. Mean Reward: 0.5258462998102468. Std of Reward: 0.5977248618987246.
Step: 20936000. Mean Reward: 0.5381527001862196. Std of Reward: 0.5859818090975757.
Step: 20937000. Mean Reward: 0.5594208695652174. Std of Reward: 0.5938891539813091.
Step: 20938000. Mean Reward: 0.5841177570093459. Std of Reward: 0.5713963018753605.
Step: 20939000. Mean Reward: 0.527017274472169. Std of Reward: 0.5854494686271362.
Step: 20940000. Mean Reward: 0.5128038834951456. Std of Reward: 0.6079656243351823.
Step: 20941000. Mean Reward: 0.5828758992805756. Std of Reward: 0.5694435445470865.
Step: 20942000. Mean Reward: 0.542406779661017. Std of Reward: 0.5833898215354582.
Step: 20943000. Mean Reward: 0.521144424131627. Std of Reward: 0.6036613540139621

Step: 21030000. Mean Reward: 0.49891493383742896. Std of Reward: 0.5985582254764252.
Step: 21031000. Mean Reward: 0.5076507352941176. Std of Reward: 0.6051643427124865.
Step: 21032000. Mean Reward: 0.5210401529636711. Std of Reward: 0.5973055819551462.
Step: 21033000. Mean Reward: 0.5009204771371769. Std of Reward: 0.5865326540886681.
Step: 21034000. Mean Reward: 0.5653805970149254. Std of Reward: 0.5805143087027217.
Step: 21035000. Mean Reward: 0.5484116575591985. Std of Reward: 0.5949257947105727.
Step: 21036000. Mean Reward: 0.486496336996337. Std of Reward: 0.6147847958632985.
Step: 21037000. Mean Reward: 0.5270076923076923. Std of Reward: 0.5821661637784532.
Step: 21038000. Mean Reward: 0.5297124304267161. Std of Reward: 0.5829970745362627.
Step: 21039000. Mean Reward: 0.5597649253731343. Std of Reward: 0.5740395066436463.
Step: 21040000. Mean Reward: 0.511192523364486. Std of Reward: 0.5936091057349017.
Step: 21041000. Mean Reward: 0.5683181818181817. Std of Reward: 0.57332675562

Step: 21128000. Mean Reward: 0.5683351749539595. Std of Reward: 0.5755743468274886.
Step: 21129000. Mean Reward: 0.5888435114503816. Std of Reward: 0.562673357795091.
Step: 21130000. Mean Reward: 0.52325. Std of Reward: 0.583370409358347.
Step: 21131000. Mean Reward: 0.5169743083003953. Std of Reward: 0.5937723021249531.
Step: 21132000. Mean Reward: 0.5405584905660377. Std of Reward: 0.5921267149680495.
Step: 21133000. Mean Reward: 0.5340527306967985. Std of Reward: 0.5886055763636984.
Step: 21134000. Mean Reward: 0.5276635859519409. Std of Reward: 0.593816984777037.
Step: 21135000. Mean Reward: 0.5191344696969697. Std of Reward: 0.5859917219601138.
Step: 21136000. Mean Reward: 0.5438797709923664. Std of Reward: 0.5779323295418924.
Step: 21137000. Mean Reward: 0.534513358778626. Std of Reward: 0.5854597510081719.
Step: 21138000. Mean Reward: 0.5294648956356737. Std of Reward: 0.5929994369804618.
Step: 21139000. Mean Reward: 0.5388745387453876. Std of Reward: 0.5941414368566637.
Step: 2

Step: 21227000. Mean Reward: 0.45947975708502026. Std of Reward: 0.6148615184227973.
Step: 21228000. Mean Reward: 0.5019545454545454. Std of Reward: 0.5907487042141711.
Step: 21229000. Mean Reward: 0.5352730844793714. Std of Reward: 0.5893477081508562.
Step: 21230000. Mean Reward: 0.5639847328244275. Std of Reward: 0.5683066649404777.
Step: 21231000. Mean Reward: 0.5363037974683544. Std of Reward: 0.6003499415945331.
Step: 21232000. Mean Reward: 0.5546067415730337. Std of Reward: 0.5954649324664282.
Step: 21233000. Mean Reward: 0.5378615107913669. Std of Reward: 0.5974146131702607.
Step: 21234000. Mean Reward: 0.548938202247191. Std of Reward: 0.5837071070474831.
Step: 21235000. Mean Reward: 0.5254718045112782. Std of Reward: 0.5931919083926028.
Step: 21236000. Mean Reward: 0.5415841209829867. Std of Reward: 0.5857072510601642.
Step: 21237000. Mean Reward: 0.5408280961182994. Std of Reward: 0.5907650777622178.
Step: 21238000. Mean Reward: 0.5149943820224718. Std of Reward: 0.5876906639

Step: 21325000. Mean Reward: 0.5171211538461539. Std of Reward: 0.5755298083694237.
Step: 21326000. Mean Reward: 0.5452110655737704. Std of Reward: 0.5753766322817723.
Step: 21327000. Mean Reward: 0.5167166666666667. Std of Reward: 0.5889749627609473.
Step: 21328000. Mean Reward: 0.540600386100386. Std of Reward: 0.5758736962684656.
Step: 21329000. Mean Reward: 0.5078171206225681. Std of Reward: 0.5891208460277009.
Step: 21330000. Mean Reward: 0.5356608527131783. Std of Reward: 0.5805922358087847.
Step: 21331000. Mean Reward: 0.5178342541436464. Std of Reward: 0.6041107265642965.
Step: 21332000. Mean Reward: 0.4943371868978805. Std of Reward: 0.6045850692484682.
Step: 21333000. Mean Reward: 0.5667564102564103. Std of Reward: 0.5658380872920603.
Step: 21334000. Mean Reward: 0.5443830188679246. Std of Reward: 0.5821884627221116.
Step: 21335000. Mean Reward: 0.5649847036328872. Std of Reward: 0.5640375376996475.
Step: 21336000. Mean Reward: 0.5409299242424243. Std of Reward: 0.58591242365

Step: 21423000. Mean Reward: 0.5263984374999999. Std of Reward: 0.5826320510172424.
Step: 21424000. Mean Reward: 0.5048710317460318. Std of Reward: 0.5872986122531629.
Step: 21425000. Mean Reward: 0.5458043875685558. Std of Reward: 0.5702572113942193.
Step: 21426000. Mean Reward: 0.4775964912280702. Std of Reward: 0.6014774588269849.
Step: 21427000. Mean Reward: 0.5577495429616087. Std of Reward: 0.5767076965439167.
Step: 21428000. Mean Reward: 0.55358984375. Std of Reward: 0.5743237699323881.
Step: 21429000. Mean Reward: 0.5320596421471172. Std of Reward: 0.5767450419043959.
Step: 21430000. Mean Reward: 0.5413462282398454. Std of Reward: 0.5773942115087747.
Step: 21431000. Mean Reward: 0.5010174418604652. Std of Reward: 0.5926420198140442.
Step: 21432000. Mean Reward: 0.5443201506591336. Std of Reward: 0.5854326768816427.
Step: 21433000. Mean Reward: 0.5383558052434457. Std of Reward: 0.5852567354899357.
Step: 21434000. Mean Reward: 0.5379980582524271. Std of Reward: 0.583683583981785

Step: 21521000. Mean Reward: 0.5521515151515152. Std of Reward: 0.5848959501464378.
Step: 21522000. Mean Reward: 0.5238582230623818. Std of Reward: 0.5976264146656382.
Step: 21523000. Mean Reward: 0.5341123388581953. Std of Reward: 0.5950556315935636.
Step: 21524000. Mean Reward: 0.5103545966228893. Std of Reward: 0.6002120044169607.
Step: 21525000. Mean Reward: 0.577034155597723. Std of Reward: 0.5778719374247175.
Step: 21526000. Mean Reward: 0.5201130268199233. Std of Reward: 0.5881875504502212.
Step: 21527000. Mean Reward: 0.506962962962963. Std of Reward: 0.6014593206674342.
Step: 21528000. Mean Reward: 0.48470192307692306. Std of Reward: 0.6148823289523291.
Step: 21529000. Mean Reward: 0.5710924214417745. Std of Reward: 0.5717578132422535.
Step: 21530000. Mean Reward: 0.5660640301318267. Std of Reward: 0.5867767067815686.
Step: 21531000. Mean Reward: 0.5020018450184501. Std of Reward: 0.6094505222829718.
Step: 21532000. Mean Reward: 0.4981842105263158. Std of Reward: 0.60962284647

Step: 21620000. Mean Reward: 0.5430297397769517. Std of Reward: 0.6081103945038678.
Step: 21621000. Mean Reward: 0.5288081180811808. Std of Reward: 0.6067949160897769.
Step: 21622000. Mean Reward: 0.5193891050583658. Std of Reward: 0.5991949498259014.
Step: 21623000. Mean Reward: 0.5247842778793419. Std of Reward: 0.6054092162287253.
Step: 21624000. Mean Reward: 0.5307055449330784. Std of Reward: 0.6040836757373845.
Step: 21625000. Mean Reward: 0.5227604355716879. Std of Reward: 0.6077650622741031.
Step: 21626000. Mean Reward: 0.5683421550094517. Std of Reward: 0.5973068275660187.
Step: 21627000. Mean Reward: 0.5609084112149533. Std of Reward: 0.5885670076147795.
Step: 21628000. Mean Reward: 0.5622260536398467. Std of Reward: 0.5847915326329679.
Step: 21629000. Mean Reward: 0.5469550561797754. Std of Reward: 0.6020478829883589.
Step: 21630000. Mean Reward: 0.5114514285714286. Std of Reward: 0.6026908265460169.
Step: 21631000. Mean Reward: 0.550848948374761. Std of Reward: 0.59810853083

Step: 21718000. Mean Reward: 0.5633924528301886. Std of Reward: 0.5961239809785857.
Step: 21719000. Mean Reward: 0.5492586872586873. Std of Reward: 0.5938001128389444.
Step: 21720000. Mean Reward: 0.5299458955223881. Std of Reward: 0.5951358546630687.
Step: 21721000. Mean Reward: 0.5309121756487026. Std of Reward: 0.5927415046113098.
Step: 21722000. Mean Reward: 0.516222024866785. Std of Reward: 0.6110368488347577.
Step: 21723000. Mean Reward: 0.5201515151515151. Std of Reward: 0.601182527680748.
Step: 21724000. Mean Reward: 0.5551716417910448. Std of Reward: 0.5991224493749718.
Step: 21725000. Mean Reward: 0.5698981132075471. Std of Reward: 0.5876687593799393.
Step: 21726000. Mean Reward: 0.5603669902912621. Std of Reward: 0.5897268211388671.
Step: 21727000. Mean Reward: 0.5479769673704414. Std of Reward: 0.5908733178782389.
Step: 21728000. Mean Reward: 0.5776803738317756. Std of Reward: 0.5829746060482022.
Step: 21729000. Mean Reward: 0.5167790927021695. Std of Reward: 0.602037279668

Step: 21816000. Mean Reward: 0.540725338491296. Std of Reward: 0.5924457601499592.
Step: 21817000. Mean Reward: 0.5397679245283019. Std of Reward: 0.5884920743839066.
Step: 21818000. Mean Reward: 0.5750095969289827. Std of Reward: 0.5699213350304322.
Step: 21819000. Mean Reward: 0.5334066797642436. Std of Reward: 0.5770147700255855.
Step: 21820000. Mean Reward: 0.5259148936170214. Std of Reward: 0.5933319029926964.
Step: 21821000. Mean Reward: 0.500036750483559. Std of Reward: 0.5992767470369182.
Step: 21822000. Mean Reward: 0.5906666666666667. Std of Reward: 0.5655285006782285.
Step: 21823000. Mean Reward: 0.5496280373831777. Std of Reward: 0.5798971242625056.
Step: 21824000. Mean Reward: 0.5548066914498141. Std of Reward: 0.5776562533362047.
Step: 21825000. Mean Reward: 0.5653267326732674. Std of Reward: 0.5711442173463166.
Step: 21826000. Mean Reward: 0.5665823529411764. Std of Reward: 0.5719479477913169.
Step: 21827000. Mean Reward: 0.5281121856866537. Std of Reward: 0.589327068446

Step: 21914000. Mean Reward: 0.5727954110898662. Std of Reward: 0.5673338085324985.
Step: 21915000. Mean Reward: 0.5630960451977401. Std of Reward: 0.5838852338372762.
Step: 21916000. Mean Reward: 0.5557382297551788. Std of Reward: 0.5651052942799524.
Step: 21917000. Mean Reward: 0.5341174242424243. Std of Reward: 0.5917940794091037.
Step: 21918000. Mean Reward: 0.5557863894139886. Std of Reward: 0.5731705553859547.
Step: 21919000. Mean Reward: 0.5664093457943925. Std of Reward: 0.5825428328133382.
Step: 21920000. Mean Reward: 0.5320722656250001. Std of Reward: 0.5888863601479528.
Step: 21921000. Mean Reward: 0.5593828571428571. Std of Reward: 0.5841539915136766.
Step: 21922000. Mean Reward: 0.5153953488372094. Std of Reward: 0.5934326228038227.
Step: 21923000. Mean Reward: 0.49886083499005957. Std of Reward: 0.5838739553554722.
Step: 21924000. Mean Reward: 0.5546639676113361. Std of Reward: 0.5780821009357708.
Step: 21925000. Mean Reward: 0.5503378640776698. Std of Reward: 0.574586972

Step: 22012000. Mean Reward: 0.5296685823754789. Std of Reward: 0.5899632426772459.
Step: 22013000. Mean Reward: 0.5602709923664122. Std of Reward: 0.5768406078532274.
Step: 22014000. Mean Reward: 0.5576917148362235. Std of Reward: 0.580767561798129.
Step: 22015000. Mean Reward: 0.5191314285714286. Std of Reward: 0.5896185949310244.
Step: 22016000. Mean Reward: 0.5569710424710425. Std of Reward: 0.5832887298797267.
Step: 22017000. Mean Reward: 0.5264520547945205. Std of Reward: 0.5960403205725465.
Step: 22018000. Mean Reward: 0.5206627450980392. Std of Reward: 0.5902727883703462.
Step: 22019000. Mean Reward: 0.5559959999999999. Std of Reward: 0.5779893805114416.
Step: 22020000. Mean Reward: 0.5421178781925343. Std of Reward: 0.5895520975216338.
Step: 22021000. Mean Reward: 0.5087241379310345. Std of Reward: 0.6049824104910548.
Step: 22022000. Mean Reward: 0.4962188099808061. Std of Reward: 0.5986426858869607.
Step: 22023000. Mean Reward: 0.5737390438247012. Std of Reward: 0.57375412716

Step: 22110000. Mean Reward: 0.5497415730337079. Std of Reward: 0.5784941454212515.
Step: 22111000. Mean Reward: 0.5557537878787878. Std of Reward: 0.569557882751028.
Step: 22112000. Mean Reward: 0.5711401273885351. Std of Reward: 0.5662225574900734.
Step: 22113000. Mean Reward: 0.4912093023255814. Std of Reward: 0.6003957341519719.
Step: 22114000. Mean Reward: 0.5448148854961832. Std of Reward: 0.5713601473875272.
Step: 22115000. Mean Reward: 0.5148596837944664. Std of Reward: 0.5942142469012783.
Step: 22116000. Mean Reward: 0.550630859375. Std of Reward: 0.5636261144928648.
Step: 22117000. Mean Reward: 0.548948. Std of Reward: 0.5776634533151634.
Step: 22118000. Mean Reward: 0.5287408163265306. Std of Reward: 0.5950611666102039.
Step: 22119000. Mean Reward: 0.5503832684824903. Std of Reward: 0.5732774392612802.
Step: 22120000. Mean Reward: 0.533046. Std of Reward: 0.5863918450012756.
Step: 22121000. Mean Reward: 0.5774962686567164. Std of Reward: 0.5630273249521675.
Step: 22122000. M

Step: 22208000. Mean Reward: 0.5215703564727954. Std of Reward: 0.5951702936520262.
Step: 22209000. Mean Reward: 0.5296439114391144. Std of Reward: 0.5926553983314558.
Step: 22210000. Mean Reward: 0.5585194805194805. Std of Reward: 0.5788634801537923.
Step: 22211000. Mean Reward: 0.502834879406308. Std of Reward: 0.598953113134354.
Step: 22212000. Mean Reward: 0.5650920000000001. Std of Reward: 0.5643209260128496.
Step: 22213000. Mean Reward: 0.5000209125475286. Std of Reward: 0.5919246862257274.
Step: 22214000. Mean Reward: 0.5996274131274131. Std of Reward: 0.5686765627258558.
Step: 22215000. Mean Reward: 0.559443359375. Std of Reward: 0.5773170339850883.
Step: 22216000. Mean Reward: 0.5708620037807183. Std of Reward: 0.575905403592552.
Step: 22217000. Mean Reward: 0.5732462962962963. Std of Reward: 0.5648979295586203.
Step: 22218000. Mean Reward: 0.586316287878788. Std of Reward: 0.561821675478737.
Step: 22219000. Mean Reward: 0.5798003802281368. Std of Reward: 0.5618420884868743.
S

Step: 22306000. Mean Reward: 0.5193032629558542. Std of Reward: 0.6036816541104315.
Step: 22307000. Mean Reward: 0.5604386617100372. Std of Reward: 0.5866549312154451.
Step: 22308000. Mean Reward: 0.5246296992481203. Std of Reward: 0.5966618481163798.
Step: 22309000. Mean Reward: 0.530396449704142. Std of Reward: 0.5969383478644201.
Step: 22310000. Mean Reward: 0.5554751908396945. Std of Reward: 0.5861031952577769.
Step: 22311000. Mean Reward: 0.5106974951830443. Std of Reward: 0.6073359266161179.
Step: 22312000. Mean Reward: 0.5251925233644859. Std of Reward: 0.5826479919822279.
Step: 22313000. Mean Reward: 0.4962437137330754. Std of Reward: 0.604316365344951.
Step: 22314000. Mean Reward: 0.4981996268656716. Std of Reward: 0.6134647103303957.
Step: 22315000. Mean Reward: 0.5275756385068763. Std of Reward: 0.5806067152856643.
Step: 22316000. Mean Reward: 0.48773694779116467. Std of Reward: 0.5979463155379275.
Step: 22317000. Mean Reward: 0.5139723247232472. Std of Reward: 0.59359500864

Step: 22404000. Mean Reward: 0.5370825147347741. Std of Reward: 0.5697910148966351.
Step: 22405000. Mean Reward: 0.5148644400785856. Std of Reward: 0.585229674066001.
Step: 22406000. Mean Reward: 0.5521775700934579. Std of Reward: 0.574206850173889.
Step: 22407000. Mean Reward: 0.5115352380952382. Std of Reward: 0.5816510172785171.
Step: 22408000. Mean Reward: 0.4856288461538461. Std of Reward: 0.5994553832234654.
Step: 22409000. Mean Reward: 0.48697227722772274. Std of Reward: 0.5917828267894677.
Step: 22410000. Mean Reward: 0.5334271653543307. Std of Reward: 0.5805662018873081.
Step: 22411000. Mean Reward: 0.5610038759689923. Std of Reward: 0.5729090698449085.
Step: 22412000. Mean Reward: 0.5721576923076923. Std of Reward: 0.567929994787465.
Step: 22413000. Mean Reward: 0.568904397705545. Std of Reward: 0.5785492987217303.
Step: 22414000. Mean Reward: 0.5167726432532347. Std of Reward: 0.5825733959061273.
Step: 22415000. Mean Reward: 0.5322178988326847. Std of Reward: 0.5813807521575

Step: 22502000. Mean Reward: 0.5342602739726029. Std of Reward: 0.5750918966530576.
Step: 22503000. Mean Reward: 0.5681091234347048. Std of Reward: 0.5705193760041726.
Step: 22504000. Mean Reward: 0.5467467411545623. Std of Reward: 0.5741403476846694.
Step: 22505000. Mean Reward: 0.5353773946360153. Std of Reward: 0.5727505368802899.
Step: 22506000. Mean Reward: 0.5405880077369439. Std of Reward: 0.5599914707240208.
Step: 22507000. Mean Reward: 0.5249577735124761. Std of Reward: 0.5686861968954237.
Step: 22508000. Mean Reward: 0.5488649155722326. Std of Reward: 0.5672657408803986.
Step: 22509000. Mean Reward: 0.5467630597014924. Std of Reward: 0.5804171185844386.
Step: 22510000. Mean Reward: 0.5301015037593985. Std of Reward: 0.5798006017738232.
Step: 22511000. Mean Reward: 0.5451578947368421. Std of Reward: 0.5786991484290955.
Step: 22512000. Mean Reward: 0.5362032193158954. Std of Reward: 0.5662609413262835.
Step: 22513000. Mean Reward: 0.508403738317757. Std of Reward: 0.58631192424

Step: 22600000. Mean Reward: 0.5616817359855335. Std of Reward: 0.573446217329174.
Saved Model
Step: 22601000. Mean Reward: 0.5840295202952029. Std of Reward: 0.5679044335299884.
Step: 22602000. Mean Reward: 0.5147883211678832. Std of Reward: 0.6064152227190184.
Step: 22603000. Mean Reward: 0.5637077534791253. Std of Reward: 0.5872638208741253.
Step: 22604000. Mean Reward: 0.5295598526703499. Std of Reward: 0.5910023009381257.
Step: 22605000. Mean Reward: 0.5513225806451613. Std of Reward: 0.5867134327882129.
Step: 22606000. Mean Reward: 0.5324665391969408. Std of Reward: 0.5756432301534377.
Step: 22607000. Mean Reward: 0.5295711645101664. Std of Reward: 0.5872750798623202.
Step: 22608000. Mean Reward: 0.5860859232175502. Std of Reward: 0.5557254937710459.
Step: 22609000. Mean Reward: 0.57493. Std of Reward: 0.5662557594409084.
Step: 22610000. Mean Reward: 0.5642657992565056. Std of Reward: 0.5600227541027569.
Step: 22611000. Mean Reward: 0.5572232824427481. Std of Reward: 0.5755045792

Step: 22698000. Mean Reward: 0.5057866666666667. Std of Reward: 0.5979723358892859.
Step: 22699000. Mean Reward: 0.5292786885245901. Std of Reward: 0.5971261050702654.
Step: 22700000. Mean Reward: 0.5139537892791127. Std of Reward: 0.6054224199285857.
Saved Model
Step: 22701000. Mean Reward: 0.5395018796992481. Std of Reward: 0.5843086477719873.
Step: 22702000. Mean Reward: 0.4620325047801147. Std of Reward: 0.6143376984516337.
Step: 22703000. Mean Reward: 0.5529647495361781. Std of Reward: 0.5866474959221172.
Step: 22704000. Mean Reward: 0.56492700729927. Std of Reward: 0.5799869289369882.
Step: 22705000. Mean Reward: 0.522541015625. Std of Reward: 0.5852240942623761.
Step: 22706000. Mean Reward: 0.4749648798521257. Std of Reward: 0.610851062819507.
Step: 22707000. Mean Reward: 0.5347985074626866. Std of Reward: 0.5926081963035362.
Step: 22708000. Mean Reward: 0.5366791744840524. Std of Reward: 0.5816411924961334.
Step: 22709000. Mean Reward: 0.5355719626168225. Std of Reward: 0.58540

Step: 22796000. Mean Reward: 0.5204970760233918. Std of Reward: 0.60590361730331.
Step: 22797000. Mean Reward: 0.556056974459725. Std of Reward: 0.5889370287780309.
Step: 22798000. Mean Reward: 0.5249043977055449. Std of Reward: 0.5956445120553487.
Step: 22799000. Mean Reward: 0.543762962962963. Std of Reward: 0.598709602610387.
Step: 22800000. Mean Reward: 0.5010132575757577. Std of Reward: 0.6094722271421152.
Saved Model
Step: 22801000. Mean Reward: 0.4796905222437137. Std of Reward: 0.6055279431291112.
Step: 22802000. Mean Reward: 0.540998046875. Std of Reward: 0.5894150453621881.
Step: 22803000. Mean Reward: 0.5302533081285443. Std of Reward: 0.5913032385806584.
Step: 22804000. Mean Reward: 0.5151603773584905. Std of Reward: 0.6046945702879319.
Step: 22805000. Mean Reward: 0.5381203703703704. Std of Reward: 0.5961452302174144.
Step: 22806000. Mean Reward: 0.48884456928838954. Std of Reward: 0.6099461081665422.
Step: 22807000. Mean Reward: 0.522890359168242. Std of Reward: 0.6129537

Step: 22894000. Mean Reward: 0.5239507042253522. Std of Reward: 0.6013918314159831.
Step: 22895000. Mean Reward: 0.5073712255772647. Std of Reward: 0.5917078773370867.
Step: 22896000. Mean Reward: 0.5700974658869397. Std of Reward: 0.567829572362112.
Step: 22897000. Mean Reward: 0.5701461988304094. Std of Reward: 0.5739442244486381.
Step: 22898000. Mean Reward: 0.5839209039548022. Std of Reward: 0.5719020412444751.
Step: 22899000. Mean Reward: 0.5339724770642201. Std of Reward: 0.5899089380623003.
Step: 22900000. Mean Reward: 0.5353847656249999. Std of Reward: 0.5826381897529022.
Saved Model
Step: 22901000. Mean Reward: 0.5691461988304093. Std of Reward: 0.5722345959629976.
Step: 22902000. Mean Reward: 0.5495173674588666. Std of Reward: 0.5891975961298626.
Step: 22903000. Mean Reward: 0.5672346368715084. Std of Reward: 0.5765941232221894.
Step: 22904000. Mean Reward: 0.5827115749525618. Std of Reward: 0.5794278931967355.
Step: 22905000. Mean Reward: 0.5277351851851853. Std of Reward: 0

Step: 22992000. Mean Reward: 0.5266156862745097. Std of Reward: 0.5780680275363992.
Step: 22993000. Mean Reward: 0.5181328273244783. Std of Reward: 0.5999680647764419.
Step: 22994000. Mean Reward: 0.4969901380670611. Std of Reward: 0.5930718795733044.
Step: 22995000. Mean Reward: 0.5646960784313726. Std of Reward: 0.5643166644958826.
Step: 22996000. Mean Reward: 0.525468253968254. Std of Reward: 0.5676144271255606.
Step: 22997000. Mean Reward: 0.5630863039399625. Std of Reward: 0.5742174183909131.
Step: 22998000. Mean Reward: 0.5030776515151515. Std of Reward: 0.5911675623486733.
Step: 22999000. Mean Reward: 0.5317129455909944. Std of Reward: 0.5736575896561665.
Step: 23000000. Mean Reward: 0.5485665399239544. Std of Reward: 0.5808783161695852.
Saved Model
Step: 23001000. Mean Reward: 0.5304243542435425. Std of Reward: 0.5752065253982654.
Step: 23002000. Mean Reward: 0.49434865900383135. Std of Reward: 0.5981440429038979.
Step: 23003000. Mean Reward: 0.538639097744361. Std of Reward: 0

Step: 23090000. Mean Reward: 0.5453278084714549. Std of Reward: 0.5768599059608337.
Step: 23091000. Mean Reward: 0.5191915708812261. Std of Reward: 0.5827248301457675.
Step: 23092000. Mean Reward: 0.5601866913123844. Std of Reward: 0.5704363283511412.
Step: 23093000. Mean Reward: 0.5055653775322284. Std of Reward: 0.5899275502995175.
Step: 23094000. Mean Reward: 0.5324658040665434. Std of Reward: 0.5876210389609353.
Step: 23095000. Mean Reward: 0.5476610800744878. Std of Reward: 0.5774813810578482.
Step: 23096000. Mean Reward: 0.5251992551210428. Std of Reward: 0.5799420308790166.
Step: 23097000. Mean Reward: 0.5000300751879698. Std of Reward: 0.612954762155305.
Step: 23098000. Mean Reward: 0.522934664246824. Std of Reward: 0.5989389190577721.
Step: 23099000. Mean Reward: 0.5162404580152672. Std of Reward: 0.5863140811219936.
Step: 23100000. Mean Reward: 0.5386525096525098. Std of Reward: 0.5813071567808105.
Saved Model
Step: 23101000. Mean Reward: 0.5434927797833935. Std of Reward: 0.

Step: 23188000. Mean Reward: 0.500102119460501. Std of Reward: 0.6065361675280319.
Step: 23189000. Mean Reward: 0.5192099236641221. Std of Reward: 0.6058403426791182.
Step: 23190000. Mean Reward: 0.5253576779026217. Std of Reward: 0.588349473020582.
Step: 23191000. Mean Reward: 0.5184207436399217. Std of Reward: 0.5917462904741001.
Step: 23192000. Mean Reward: 0.506935871743487. Std of Reward: 0.5944309617039275.
Step: 23193000. Mean Reward: 0.5107543520309478. Std of Reward: 0.5935431130798684.
Step: 23194000. Mean Reward: 0.5224368029739778. Std of Reward: 0.593844832147124.
Step: 23195000. Mean Reward: 0.5268307984790874. Std of Reward: 0.5909304352930522.
Step: 23196000. Mean Reward: 0.5301729323308271. Std of Reward: 0.5988766992641112.
Step: 23197000. Mean Reward: 0.5491433756805807. Std of Reward: 0.5837706049377022.
Step: 23198000. Mean Reward: 0.5369705340699815. Std of Reward: 0.5975034548064203.
Step: 23199000. Mean Reward: 0.5523240223463688. Std of Reward: 0.58059181201510

Step: 23286000. Mean Reward: 0.5779518518518519. Std of Reward: 0.566197872669166.
Step: 23287000. Mean Reward: 0.5247486818980668. Std of Reward: 0.6012941214536757.
Step: 23288000. Mean Reward: 0.5400890538033395. Std of Reward: 0.5913310474293022.
Step: 23289000. Mean Reward: 0.5309850746268656. Std of Reward: 0.5919619256498041.
Step: 23290000. Mean Reward: 0.5650556586270872. Std of Reward: 0.5815021497371016.
Step: 23291000. Mean Reward: 0.5131452830188679. Std of Reward: 0.6007389628901674.
Step: 23292000. Mean Reward: 0.5251401151631477. Std of Reward: 0.5926911116932808.
Step: 23293000. Mean Reward: 0.5681231617647058. Std of Reward: 0.5719167250097567.
Step: 23294000. Mean Reward: 0.534866788321168. Std of Reward: 0.591751253588247.
Step: 23295000. Mean Reward: 0.5615192660550459. Std of Reward: 0.5801413052287252.
Step: 23296000. Mean Reward: 0.5492859744990892. Std of Reward: 0.5854959538933703.
Step: 23297000. Mean Reward: 0.5557739463601532. Std of Reward: 0.5685179316036

Step: 23384000. Mean Reward: 0.5451109022556391. Std of Reward: 0.5787975519313661.
Step: 23385000. Mean Reward: 0.5701609848484848. Std of Reward: 0.558749149745621.
Step: 23386000. Mean Reward: 0.5353254437869822. Std of Reward: 0.5720953895265241.
Step: 23387000. Mean Reward: 0.5535286259541984. Std of Reward: 0.5712117770308335.
Step: 23388000. Mean Reward: 0.5259731800766283. Std of Reward: 0.5830015864947576.
Step: 23389000. Mean Reward: 0.5172600382409177. Std of Reward: 0.5885728772263461.
Step: 23390000. Mean Reward: 0.5373661417322834. Std of Reward: 0.5782482504673735.
Step: 23391000. Mean Reward: 0.5567178502879078. Std of Reward: 0.5636000261616643.
Step: 23392000. Mean Reward: 0.505778640776699. Std of Reward: 0.5805087202627237.
Step: 23393000. Mean Reward: 0.5009229357798165. Std of Reward: 0.581821355292112.
Step: 23394000. Mean Reward: 0.5471230769230769. Std of Reward: 0.578947616407891.
Step: 23395000. Mean Reward: 0.5106391554702495. Std of Reward: 0.59209092093933

Step: 23482000. Mean Reward: 0.5373416506717851. Std of Reward: 0.5581572525655445.
Step: 23483000. Mean Reward: 0.5681436672967864. Std of Reward: 0.5596328310916902.
Step: 23484000. Mean Reward: 0.5160092764378479. Std of Reward: 0.577688270885259.
Step: 23485000. Mean Reward: 0.5129448529411765. Std of Reward: 0.56768118762412.
Step: 23486000. Mean Reward: 0.5549422718808193. Std of Reward: 0.5648258662962389.
Step: 23487000. Mean Reward: 0.5541356589147287. Std of Reward: 0.5492248563539062.
Step: 23488000. Mean Reward: 0.5258860294117647. Std of Reward: 0.5779687283766253.
Step: 23489000. Mean Reward: 0.5755588235294118. Std of Reward: 0.5562141133791423.
Step: 23490000. Mean Reward: 0.5075344827586207. Std of Reward: 0.5627405230837015.
Step: 23491000. Mean Reward: 0.5317182835820895. Std of Reward: 0.5645205112094319.
Step: 23492000. Mean Reward: 0.5364261682242991. Std of Reward: 0.55440940510971.
Step: 23493000. Mean Reward: 0.5434354243542435. Std of Reward: 0.566023556483467

Step: 23580000. Mean Reward: 0.547949119373777. Std of Reward: 0.5470799064356534.
Step: 23581000. Mean Reward: 0.5232022263450834. Std of Reward: 0.5747973216358901.
Step: 23582000. Mean Reward: 0.5623610586011343. Std of Reward: 0.5461176985908728.
Step: 23583000. Mean Reward: 0.5555859375. Std of Reward: 0.5427709580727824.
Step: 23584000. Mean Reward: 0.5067312859884837. Std of Reward: 0.5630771270592939.
Step: 23585000. Mean Reward: 0.565429906542056. Std of Reward: 0.556597512464168.
Step: 23586000. Mean Reward: 0.5412922794117646. Std of Reward: 0.5617367052007589.
Step: 23587000. Mean Reward: 0.531623326959847. Std of Reward: 0.5539410630470157.
Step: 23588000. Mean Reward: 0.5295357798165137. Std of Reward: 0.5599882265346472.
Step: 23589000. Mean Reward: 0.48948576850094877. Std of Reward: 0.5722390353292759.
Step: 23590000. Mean Reward: 0.5208225190839696. Std of Reward: 0.5571076041687655.
Step: 23591000. Mean Reward: 0.5567443181818182. Std of Reward: 0.5495317333099943.
S

Step: 23678000. Mean Reward: 0.5457290076335878. Std of Reward: 0.5593630093074246.
Step: 23679000. Mean Reward: 0.5113977055449331. Std of Reward: 0.568720820321369.
Step: 23680000. Mean Reward: 0.5460999999999999. Std of Reward: 0.5551737193234664.
Step: 23681000. Mean Reward: 0.5378942486085343. Std of Reward: 0.5685081426832537.
Step: 23682000. Mean Reward: 0.4917444029850746. Std of Reward: 0.5752362979473185.
Step: 23683000. Mean Reward: 0.5132761904761906. Std of Reward: 0.565543974042659.
Step: 23684000. Mean Reward: 0.5771009174311926. Std of Reward: 0.552159313284549.
Step: 23685000. Mean Reward: 0.5311371115173675. Std of Reward: 0.5646185243972879.
Step: 23686000. Mean Reward: 0.5560836501901141. Std of Reward: 0.5571581072736397.
Step: 23687000. Mean Reward: 0.5154538461538462. Std of Reward: 0.5773906737881335.
Step: 23688000. Mean Reward: 0.568672932330827. Std of Reward: 0.5569442458870904.
Step: 23689000. Mean Reward: 0.5491792452830189. Std of Reward: 0.56831880166325

Step: 23776000. Mean Reward: 0.5506158088235293. Std of Reward: 0.5607537284238577.
Step: 23777000. Mean Reward: 0.5496858237547892. Std of Reward: 0.5637558105571642.
Step: 23778000. Mean Reward: 0.5916635859519408. Std of Reward: 0.5502070737267996.
Step: 23779000. Mean Reward: 0.5096711281070746. Std of Reward: 0.5738966426722055.
Step: 23780000. Mean Reward: 0.529187617260788. Std of Reward: 0.5744862608119338.
Step: 23781000. Mean Reward: 0.5198416833667335. Std of Reward: 0.5740820196771583.
Step: 23782000. Mean Reward: 0.5579532710280374. Std of Reward: 0.5652595662663795.
Step: 23783000. Mean Reward: 0.5295049115913556. Std of Reward: 0.5723165026879452.
Step: 23784000. Mean Reward: 0.5624152046783626. Std of Reward: 0.5551273927189915.
Step: 23785000. Mean Reward: 0.5180919540229885. Std of Reward: 0.5657535469050508.
Step: 23786000. Mean Reward: 0.5291031894934334. Std of Reward: 0.5731712730949379.
Step: 23787000. Mean Reward: 0.5283372549019608. Std of Reward: 0.55710048023

Step: 23874000. Mean Reward: 0.5345503731343284. Std of Reward: 0.5675017467260998.
Step: 23875000. Mean Reward: 0.5260847784200385. Std of Reward: 0.564657616637279.
Step: 23876000. Mean Reward: 0.5342. Std of Reward: 0.5745820377415554.
Step: 23877000. Mean Reward: 0.5720429906542056. Std of Reward: 0.5551055099021863.
Step: 23878000. Mean Reward: 0.5412975047984644. Std of Reward: 0.5678193038095858.
Step: 23879000. Mean Reward: 0.5215703564727955. Std of Reward: 0.555491109406536.
Step: 23880000. Mean Reward: 0.5562136279926335. Std of Reward: 0.5540135214842596.
Step: 23881000. Mean Reward: 0.5259826923076922. Std of Reward: 0.5715719431872993.
Step: 23882000. Mean Reward: 0.5494011406844107. Std of Reward: 0.5589430859279714.
Step: 23883000. Mean Reward: 0.5179719101123597. Std of Reward: 0.5834814706156333.
Step: 23884000. Mean Reward: 0.5587981308411215. Std of Reward: 0.5544451806213696.
Step: 23885000. Mean Reward: 0.5020524271844661. Std of Reward: 0.5880719782648399.
Step: 

Step: 23972000. Mean Reward: 0.5360907407407408. Std of Reward: 0.5680367107230667.
Step: 23973000. Mean Reward: 0.5189677419354838. Std of Reward: 0.56236283581974.
Step: 23974000. Mean Reward: 0.4888417132216016. Std of Reward: 0.5751739297231677.
Step: 23975000. Mean Reward: 0.5123460076045628. Std of Reward: 0.5743641963846837.
Step: 23976000. Mean Reward: 0.5653937621832359. Std of Reward: 0.5509617047645282.
Step: 23977000. Mean Reward: 0.5048897637795277. Std of Reward: 0.5818102144542078.
Step: 23978000. Mean Reward: 0.5344040968342644. Std of Reward: 0.573963399372593.
Step: 23979000. Mean Reward: 0.5393220973782772. Std of Reward: 0.559323639502625.
Step: 23980000. Mean Reward: 0.5350321361058602. Std of Reward: 0.5728161164938508.
Step: 23981000. Mean Reward: 0.5446596958174905. Std of Reward: 0.5587650561688201.
Step: 23982000. Mean Reward: 0.5504392523364486. Std of Reward: 0.5547615599208332.
Step: 23983000. Mean Reward: 0.580078125. Std of Reward: 0.553837427626767.
Step

Step: 24070000. Mean Reward: 0.5501311475409836. Std of Reward: 0.5580576165443856.
Step: 24071000. Mean Reward: 0.5797509157509158. Std of Reward: 0.5465393492708088.
Step: 24072000. Mean Reward: 0.5123454894433781. Std of Reward: 0.5582167415449623.
Step: 24073000. Mean Reward: 0.564875968992248. Std of Reward: 0.5490167357095568.
Step: 24074000. Mean Reward: 0.5518928571428572. Std of Reward: 0.5643033113868929.
Step: 24075000. Mean Reward: 0.5336254681647941. Std of Reward: 0.5748019771272755.
Step: 24076000. Mean Reward: 0.4945687732342008. Std of Reward: 0.5851899573321774.
Step: 24077000. Mean Reward: 0.5027005347593583. Std of Reward: 0.5776972414976019.
Step: 24078000. Mean Reward: 0.5259903846153846. Std of Reward: 0.5710762258024538.
Step: 24079000. Mean Reward: 0.5456767857142856. Std of Reward: 0.5686889001689611.
Step: 24080000. Mean Reward: 0.5366704119850187. Std of Reward: 0.555888956376342.
Step: 24081000. Mean Reward: 0.4989273422562142. Std of Reward: 0.573540977478

Step: 24168000. Mean Reward: 0.5617053231939163. Std of Reward: 0.5589449820988545.
Step: 24169000. Mean Reward: 0.509552380952381. Std of Reward: 0.5665588219106709.
Step: 24170000. Mean Reward: 0.5678699029126213. Std of Reward: 0.5619970325794843.
Step: 24171000. Mean Reward: 0.565326605504587. Std of Reward: 0.5476709483264298.
Step: 24172000. Mean Reward: 0.4948127490039841. Std of Reward: 0.5612822407503074.
Step: 24173000. Mean Reward: 0.5347848837209302. Std of Reward: 0.5617495426666824.
Step: 24174000. Mean Reward: 0.5518947368421052. Std of Reward: 0.5562509779264828.
Step: 24175000. Mean Reward: 0.5233072625698323. Std of Reward: 0.5699584327711275.
Step: 24176000. Mean Reward: 0.5601125703564729. Std of Reward: 0.555830067782404.
Step: 24177000. Mean Reward: 0.5336487523992323. Std of Reward: 0.5580182110307992.
Step: 24178000. Mean Reward: 0.5665956284153005. Std of Reward: 0.5658851766965918.
Step: 24179000. Mean Reward: 0.4999097888675623. Std of Reward: 0.5647356876029

Step: 24266000. Mean Reward: 0.5227221172022684. Std of Reward: 0.5550125531222746.
Step: 24267000. Mean Reward: 0.5930714285714286. Std of Reward: 0.5325889160924923.
Step: 24268000. Mean Reward: 0.5077941176470587. Std of Reward: 0.5643691869521973.
Step: 24269000. Mean Reward: 0.5274509803921569. Std of Reward: 0.5551725226900366.
Step: 24270000. Mean Reward: 0.5980247148288974. Std of Reward: 0.5269714833424076.
Step: 24271000. Mean Reward: 0.5394934086629002. Std of Reward: 0.5475766622057004.
Step: 24272000. Mean Reward: 0.5390038095238094. Std of Reward: 0.568548336919614.
Step: 24273000. Mean Reward: 0.5036691729323308. Std of Reward: 0.5625065676716602.
Step: 24274000. Mean Reward: 0.5548519230769231. Std of Reward: 0.5415345105809489.
Step: 24275000. Mean Reward: 0.5557686274509804. Std of Reward: 0.5585800391996064.
Step: 24276000. Mean Reward: 0.5389203036053131. Std of Reward: 0.5494763708054534.
Step: 24277000. Mean Reward: 0.56646. Std of Reward: 0.5670592411251708.
Step

Step: 24365000. Mean Reward: 0.5422564102564102. Std of Reward: 0.5604551592552536.
Step: 24366000. Mean Reward: 0.5417992424242424. Std of Reward: 0.5653755434672968.
Step: 24367000. Mean Reward: 0.5045923913043477. Std of Reward: 0.5865248139728412.
Step: 24368000. Mean Reward: 0.5312670565302143. Std of Reward: 0.5640344376094998.
Step: 24369000. Mean Reward: 0.5153473282442748. Std of Reward: 0.5678169269836869.
Step: 24370000. Mean Reward: 0.5230172413793103. Std of Reward: 0.5699976387072614.
Step: 24371000. Mean Reward: 0.5373432835820896. Std of Reward: 0.5716991707689235.
Step: 24372000. Mean Reward: 0.5374442413162706. Std of Reward: 0.5653287940910549.
Step: 24373000. Mean Reward: 0.5452242647058824. Std of Reward: 0.5664659869852947.
Step: 24374000. Mean Reward: 0.5277595419847328. Std of Reward: 0.5697372062316763.
Step: 24375000. Mean Reward: 0.46877840909090907. Std of Reward: 0.5881024716708372.
Step: 24376000. Mean Reward: 0.5412578740157481. Std of Reward: 0.559730751

Step: 24463000. Mean Reward: 0.5086216730038023. Std of Reward: 0.5809326540173697.
Step: 24464000. Mean Reward: 0.5310056390977443. Std of Reward: 0.5687047726098782.
Step: 24465000. Mean Reward: 0.5731423357664234. Std of Reward: 0.561038516473519.
Step: 24466000. Mean Reward: 0.5562854330708661. Std of Reward: 0.5585812660058396.
Step: 24467000. Mean Reward: 0.5277239382239381. Std of Reward: 0.5688623706687486.
Step: 24468000. Mean Reward: 0.5495532710280374. Std of Reward: 0.562650709582751.
Step: 24469000. Mean Reward: 0.5354836363636363. Std of Reward: 0.5711542401977489.
Step: 24470000. Mean Reward: 0.5687176015473887. Std of Reward: 0.5575957937316997.
Step: 24471000. Mean Reward: 0.585493358633776. Std of Reward: 0.5498607374609079.
Step: 24472000. Mean Reward: 0.5190170454545454. Std of Reward: 0.5739038115718742.
Step: 24473000. Mean Reward: 0.4885086372360844. Std of Reward: 0.5870164783842852.
Step: 24474000. Mean Reward: 0.5380855513307985. Std of Reward: 0.5627236642782

Step: 24561000. Mean Reward: 0.5412555147058823. Std of Reward: 0.5660704615441644.
Step: 24562000. Mean Reward: 0.5690204841713221. Std of Reward: 0.5565412855843801.
Step: 24563000. Mean Reward: 0.5448339694656489. Std of Reward: 0.5671022306349301.
Step: 24564000. Mean Reward: 0.5223132295719843. Std of Reward: 0.5751308843932441.
Step: 24565000. Mean Reward: 0.5367408123791103. Std of Reward: 0.5687654370449235.
Step: 24566000. Mean Reward: 0.48524629629629634. Std of Reward: 0.5897142343771459.
Step: 24567000. Mean Reward: 0.5465168316831683. Std of Reward: 0.5522288127197661.
Step: 24568000. Mean Reward: 0.5406030245746691. Std of Reward: 0.5619205667333098.
Step: 24569000. Mean Reward: 0.5547036328871893. Std of Reward: 0.5616294360586928.
Step: 24570000. Mean Reward: 0.5432153846153845. Std of Reward: 0.5598313720647755.
Step: 24571000. Mean Reward: 0.5454079696394686. Std of Reward: 0.5682669069931344.
Step: 24572000. Mean Reward: 0.5432734225621414. Std of Reward: 0.570452813

Step: 24659000. Mean Reward: 0.5665959409594096. Std of Reward: 0.569690257835627.
Step: 24660000. Mean Reward: 0.5829701492537314. Std of Reward: 0.5476950199972869.
Step: 24661000. Mean Reward: 0.5593996175908221. Std of Reward: 0.5645123574222748.
Step: 24662000. Mean Reward: 0.5698144876325089. Std of Reward: 0.5696725975611049.
Step: 24663000. Mean Reward: 0.5497587548638132. Std of Reward: 0.5748822347438913.
Step: 24664000. Mean Reward: 0.5526660412757974. Std of Reward: 0.5694991719555543.
Step: 24665000. Mean Reward: 0.5878376865671642. Std of Reward: 0.5566774248797659.
Step: 24666000. Mean Reward: 0.560601134215501. Std of Reward: 0.5735063691554363.
Step: 24667000. Mean Reward: 0.5530157790927022. Std of Reward: 0.5600860913567631.
Step: 24668000. Mean Reward: 0.6159961013645223. Std of Reward: 0.5379365093521375.
Step: 24669000. Mean Reward: 0.5743646616541354. Std of Reward: 0.5571710554820583.
Step: 24670000. Mean Reward: 0.5553071161048689. Std of Reward: 0.566800296481

Step: 24757000. Mean Reward: 0.5045275310834814. Std of Reward: 0.5836480123759419.
Step: 24758000. Mean Reward: 0.5459961685823755. Std of Reward: 0.577584479884064.
Step: 24759000. Mean Reward: 0.5536420664206643. Std of Reward: 0.5714782452540796.
Step: 24760000. Mean Reward: 0.5218971428571428. Std of Reward: 0.579130774680739.
Step: 24761000. Mean Reward: 0.5314812164579606. Std of Reward: 0.5828452934111006.
Step: 24762000. Mean Reward: 0.5571685393258428. Std of Reward: 0.5730835426666818.
Step: 24763000. Mean Reward: 0.5572150735294118. Std of Reward: 0.5826428785834126.
Step: 24764000. Mean Reward: 0.5137362637362637. Std of Reward: 0.591830824911121.
Step: 24765000. Mean Reward: 0.5678181818181819. Std of Reward: 0.5622831822135318.
Step: 24766000. Mean Reward: 0.538206106870229. Std of Reward: 0.5798908289932617.
Step: 24767000. Mean Reward: 0.5654307400379506. Std of Reward: 0.5596791626701466.
Step: 24768000. Mean Reward: 0.5655381818181819. Std of Reward: 0.57845588611277

Step: 24855000. Mean Reward: 0.5197442273534636. Std of Reward: 0.6020686588145177.
Step: 24856000. Mean Reward: 0.5521795841209829. Std of Reward: 0.5812065768311745.
Step: 24857000. Mean Reward: 0.5503061224489796. Std of Reward: 0.5854412104791267.
Step: 24858000. Mean Reward: 0.559636528028933. Std of Reward: 0.5731821992439048.
Step: 24859000. Mean Reward: 0.507717850287908. Std of Reward: 0.6036214978891961.
Step: 24860000. Mean Reward: 0.5880635514018692. Std of Reward: 0.5778236995286021.
Step: 24861000. Mean Reward: 0.5192728937728938. Std of Reward: 0.6052190478319887.
Step: 24862000. Mean Reward: 0.5340523255813954. Std of Reward: 0.5906071629310335.
Step: 24863000. Mean Reward: 0.530186440677966. Std of Reward: 0.5839121592194481.
Step: 24864000. Mean Reward: 0.5645156250000001. Std of Reward: 0.5774921635612118.
Step: 24865000. Mean Reward: 0.5655196261682243. Std of Reward: 0.5780486535547814.
Step: 24866000. Mean Reward: 0.550916820702403. Std of Reward: 0.57726465266633

Step: 24953000. Mean Reward: 0.5659888682745825. Std of Reward: 0.5797755946362566.
Step: 24954000. Mean Reward: 0.5594990583804142. Std of Reward: 0.586217031645451.
Step: 24955000. Mean Reward: 0.5411457943925233. Std of Reward: 0.5860491233725014.
Step: 24956000. Mean Reward: 0.5908922495274102. Std of Reward: 0.566510238560272.
Step: 24957000. Mean Reward: 0.5055223880597015. Std of Reward: 0.5905090988015609.
Step: 24958000. Mean Reward: 0.5902159709618874. Std of Reward: 0.5617975190841471.
Step: 24959000. Mean Reward: 0.5905895953757224. Std of Reward: 0.5706345715315573.
Step: 24960000. Mean Reward: 0.5233029739776951. Std of Reward: 0.5928713094925476.
Step: 24961000. Mean Reward: 0.5452067669172933. Std of Reward: 0.586492481564472.
Step: 24962000. Mean Reward: 0.5823962962962963. Std of Reward: 0.560039210255246.
Step: 24963000. Mean Reward: 0.538905482041588. Std of Reward: 0.5848101549582975.
Step: 24964000. Mean Reward: 0.5684364326375713. Std of Reward: 0.583166794078937

Saved Model
Step: 25051000. Mean Reward: 0.5376841121495326. Std of Reward: 0.5786220848164161.
Step: 25052000. Mean Reward: 0.5957695035460993. Std of Reward: 0.5643166080361514.
Step: 25053000. Mean Reward: 0.5669349112426035. Std of Reward: 0.5637659033597784.
Step: 25054000. Mean Reward: 0.5822518656716418. Std of Reward: 0.5559446025602931.
Step: 25055000. Mean Reward: 0.5691470588235293. Std of Reward: 0.5729464997789904.
Step: 25056000. Mean Reward: 0.5403522935779816. Std of Reward: 0.5832960385171371.
Step: 25057000. Mean Reward: 0.5545026737967915. Std of Reward: 0.5714562319525565.
Step: 25058000. Mean Reward: 0.6138054054054055. Std of Reward: 0.5554410778487422.
Step: 25059000. Mean Reward: 0.544937728937729. Std of Reward: 0.5761198108166001.
Step: 25060000. Mean Reward: 0.5727633027522936. Std of Reward: 0.5730938672431076.
Step: 25061000. Mean Reward: 0.5336545454545455. Std of Reward: 0.5883151634712876.
Step: 25062000. Mean Reward: 0.5391847619047619. Std of Reward: 0

Step: 25149000. Mean Reward: 0.5890531914893617. Std of Reward: 0.5728230520358928.
Step: 25150000. Mean Reward: 0.5276959847036329. Std of Reward: 0.5762146911436785.
Saved Model
Step: 25151000. Mean Reward: 0.5415089928057553. Std of Reward: 0.5849859663738791.
Step: 25152000. Mean Reward: 0.5983763636363637. Std of Reward: 0.5466587885305475.
Step: 25153000. Mean Reward: 0.5450625. Std of Reward: 0.5715862118388951.
Step: 25154000. Mean Reward: 0.5774358490566038. Std of Reward: 0.5727156137094075.
Step: 25155000. Mean Reward: 0.5738414179104476. Std of Reward: 0.570312207506402.
Step: 25156000. Mean Reward: 0.5251750000000001. Std of Reward: 0.583102069063752.
Step: 25157000. Mean Reward: 0.5587045454545455. Std of Reward: 0.56808704770239.
Step: 25158000. Mean Reward: 0.4992568306010929. Std of Reward: 0.5887672756651344.
Step: 25159000. Mean Reward: 0.5373091247672253. Std of Reward: 0.5827664210147686.
Step: 25160000. Mean Reward: 0.523778409090909. Std of Reward: 0.594023184480

Step: 25247000. Mean Reward: 0.5223407407407407. Std of Reward: 0.5932378064544258.
Step: 25248000. Mean Reward: 0.543896174863388. Std of Reward: 0.5706966807546267.
Step: 25249000. Mean Reward: 0.568438202247191. Std of Reward: 0.5791622993837563.
Step: 25250000. Mean Reward: 0.5755691202872532. Std of Reward: 0.5652343258047828.
Saved Model
Step: 25251000. Mean Reward: 0.5479616858237548. Std of Reward: 0.5828356649392068.
Step: 25252000. Mean Reward: 0.49534018691588777. Std of Reward: 0.5981076327448335.
Step: 25253000. Mean Reward: 0.565500945179584. Std of Reward: 0.5834844136420354.
Step: 25254000. Mean Reward: 0.5073123877917415. Std of Reward: 0.605543774863244.
Step: 25255000. Mean Reward: 0.5188649155722327. Std of Reward: 0.5909524894537606.
Step: 25256000. Mean Reward: 0.5693600746268656. Std of Reward: 0.5841663685051818.
Step: 25257000. Mean Reward: 0.5564014336917562. Std of Reward: 0.5703808921663507.
Step: 25258000. Mean Reward: 0.5077233644859812. Std of Reward: 0.5

Step: 25345000. Mean Reward: 0.5647186311787072. Std of Reward: 0.5781372150819951.
Step: 25346000. Mean Reward: 0.5728712686567164. Std of Reward: 0.5644489290474987.
Step: 25347000. Mean Reward: 0.5785457746478873. Std of Reward: 0.5704531414911097.
Step: 25348000. Mean Reward: 0.5515279279279278. Std of Reward: 0.580766037024008.
Step: 25349000. Mean Reward: 0.5426647727272726. Std of Reward: 0.5758628073160728.
Step: 25350000. Mean Reward: 0.5541946902654867. Std of Reward: 0.5769989380276891.
Saved Model
Step: 25351000. Mean Reward: 0.5511967213114753. Std of Reward: 0.5789826464017926.
Step: 25352000. Mean Reward: 0.5466795774647887. Std of Reward: 0.5793237249718972.
Step: 25353000. Mean Reward: 0.5232687140115163. Std of Reward: 0.5925623696775278.
Step: 25354000. Mean Reward: 0.48847999999999997. Std of Reward: 0.5962041980651228.
Step: 25355000. Mean Reward: 0.5437741347905283. Std of Reward: 0.5805988523146881.
Step: 25356000. Mean Reward: 0.6117586837294332. Std of Reward: 

Step: 25443000. Mean Reward: 0.5351916817359855. Std of Reward: 0.5950038455993404.
Step: 25444000. Mean Reward: 0.5220036363636363. Std of Reward: 0.5849761486866207.
Step: 25445000. Mean Reward: 0.5628208955223881. Std of Reward: 0.5745257661596106.
Step: 25446000. Mean Reward: 0.5571312607944733. Std of Reward: 0.5873974801583148.
Step: 25447000. Mean Reward: 0.5191870503597122. Std of Reward: 0.6036862401305452.
Step: 25448000. Mean Reward: 0.49719553072625705. Std of Reward: 0.602284147676501.
Step: 25449000. Mean Reward: 0.5522011173184358. Std of Reward: 0.5855702059271075.
Step: 25450000. Mean Reward: 0.5763071428571428. Std of Reward: 0.5857865761573258.
Saved Model
Step: 25451000. Mean Reward: 0.5813736462093864. Std of Reward: 0.5730797124121582.
Step: 25452000. Mean Reward: 0.5383978685612788. Std of Reward: 0.5883727399635578.
Step: 25453000. Mean Reward: 0.5533595113438046. Std of Reward: 0.5878222385973834.
Step: 25454000. Mean Reward: 0.5309787234042553. Std of Reward: 

Step: 25541000. Mean Reward: 0.5512140151515151. Std of Reward: 0.5860770078426191.
Step: 25542000. Mean Reward: 0.6013401869158879. Std of Reward: 0.5541170059071271.
Step: 25543000. Mean Reward: 0.5604014466546113. Std of Reward: 0.5717584545430768.
Step: 25544000. Mean Reward: 0.580787610619469. Std of Reward: 0.5763397266842429.
Step: 25545000. Mean Reward: 0.5272659176029961. Std of Reward: 0.5813038686563119.
Step: 25546000. Mean Reward: 0.49821699819168175. Std of Reward: 0.5918001348144456.
Step: 25547000. Mean Reward: 0.5712182163187856. Std of Reward: 0.5664387667526914.
Step: 25548000. Mean Reward: 0.5279392857142857. Std of Reward: 0.6022866959936259.
Step: 25549000. Mean Reward: 0.5048856589147287. Std of Reward: 0.5929743812097429.
Step: 25550000. Mean Reward: 0.5095842911877394. Std of Reward: 0.5856283797911733.
Saved Model
Step: 25551000. Mean Reward: 0.5758072727272726. Std of Reward: 0.5825587751399527.
Step: 25552000. Mean Reward: 0.5363483535528596. Std of Reward: 

Step: 25639000. Mean Reward: 0.5362563176895307. Std of Reward: 0.5771255605503817.
Step: 25640000. Mean Reward: 0.5597533718689788. Std of Reward: 0.5834752050405471.
Step: 25641000. Mean Reward: 0.5344702602230482. Std of Reward: 0.5921269236618203.
Step: 25642000. Mean Reward: 0.523074074074074. Std of Reward: 0.5902498333462163.
Step: 25643000. Mean Reward: 0.5582152380952381. Std of Reward: 0.574516380765423.
Step: 25644000. Mean Reward: 0.5400834845735026. Std of Reward: 0.5825219750958134.
Step: 25645000. Mean Reward: 0.5248875739644971. Std of Reward: 0.5870379505685503.
Step: 25646000. Mean Reward: 0.5333438661710036. Std of Reward: 0.5866860008048965.
Step: 25647000. Mean Reward: 0.567285447761194. Std of Reward: 0.5780142882318504.
Step: 25648000. Mean Reward: 0.5191374321880651. Std of Reward: 0.595620811502008.
Step: 25649000. Mean Reward: 0.5884934823091247. Std of Reward: 0.5596612540021533.
Step: 25650000. Mean Reward: 0.5438774319066148. Std of Reward: 0.58592003756872

Step: 25738000. Mean Reward: 0.5695913370998116. Std of Reward: 0.5603741928460095.
Step: 25739000. Mean Reward: 0.5536386233269598. Std of Reward: 0.5706151702033286.
Step: 25740000. Mean Reward: 0.5814789762340037. Std of Reward: 0.5666541244219115.
Step: 25741000. Mean Reward: 0.5218607594936708. Std of Reward: 0.5788237843928049.
Step: 25742000. Mean Reward: 0.5209224952741021. Std of Reward: 0.5776240491852604.
Step: 25743000. Mean Reward: 0.559407063197026. Std of Reward: 0.5668689158498662.
Step: 25744000. Mean Reward: 0.5503438661710038. Std of Reward: 0.5677368647140266.
Step: 25745000. Mean Reward: 0.6010564971751411. Std of Reward: 0.5579573695064887.
Step: 25746000. Mean Reward: 0.5507554347826087. Std of Reward: 0.5643632425447999.
Step: 25747000. Mean Reward: 0.539977229601518. Std of Reward: 0.5736222562280362.
Step: 25748000. Mean Reward: 0.5902919708029197. Std of Reward: 0.5565763096608388.
Step: 25749000. Mean Reward: 0.5382562141491396. Std of Reward: 0.575324996968

Step: 25836000. Mean Reward: 0.5708348623853211. Std of Reward: 0.5768208745818943.
Step: 25837000. Mean Reward: 0.5244333958724203. Std of Reward: 0.590668922934615.
Step: 25838000. Mean Reward: 0.5056723163841808. Std of Reward: 0.6017117500098428.
Step: 25839000. Mean Reward: 0.5422486865148862. Std of Reward: 0.5815394881980511.
Step: 25840000. Mean Reward: 0.5245131086142323. Std of Reward: 0.598619087254923.
Step: 25841000. Mean Reward: 0.5444990328820115. Std of Reward: 0.574292721594514.
Step: 25842000. Mean Reward: 0.5225567567567567. Std of Reward: 0.5867907049905914.
Step: 25843000. Mean Reward: 0.5176224677716391. Std of Reward: 0.5894741571629505.
Step: 25844000. Mean Reward: 0.5383675373134329. Std of Reward: 0.594394851858866.
Step: 25845000. Mean Reward: 0.5417670454545455. Std of Reward: 0.5768993241475299.
Step: 25846000. Mean Reward: 0.6047319778188539. Std of Reward: 0.5557930407395684.
Step: 25847000. Mean Reward: 0.5298399246704332. Std of Reward: 0.58654842701895

Step: 25934000. Mean Reward: 0.5663451492537314. Std of Reward: 0.5564990196371125.
Step: 25935000. Mean Reward: 0.5334092592592593. Std of Reward: 0.5815759441407072.
Step: 25936000. Mean Reward: 0.5548462962962963. Std of Reward: 0.5747986575711027.
Step: 25937000. Mean Reward: 0.5445559502664298. Std of Reward: 0.5776018144931698.
Step: 25938000. Mean Reward: 0.5483484848484849. Std of Reward: 0.568785681329605.
Step: 25939000. Mean Reward: 0.5197071428571428. Std of Reward: 0.5788545492908335.
Step: 25940000. Mean Reward: 0.579012987012987. Std of Reward: 0.566189028972591.
Step: 25941000. Mean Reward: 0.5544290976058932. Std of Reward: 0.5714732777826341.
Step: 25942000. Mean Reward: 0.527048417132216. Std of Reward: 0.5818640897020813.
Step: 25943000. Mean Reward: 0.5189584905660377. Std of Reward: 0.5908907619997029.
Step: 25944000. Mean Reward: 0.5346163636363637. Std of Reward: 0.5766469631376444.
Step: 25945000. Mean Reward: 0.5238852772466539. Std of Reward: 0.57001294996633

Step: 26032000. Mean Reward: 0.5189981981981981. Std of Reward: 0.5824323445970266.
Step: 26033000. Mean Reward: 0.5206962264150944. Std of Reward: 0.5692834590765947.
Step: 26034000. Mean Reward: 0.5294782608695652. Std of Reward: 0.5873916390434473.
Step: 26035000. Mean Reward: 0.5799474671669793. Std of Reward: 0.5512913860765943.
Step: 26036000. Mean Reward: 0.5403345864661654. Std of Reward: 0.570720261928035.
Step: 26037000. Mean Reward: 0.5838018348623853. Std of Reward: 0.5665219569469971.
Step: 26038000. Mean Reward: 0.573669741697417. Std of Reward: 0.5594392881526248.
Step: 26039000. Mean Reward: 0.6062303473491772. Std of Reward: 0.5508103403488016.
Step: 26040000. Mean Reward: 0.5276109090909091. Std of Reward: 0.5909647778068203.
Step: 26041000. Mean Reward: 0.5392602996254682. Std of Reward: 0.5742929129347681.
Step: 26042000. Mean Reward: 0.5717762237762237. Std of Reward: 0.5603030139190036.
Step: 26043000. Mean Reward: 0.5476884328358209. Std of Reward: 0.566887829740

Step: 26130000. Mean Reward: 0.5247845303867402. Std of Reward: 0.5738863822429128.
Step: 26131000. Mean Reward: 0.5187602996254682. Std of Reward: 0.5898509322862949.
Step: 26132000. Mean Reward: 0.5048066914498142. Std of Reward: 0.5695845611471856.
Step: 26133000. Mean Reward: 0.5574744525547445. Std of Reward: 0.5644203580561247.
Step: 26134000. Mean Reward: 0.529790990990991. Std of Reward: 0.5796571054673942.
Step: 26135000. Mean Reward: 0.5267863720073664. Std of Reward: 0.5741321246335143.
Step: 26136000. Mean Reward: 0.5176678966789668. Std of Reward: 0.585671523338654.
Step: 26137000. Mean Reward: 0.5853258426966292. Std of Reward: 0.5589787861266702.
Step: 26138000. Mean Reward: 0.540627416520211. Std of Reward: 0.5751805325398571.
Step: 26139000. Mean Reward: 0.5109618874773139. Std of Reward: 0.5847644230708289.
Step: 26140000. Mean Reward: 0.5245297297297297. Std of Reward: 0.5862580163449843.
Step: 26141000. Mean Reward: 0.5523788150807899. Std of Reward: 0.5829418844963

Step: 26228000. Mean Reward: 0.4983811320754717. Std of Reward: 0.5980387742388098.
Step: 26229000. Mean Reward: 0.5622184115523466. Std of Reward: 0.5681294765420013.
Step: 26230000. Mean Reward: 0.5539584905660376. Std of Reward: 0.5720820252026677.
Step: 26231000. Mean Reward: 0.4724283088235294. Std of Reward: 0.5945826440417424.
Step: 26232000. Mean Reward: 0.5198787878787878. Std of Reward: 0.580610112495351.
Step: 26233000. Mean Reward: 0.5295357798165137. Std of Reward: 0.5846682044290279.
Step: 26234000. Mean Reward: 0.5283068783068783. Std of Reward: 0.5831758919856455.
Step: 26235000. Mean Reward: 0.5229597806215721. Std of Reward: 0.5742396055001758.
Step: 26236000. Mean Reward: 0.5065909943714821. Std of Reward: 0.5891905167130278.
Step: 26237000. Mean Reward: 0.5393937728937729. Std of Reward: 0.5733765972136335.
Step: 26238000. Mean Reward: 0.5383915547024951. Std of Reward: 0.5725619297364883.
Step: 26239000. Mean Reward: 0.5257948717948717. Std of Reward: 0.59011781437

Step: 26326000. Mean Reward: 0.5488375451263537. Std of Reward: 0.574848615104111.
Step: 26327000. Mean Reward: 0.5882789373814042. Std of Reward: 0.5516622182358077.
Step: 26328000. Mean Reward: 0.5338105065666041. Std of Reward: 0.5858541844547012.
Step: 26329000. Mean Reward: 0.5410483271375465. Std of Reward: 0.5802836967106738.
Step: 26330000. Mean Reward: 0.5405706422018348. Std of Reward: 0.5857094459725082.
Step: 26331000. Mean Reward: 0.48910256410256403. Std of Reward: 0.5953335941988355.
Step: 26332000. Mean Reward: 0.5546762075134167. Std of Reward: 0.5737136040323663.
Step: 26333000. Mean Reward: 0.5462452830188679. Std of Reward: 0.5702007228726939.
Step: 26334000. Mean Reward: 0.5586575591985428. Std of Reward: 0.5846955112192534.
Step: 26335000. Mean Reward: 0.5632875457875458. Std of Reward: 0.5735759014753997.
Step: 26336000. Mean Reward: 0.5269092592592592. Std of Reward: 0.57828149071785.
Step: 26337000. Mean Reward: 0.5452416974169741. Std of Reward: 0.573663277518

Step: 26424000. Mean Reward: 0.539703237410072. Std of Reward: 0.5894762094438677.
Step: 26425000. Mean Reward: 0.5748014059753954. Std of Reward: 0.5740490762393398.
Step: 26426000. Mean Reward: 0.5609681647940076. Std of Reward: 0.5748361604371025.
Step: 26427000. Mean Reward: 0.533984934086629. Std of Reward: 0.581141337378702.
Step: 26428000. Mean Reward: 0.5534325842696629. Std of Reward: 0.5808606959304805.
Step: 26429000. Mean Reward: 0.5403919413919414. Std of Reward: 0.5864343989376845.
Step: 26430000. Mean Reward: 0.5630700179533213. Std of Reward: 0.5787448969794139.
Step: 26431000. Mean Reward: 0.5291936090225563. Std of Reward: 0.5841820144863936.
Step: 26432000. Mean Reward: 0.5668409506398537. Std of Reward: 0.5765357286126513.
Step: 26433000. Mean Reward: 0.5104018691588785. Std of Reward: 0.5978380154412954.
Step: 26434000. Mean Reward: 0.6193054003724395. Std of Reward: 0.5577561088284504.
Step: 26435000. Mean Reward: 0.5557971274685817. Std of Reward: 0.5754167767120

Step: 26522000. Mean Reward: 0.5493257575757575. Std of Reward: 0.5690256956999835.
Step: 26523000. Mean Reward: 0.527294921875. Std of Reward: 0.588661478992139.
Step: 26524000. Mean Reward: 0.5566855123674911. Std of Reward: 0.5772500331261766.
Step: 26525000. Mean Reward: 0.5574490500863557. Std of Reward: 0.5900514325039481.
Step: 26526000. Mean Reward: 0.5418780952380953. Std of Reward: 0.5867675912076348.
Step: 26527000. Mean Reward: 0.5601789667896678. Std of Reward: 0.5762524073481847.
Step: 26528000. Mean Reward: 0.5651790235081374. Std of Reward: 0.5840244571111118.
Step: 26529000. Mean Reward: 0.5597216117216117. Std of Reward: 0.5881583252324663.
Step: 26530000. Mean Reward: 0.5488686679174484. Std of Reward: 0.5843407741638291.
Step: 26531000. Mean Reward: 0.5557603773584906. Std of Reward: 0.5862992258255431.
Step: 26532000. Mean Reward: 0.5412178571428571. Std of Reward: 0.584013364300101.
Step: 26533000. Mean Reward: 0.5689240506329114. Std of Reward: 0.573757005118431.

Step: 26620000. Mean Reward: 0.5844981884057971. Std of Reward: 0.5687418322366032.
Step: 26621000. Mean Reward: 0.5525441176470588. Std of Reward: 0.5890671946546837.
Step: 26622000. Mean Reward: 0.5505532710280375. Std of Reward: 0.5881075816005346.
Step: 26623000. Mean Reward: 0.5519573283858998. Std of Reward: 0.5828976876949635.
Step: 26624000. Mean Reward: 0.5319184782608695. Std of Reward: 0.6013938841237517.
Step: 26625000. Mean Reward: 0.47788640595903165. Std of Reward: 0.6148742980206706.
Step: 26626000. Mean Reward: 0.569482439926063. Std of Reward: 0.5645299732757898.
Step: 26627000. Mean Reward: 0.5778728652751423. Std of Reward: 0.5734155638663813.
Step: 26628000. Mean Reward: 0.5295509838998211. Std of Reward: 0.5944385518659353.
Step: 26629000. Mean Reward: 0.5458368121442124. Std of Reward: 0.587186386020315.
Step: 26630000. Mean Reward: 0.5697816513761468. Std of Reward: 0.5747460564354475.
Step: 26631000. Mean Reward: 0.5135821428571429. Std of Reward: 0.59128684153

Step: 26718000. Mean Reward: 0.5480808270676691. Std of Reward: 0.5823333146331633.
Step: 26719000. Mean Reward: 0.5768140417457306. Std of Reward: 0.5655582350405519.
Step: 26720000. Mean Reward: 0.5580741410488246. Std of Reward: 0.5779204065538983.
Step: 26721000. Mean Reward: 0.5611939736346516. Std of Reward: 0.5798338729236777.
Step: 26722000. Mean Reward: 0.5394367816091954. Std of Reward: 0.5940365947384829.
Step: 26723000. Mean Reward: 0.5485722120658134. Std of Reward: 0.5880755369059085.
Step: 26724000. Mean Reward: 0.5085475285171103. Std of Reward: 0.6070780894385839.
Step: 26725000. Mean Reward: 0.5484218181818182. Std of Reward: 0.5886627096273257.
Step: 26726000. Mean Reward: 0.5820090909090909. Std of Reward: 0.5803499868097548.
Step: 26727000. Mean Reward: 0.5631398230088496. Std of Reward: 0.5827826959639527.
Step: 26728000. Mean Reward: 0.5367307001795332. Std of Reward: 0.5878874933662237.
Step: 26729000. Mean Reward: 0.5611395348837208. Std of Reward: 0.5944973450

Step: 26816000. Mean Reward: 0.5942632541133456. Std of Reward: 0.5646255624405054.
Step: 26817000. Mean Reward: 0.5318750000000001. Std of Reward: 0.5961892547597131.
Step: 26818000. Mean Reward: 0.5175522935779817. Std of Reward: 0.5977812259132705.
Step: 26819000. Mean Reward: 0.5613219424460432. Std of Reward: 0.5780128968656232.
Step: 26820000. Mean Reward: 0.521701252236136. Std of Reward: 0.6023895694626182.
Step: 26821000. Mean Reward: 0.5354014869888476. Std of Reward: 0.5924685006901688.
Step: 26822000. Mean Reward: 0.5194073394495413. Std of Reward: 0.5948231586260538.
Step: 26823000. Mean Reward: 0.5377733089579525. Std of Reward: 0.5931118997090149.
Step: 26824000. Mean Reward: 0.5334629294755877. Std of Reward: 0.5926460044583611.
Step: 26825000. Mean Reward: 0.5066746765249538. Std of Reward: 0.6013586323490177.
Step: 26826000. Mean Reward: 0.5450718132854578. Std of Reward: 0.5988263283319583.
Step: 26827000. Mean Reward: 0.5197388987566608. Std of Reward: 0.60188902250

Step: 26914000. Mean Reward: 0.5847891891891892. Std of Reward: 0.554239874460224.
Step: 26915000. Mean Reward: 0.5459425493716337. Std of Reward: 0.5752437593714812.
Step: 26916000. Mean Reward: 0.545903165735568. Std of Reward: 0.5803117363051657.
Step: 26917000. Mean Reward: 0.5394427767354597. Std of Reward: 0.5900760301835415.
Step: 26918000. Mean Reward: 0.5483936567164178. Std of Reward: 0.570336323824106.
Step: 26919000. Mean Reward: 0.5463480662983425. Std of Reward: 0.5825433364880971.
Step: 26920000. Mean Reward: 0.5590896309314586. Std of Reward: 0.5853062563811485.
Step: 26921000. Mean Reward: 0.5514640287769784. Std of Reward: 0.5757246767182471.
Step: 26922000. Mean Reward: 0.5637418181818181. Std of Reward: 0.5832635352256876.
Step: 26923000. Mean Reward: 0.5623577981651376. Std of Reward: 0.56599929203664.
Step: 26924000. Mean Reward: 0.5451336996336996. Std of Reward: 0.5873255726411495.
Step: 26925000. Mean Reward: 0.5623307839388146. Std of Reward: 0.569920107403576

Step: 27012000. Mean Reward: 0.5821350364963503. Std of Reward: 0.5667780690670411.
Step: 27013000. Mean Reward: 0.5725640074211502. Std of Reward: 0.567887937646982.
Step: 27014000. Mean Reward: 0.563763440860215. Std of Reward: 0.5697487385184434.
Step: 27015000. Mean Reward: 0.5358345864661654. Std of Reward: 0.5841009437485843.
Step: 27016000. Mean Reward: 0.5545350553505535. Std of Reward: 0.580565138263944.
Step: 27017000. Mean Reward: 0.5283157894736842. Std of Reward: 0.5874935672546334.
Step: 27018000. Mean Reward: 0.5428286778398511. Std of Reward: 0.5721000717217115.
Step: 27019000. Mean Reward: 0.5203981651376146. Std of Reward: 0.588918699079146.
Step: 27020000. Mean Reward: 0.5477322097378277. Std of Reward: 0.5717672038552757.
Step: 27021000. Mean Reward: 0.5575081967213115. Std of Reward: 0.57394875664259.
Step: 27022000. Mean Reward: 0.5688983050847458. Std of Reward: 0.5638455774700483.
Step: 27023000. Mean Reward: 0.5075009416195857. Std of Reward: 0.5901670751175345

Step: 27110000. Mean Reward: 0.6234714285714286. Std of Reward: 0.5537516293798298.
Step: 27111000. Mean Reward: 0.5567918871252205. Std of Reward: 0.5842138501013397.
Step: 27112000. Mean Reward: 0.5256703096539161. Std of Reward: 0.5958141348522236.
Step: 27113000. Mean Reward: 0.5646751361161524. Std of Reward: 0.5782284209589277.
Step: 27114000. Mean Reward: 0.5407967332123411. Std of Reward: 0.5860612958813803.
Step: 27115000. Mean Reward: 0.5436881918819187. Std of Reward: 0.5811998663923769.
Step: 27116000. Mean Reward: 0.5290486891385768. Std of Reward: 0.6017624469615797.
Step: 27117000. Mean Reward: 0.5587153846153846. Std of Reward: 0.5777651570373472.
Step: 27118000. Mean Reward: 0.5186176470588235. Std of Reward: 0.5996292489190046.
Step: 27119000. Mean Reward: 0.5821660649819494. Std of Reward: 0.5666095197520823.
Step: 27120000. Mean Reward: 0.5470770676691729. Std of Reward: 0.5808567011049016.
Step: 27121000. Mean Reward: 0.5566358490566037. Std of Reward: 0.5753507850

Step: 27208000. Mean Reward: 0.5232125693160813. Std of Reward: 0.5940566024092669.
Step: 27209000. Mean Reward: 0.5367911025145067. Std of Reward: 0.577916737256587.
Step: 27210000. Mean Reward: 0.5729834254143646. Std of Reward: 0.5688541629074566.
Step: 27211000. Mean Reward: 0.500796992481203. Std of Reward: 0.5947632878401149.
Step: 27212000. Mean Reward: 0.5143563218390805. Std of Reward: 0.5930279753503682.
Step: 27213000. Mean Reward: 0.527128801431127. Std of Reward: 0.5976684135916691.
Step: 27214000. Mean Reward: 0.5257030965391621. Std of Reward: 0.5951779862743604.
Step: 27215000. Mean Reward: 0.5384215328467153. Std of Reward: 0.5953211113296173.
Step: 27216000. Mean Reward: 0.5678646209386281. Std of Reward: 0.5737340590214024.
Step: 27217000. Mean Reward: 0.5661776937618148. Std of Reward: 0.5753311092311311.
Step: 27218000. Mean Reward: 0.5566817391304347. Std of Reward: 0.5871399577735846.
Step: 27219000. Mean Reward: 0.5478589743589743. Std of Reward: 0.5811471857802

Step: 27306000. Mean Reward: 0.56225. Std of Reward: 0.5789254625162337.
Step: 27307000. Mean Reward: 0.5555599999999999. Std of Reward: 0.5832761571424886.
Step: 27308000. Mean Reward: 0.5679679144385027. Std of Reward: 0.5841740385778996.
Step: 27309000. Mean Reward: 0.5410054545454546. Std of Reward: 0.5771158210267147.
Step: 27310000. Mean Reward: 0.5584845173041895. Std of Reward: 0.5805289515636904.
Step: 27311000. Mean Reward: 0.5439757009345795. Std of Reward: 0.5829451180209924.
Step: 27312000. Mean Reward: 0.5215018518518518. Std of Reward: 0.5898093618533907.
Step: 27313000. Mean Reward: 0.5495608465608466. Std of Reward: 0.5874219009229562.
Step: 27314000. Mean Reward: 0.5298402154398564. Std of Reward: 0.5973384005520663.
Step: 27315000. Mean Reward: 0.5056129032258063. Std of Reward: 0.6007175255447349.
Step: 27316000. Mean Reward: 0.4948540925266905. Std of Reward: 0.6062603354742682.
Step: 27317000. Mean Reward: 0.5512901459854015. Std of Reward: 0.5901162122137443.
Ste

Step: 27404000. Mean Reward: 0.5798223443223444. Std of Reward: 0.5734882464399428.
Step: 27405000. Mean Reward: 0.5194381818181819. Std of Reward: 0.6071476732127064.
Step: 27406000. Mean Reward: 0.5553267148014439. Std of Reward: 0.5798561035624837.
Step: 27407000. Mean Reward: 0.535239332096475. Std of Reward: 0.5844834265655401.
Step: 27408000. Mean Reward: 0.5215767097966728. Std of Reward: 0.6005862478929408.
Step: 27409000. Mean Reward: 0.5778978494623656. Std of Reward: 0.5737997589755446.
Step: 27410000. Mean Reward: 0.49530228136882126. Std of Reward: 0.6054452042241224.
Step: 27411000. Mean Reward: 0.5550359168241966. Std of Reward: 0.5743928598965623.
Step: 27412000. Mean Reward: 0.5495390476190476. Std of Reward: 0.5704438079242276.
Step: 27413000. Mean Reward: 0.5009049429657795. Std of Reward: 0.5936993888730143.
Step: 27414000. Mean Reward: 0.5377794117647059. Std of Reward: 0.5906629524522882.
Step: 27415000. Mean Reward: 0.5711418312387792. Std of Reward: 0.5789877624

Step: 27502000. Mean Reward: 0.5778501872659176. Std of Reward: 0.574043320761118.
Step: 27503000. Mean Reward: 0.5634657293497363. Std of Reward: 0.5864313413092881.
Step: 27504000. Mean Reward: 0.5244859813084113. Std of Reward: 0.6028145661854444.
Step: 27505000. Mean Reward: 0.5434151291512915. Std of Reward: 0.593062403137015.
Step: 27506000. Mean Reward: 0.523425. Std of Reward: 0.5952209176461891.
Step: 27507000. Mean Reward: 0.5535774907749077. Std of Reward: 0.5883146644136822.
Step: 27508000. Mean Reward: 0.5355244122965641. Std of Reward: 0.5947801756621014.
Step: 27509000. Mean Reward: 0.5325378927911275. Std of Reward: 0.5946301176740861.
Step: 27510000. Mean Reward: 0.5736300366300366. Std of Reward: 0.5780762356206801.
Step: 27511000. Mean Reward: 0.5395739436619719. Std of Reward: 0.5887406153289992.
Step: 27512000. Mean Reward: 0.5198962432915921. Std of Reward: 0.6049586865979854.
Step: 27513000. Mean Reward: 0.52296875. Std of Reward: 0.5938931289851654.
Step: 275140

Saved Model
Step: 27601000. Mean Reward: 0.5021180952380951. Std of Reward: 0.6057608465362976.
Step: 27602000. Mean Reward: 0.513816029143898. Std of Reward: 0.6108339165623075.
Step: 27603000. Mean Reward: 0.5219416058394161. Std of Reward: 0.5880526723816767.
Step: 27604000. Mean Reward: 0.5611146025878003. Std of Reward: 0.5864256573183476.
Step: 27605000. Mean Reward: 0.5358909774436091. Std of Reward: 0.5967071983772989.
Step: 27606000. Mean Reward: 0.52464440433213. Std of Reward: 0.607895717929178.
Step: 27607000. Mean Reward: 0.5047442748091603. Std of Reward: 0.6108001318081394.
Step: 27608000. Mean Reward: 0.5175607985480943. Std of Reward: 0.6070011103259453.
Step: 27609000. Mean Reward: 0.5442928176795581. Std of Reward: 0.5996465448199702.
Step: 27610000. Mean Reward: 0.5001084558823529. Std of Reward: 0.6075510215216723.
Step: 27611000. Mean Reward: 0.5637108655616944. Std of Reward: 0.5820295468281819.
Step: 27612000. Mean Reward: 0.5344378478664193. Std of Reward: 0.59

Step: 27699000. Mean Reward: 0.5459084380610413. Std of Reward: 0.5933854613988577.
Step: 27700000. Mean Reward: 0.5331970802919708. Std of Reward: 0.6023164722072651.
Saved Model
Step: 27701000. Mean Reward: 0.5441703296703296. Std of Reward: 0.597784902510194.
Step: 27702000. Mean Reward: 0.5315125448028674. Std of Reward: 0.5995419150544389.
Step: 27703000. Mean Reward: 0.5219005424954792. Std of Reward: 0.6059416468927739.
Step: 27704000. Mean Reward: 0.5026894075403949. Std of Reward: 0.6050314787239308.
Step: 27705000. Mean Reward: 0.5587733089579525. Std of Reward: 0.5831522764218603.
Step: 27706000. Mean Reward: 0.5503967093235832. Std of Reward: 0.5925929711522124.
Step: 27707000. Mean Reward: 0.5246424682395644. Std of Reward: 0.5958544514757392.
Step: 27708000. Mean Reward: 0.5209621928166351. Std of Reward: 0.5940909613891338.
Step: 27709000. Mean Reward: 0.5242135231316726. Std of Reward: 0.6188633973550287.
Step: 27710000. Mean Reward: 0.519304189435337. Std of Reward: 0.

Step: 27797000. Mean Reward: 0.4963909774436091. Std of Reward: 0.6082716280186246.
Step: 27798000. Mean Reward: 0.5544511970534071. Std of Reward: 0.5808057882290533.
Step: 27799000. Mean Reward: 0.5564947183098592. Std of Reward: 0.5808745985734601.
Step: 27800000. Mean Reward: 0.5837568058076226. Std of Reward: 0.5742229270019591.
Saved Model
Step: 27801000. Mean Reward: 0.5330329067641683. Std of Reward: 0.5897803721723685.
Step: 27802000. Mean Reward: 0.5554615384615385. Std of Reward: 0.5819133647657035.
Step: 27803000. Mean Reward: 0.5292696629213484. Std of Reward: 0.5931971256704062.
Step: 27804000. Mean Reward: 0.57393321299639. Std of Reward: 0.5739121565270526.
Step: 27805000. Mean Reward: 0.49491935483870964. Std of Reward: 0.611651664535733.
Step: 27806000. Mean Reward: 0.5324136622390891. Std of Reward: 0.5888472542566546.
Step: 27807000. Mean Reward: 0.5497315315315315. Std of Reward: 0.5911282646109715.
Step: 27808000. Mean Reward: 0.5049265536723163. Std of Reward: 0.

Step: 27895000. Mean Reward: 0.5341391304347826. Std of Reward: 0.5935727857197212.
Step: 27896000. Mean Reward: 0.5268288770053476. Std of Reward: 0.6044731616531774.
Step: 27897000. Mean Reward: 0.5639208103130755. Std of Reward: 0.5723556145702586.
Step: 27898000. Mean Reward: 0.5682347670250897. Std of Reward: 0.5769528164515064.
Step: 27899000. Mean Reward: 0.55444. Std of Reward: 0.5832840121081259.
Step: 27900000. Mean Reward: 0.5636487455197132. Std of Reward: 0.5766112863489764.
Saved Model
Step: 27901000. Mean Reward: 0.5263387387387387. Std of Reward: 0.6024097469945892.
Step: 27902000. Mean Reward: 0.6055333333333333. Std of Reward: 0.5513989254750276.
Step: 27903000. Mean Reward: 0.5211405109489051. Std of Reward: 0.5930285596768841.
Step: 27904000. Mean Reward: 0.5335096322241681. Std of Reward: 0.5982923110711895.
Step: 27905000. Mean Reward: 0.5321633027522936. Std of Reward: 0.5891274133088933.
Step: 27906000. Mean Reward: 0.572492673992674. Std of Reward: 0.5737571126

Step: 27993000. Mean Reward: 0.4991136363636364. Std of Reward: 0.5914615134860947.
Step: 27994000. Mean Reward: 0.5383408239700375. Std of Reward: 0.5806178679287459.
Step: 27995000. Mean Reward: 0.5378719397363465. Std of Reward: 0.5833149461055306.
Step: 27996000. Mean Reward: 0.5227771739130435. Std of Reward: 0.6082000671862398.
Step: 27997000. Mean Reward: 0.5166642201834862. Std of Reward: 0.5913710481666421.
Step: 27998000. Mean Reward: 0.5356756756756758. Std of Reward: 0.5736963995933709.
Step: 27999000. Mean Reward: 0.5526390977443609. Std of Reward: 0.5681556308595219.
Step: 28000000. Mean Reward: 0.5143603773584905. Std of Reward: 0.5916915823456723.
Saved Model
Step: 28001000. Mean Reward: 0.592372513562387. Std of Reward: 0.5744859535111074.
Step: 28002000. Mean Reward: 0.5452018018018018. Std of Reward: 0.5860476126741563.
Step: 28003000. Mean Reward: 0.5418952380952381. Std of Reward: 0.582817137438159.
Step: 28004000. Mean Reward: 0.5193669724770642. Std of Reward: 0.

Step: 28091000. Mean Reward: 0.5533850467289719. Std of Reward: 0.5700434360895107.
Step: 28092000. Mean Reward: 0.4916392523364487. Std of Reward: 0.6068939881493122.
Step: 28093000. Mean Reward: 0.577125. Std of Reward: 0.5748419274181878.
Step: 28094000. Mean Reward: 0.5320907473309608. Std of Reward: 0.5903850398880723.
Step: 28095000. Mean Reward: 0.538860294117647. Std of Reward: 0.5942763075766796.
Step: 28096000. Mean Reward: 0.5082958801498128. Std of Reward: 0.5940707142912639.
Step: 28097000. Mean Reward: 0.5001361940298507. Std of Reward: 0.6014659564345215.
Step: 28098000. Mean Reward: 0.5590276243093923. Std of Reward: 0.5767854856655692.
Step: 28099000. Mean Reward: 0.5165018248175183. Std of Reward: 0.5802406324987155.
Step: 28100000. Mean Reward: 0.5536315789473685. Std of Reward: 0.5856211851842492.
Saved Model
Step: 28101000. Mean Reward: 0.5596843971631207. Std of Reward: 0.5805974023859052.
Step: 28102000. Mean Reward: 0.5482047101449276. Std of Reward: 0.581239523

Step: 28189000. Mean Reward: 0.5604462659380691. Std of Reward: 0.5834742213388103.
Step: 28190000. Mean Reward: 0.5290236363636363. Std of Reward: 0.5914751830538582.
Step: 28191000. Mean Reward: 0.5915027624309392. Std of Reward: 0.570348263201311.
Step: 28192000. Mean Reward: 0.5515171755725191. Std of Reward: 0.5795641473124764.
Step: 28193000. Mean Reward: 0.5646145454545455. Std of Reward: 0.5793852012476616.
Step: 28194000. Mean Reward: 0.5767304189435337. Std of Reward: 0.5754370827913098.
Step: 28195000. Mean Reward: 0.5851133455210238. Std of Reward: 0.5701431408398772.
Step: 28196000. Mean Reward: 0.5844710144927537. Std of Reward: 0.5660909045403562.
Step: 28197000. Mean Reward: 0.5406690647482014. Std of Reward: 0.5860280291193535.
Step: 28198000. Mean Reward: 0.5388468634686346. Std of Reward: 0.5896624500190655.
Step: 28199000. Mean Reward: 0.5615824175824176. Std of Reward: 0.5808701676211738.
Step: 28200000. Mean Reward: 0.570110909090909. Std of Reward: 0.571426111082

Step: 28287000. Mean Reward: 0.5307087198515771. Std of Reward: 0.5926966975335635.
Step: 28288000. Mean Reward: 0.5792393822393822. Std of Reward: 0.5668357887625058.
Step: 28289000. Mean Reward: 0.5857875. Std of Reward: 0.5737170652117458.
Step: 28290000. Mean Reward: 0.5993890909090909. Std of Reward: 0.5621199398616656.
Step: 28291000. Mean Reward: 0.5275137614678899. Std of Reward: 0.5840156403378645.
Step: 28292000. Mean Reward: 0.5532402234636872. Std of Reward: 0.5784078062555693.
Step: 28293000. Mean Reward: 0.5605641509433962. Std of Reward: 0.5772466205868996.
Step: 28294000. Mean Reward: 0.5905378927911276. Std of Reward: 0.5646428319538738.
Step: 28295000. Mean Reward: 0.5640072727272727. Std of Reward: 0.5778742045728844.
Step: 28296000. Mean Reward: 0.5349706422018348. Std of Reward: 0.5951610645162377.
Step: 28297000. Mean Reward: 0.5063298245614034. Std of Reward: 0.6081335206474454.
Step: 28298000. Mean Reward: 0.5517251773049645. Std of Reward: 0.5870807186555926.
S

Step: 28385000. Mean Reward: 0.5757509225092251. Std of Reward: 0.5782507681581864.
Step: 28386000. Mean Reward: 0.5286385321100917. Std of Reward: 0.596975864757054.
Step: 28387000. Mean Reward: 0.5767607843137256. Std of Reward: 0.5622585212663652.
Step: 28388000. Mean Reward: 0.5487274401473297. Std of Reward: 0.5841038273662548.
Step: 28389000. Mean Reward: 0.5602670250896057. Std of Reward: 0.5741135308110457.
Step: 28390000. Mean Reward: 0.5226517055655296. Std of Reward: 0.5972433268257127.
Step: 28391000. Mean Reward: 0.5710236363636363. Std of Reward: 0.5793714818549294.
Step: 28392000. Mean Reward: 0.588781914893617. Std of Reward: 0.5688040772756566.
Step: 28393000. Mean Reward: 0.5656378676470588. Std of Reward: 0.5830366841890473.
Step: 28394000. Mean Reward: 0.5499112318840581. Std of Reward: 0.5811117977360705.
Step: 28395000. Mean Reward: 0.5863779816513762. Std of Reward: 0.5718906775233603.
Step: 28396000. Mean Reward: 0.5379195804195805. Std of Reward: 0.586050843728

Step: 28483000. Mean Reward: 0.540659050966608. Std of Reward: 0.5967776473523583.
Step: 28484000. Mean Reward: 0.5664982078853047. Std of Reward: 0.5818680544526287.
Step: 28485000. Mean Reward: 0.5158994413407821. Std of Reward: 0.5893653572603397.
Step: 28486000. Mean Reward: 0.5319906542056075. Std of Reward: 0.5925566638834265.
Step: 28487000. Mean Reward: 0.5517736185383244. Std of Reward: 0.5887121541507687.
Step: 28488000. Mean Reward: 0.5578289703315881. Std of Reward: 0.579506212804526.
Step: 28489000. Mean Reward: 0.5391115241635688. Std of Reward: 0.5832751034446956.
Step: 28490000. Mean Reward: 0.56852865064695. Std of Reward: 0.5711631560620192.
Step: 28491000. Mean Reward: 0.5612189781021898. Std of Reward: 0.5736231973051186.
Step: 28492000. Mean Reward: 0.5566177024482108. Std of Reward: 0.5833480537183774.
Step: 28493000. Mean Reward: 0.5825623869801085. Std of Reward: 0.5755579541499368.
Step: 28494000. Mean Reward: 0.5726884955752213. Std of Reward: 0.58740761077388

Step: 28581000. Mean Reward: 0.5837618181818182. Std of Reward: 0.5583376687635633.
Step: 28582000. Mean Reward: 0.5520528169014085. Std of Reward: 0.5801738362844076.
Step: 28583000. Mean Reward: 0.5780950570342205. Std of Reward: 0.570425448665356.
Step: 28584000. Mean Reward: 0.5583144876325088. Std of Reward: 0.5816297834399944.
Step: 28585000. Mean Reward: 0.5491083032490975. Std of Reward: 0.5914527294980513.
Step: 28586000. Mean Reward: 0.5463112338858194. Std of Reward: 0.5803663039666217.
Step: 28587000. Mean Reward: 0.5179280303030304. Std of Reward: 0.5896921437206174.
Step: 28588000. Mean Reward: 0.5245879017013233. Std of Reward: 0.584584311235324.
Step: 28589000. Mean Reward: 0.5371189591078067. Std of Reward: 0.5857991402254525.
Step: 28590000. Mean Reward: 0.5553403041825095. Std of Reward: 0.5751093565799144.
Step: 28591000. Mean Reward: 0.5401090573012939. Std of Reward: 0.58468597598152.
Step: 28592000. Mean Reward: 0.5188057553956835. Std of Reward: 0.59454695136460

Step: 28679000. Mean Reward: 0.5731892393320964. Std of Reward: 0.5663249175521268.
Step: 28680000. Mean Reward: 0.5496666666666666. Std of Reward: 0.5736090765481424.
Step: 28681000. Mean Reward: 0.5668842105263159. Std of Reward: 0.575063749872446.
Step: 28682000. Mean Reward: 0.5322253787878789. Std of Reward: 0.5873830189466037.
Step: 28683000. Mean Reward: 0.5465047258979207. Std of Reward: 0.5810863501976135.
Step: 28684000. Mean Reward: 0.5092408759124087. Std of Reward: 0.5966489421988193.
Step: 28685000. Mean Reward: 0.5539784946236559. Std of Reward: 0.5696514933801882.
Step: 28686000. Mean Reward: 0.5503391915641477. Std of Reward: 0.5840479124482844.
Step: 28687000. Mean Reward: 0.5554319066147859. Std of Reward: 0.5713689175268638.
Step: 28688000. Mean Reward: 0.5647343453510436. Std of Reward: 0.5828336049178491.
Step: 28689000. Mean Reward: 0.5713171171171171. Std of Reward: 0.5695857957463593.
Step: 28690000. Mean Reward: 0.5339797794117648. Std of Reward: 0.58567666492

Step: 28777000. Mean Reward: 0.5550161870503596. Std of Reward: 0.5817406324188161.
Step: 28778000. Mean Reward: 0.5174054545454545. Std of Reward: 0.5926235300204374.
Step: 28779000. Mean Reward: 0.5256921675774135. Std of Reward: 0.5969538262134022.
Step: 28780000. Mean Reward: 0.5543069852941176. Std of Reward: 0.5719116031896628.
Step: 28781000. Mean Reward: 0.5582859778597786. Std of Reward: 0.5721855501719.
Step: 28782000. Mean Reward: 0.5338015414258188. Std of Reward: 0.5842829615103492.
Step: 28783000. Mean Reward: 0.525992857142857. Std of Reward: 0.5908862683222862.
Step: 28784000. Mean Reward: 0.5330056179775281. Std of Reward: 0.5853196395925759.
Step: 28785000. Mean Reward: 0.5673398230088496. Std of Reward: 0.574877514855725.
Step: 28786000. Mean Reward: 0.5586672661870503. Std of Reward: 0.5838601350270638.
Step: 28787000. Mean Reward: 0.5368942652329749. Std of Reward: 0.5882175382736423.
Step: 28788000. Mean Reward: 0.5435914972273567. Std of Reward: 0.581676868331002

Step: 28875000. Mean Reward: 0.5416998191681736. Std of Reward: 0.5721219223017656.
Step: 28876000. Mean Reward: 0.5705589353612168. Std of Reward: 0.5720633351774783.
Step: 28877000. Mean Reward: 0.580859022556391. Std of Reward: 0.5615354293328579.
Step: 28878000. Mean Reward: 0.5773253676470588. Std of Reward: 0.5750917628344296.
Step: 28879000. Mean Reward: 0.5569694244604316. Std of Reward: 0.5819131348811013.
Step: 28880000. Mean Reward: 0.5613165467625899. Std of Reward: 0.5909458554848268.
Step: 28881000. Mean Reward: 0.5396231617647058. Std of Reward: 0.5856765126571482.
Step: 28882000. Mean Reward: 0.5055371024734983. Std of Reward: 0.5944446241388478.
Step: 28883000. Mean Reward: 0.5184578754578754. Std of Reward: 0.5991098365165364.
Step: 28884000. Mean Reward: 0.5365836236933799. Std of Reward: 0.5849111512475786.
Step: 28885000. Mean Reward: 0.5409873417721519. Std of Reward: 0.5871267686931675.
Step: 28886000. Mean Reward: 0.5208684210526315. Std of Reward: 0.59463094131

Step: 28973000. Mean Reward: 0.5756808118081181. Std of Reward: 0.5711469327441974.
Step: 28974000. Mean Reward: 0.5471569664902998. Std of Reward: 0.5886365931995601.
Step: 28975000. Mean Reward: 0.5578681318681319. Std of Reward: 0.5832597146200892.
Step: 28976000. Mean Reward: 0.5092727272727272. Std of Reward: 0.5955078917881667.
Step: 28977000. Mean Reward: 0.5467561904761905. Std of Reward: 0.5887515456129844.
Step: 28978000. Mean Reward: 0.5580587155963304. Std of Reward: 0.5868338872554804.
Step: 28979000. Mean Reward: 0.4924651600753295. Std of Reward: 0.5949348760914476.
Step: 28980000. Mean Reward: 0.5264745454545454. Std of Reward: 0.5946977951311778.
Step: 28981000. Mean Reward: 0.5458886861313869. Std of Reward: 0.5946533465580819.
Step: 28982000. Mean Reward: 0.49568953068592053. Std of Reward: 0.6089506459216312.
Step: 28983000. Mean Reward: 0.5424354243542435. Std of Reward: 0.5775138586216756.
Step: 28984000. Mean Reward: 0.5614952741020793. Std of Reward: 0.576564062

Step: 29071000. Mean Reward: 0.5553009009009009. Std of Reward: 0.5924817475419472.
Step: 29072000. Mean Reward: 0.5236727272727273. Std of Reward: 0.5959057263617659.
Step: 29073000. Mean Reward: 0.5230694698354662. Std of Reward: 0.596907072897328.
Step: 29074000. Mean Reward: 0.6018023032629558. Std of Reward: 0.5606730439922809.
Step: 29075000. Mean Reward: 0.5677960288808664. Std of Reward: 0.5784147793337746.
Step: 29076000. Mean Reward: 0.5558474576271187. Std of Reward: 0.5834160629698166.
Step: 29077000. Mean Reward: 0.5450317164179104. Std of Reward: 0.5904923800183542.
Step: 29078000. Mean Reward: 0.5939169675090252. Std of Reward: 0.5727494808682478.
Step: 29079000. Mean Reward: 0.5462994454713493. Std of Reward: 0.589661012754552.
Step: 29080000. Mean Reward: 0.509887900355872. Std of Reward: 0.5966020060524777.
Step: 29081000. Mean Reward: 0.551647619047619. Std of Reward: 0.591857859077223.
Step: 29082000. Mean Reward: 0.5040507518796992. Std of Reward: 0.602757032855967

Step: 29169000. Mean Reward: 0.554975791433892. Std of Reward: 0.5812316959097692.
Step: 29170000. Mean Reward: 0.5742716049382716. Std of Reward: 0.5836761414066332.
Step: 29171000. Mean Reward: 0.5671154562383612. Std of Reward: 0.5811345143145304.
Step: 29172000. Mean Reward: 0.4964366972477064. Std of Reward: 0.6019962480697112.
Step: 29173000. Mean Reward: 0.5665151515151515. Std of Reward: 0.5796851644832277.
Step: 29174000. Mean Reward: 0.5290187265917604. Std of Reward: 0.5863669483789032.
Step: 29175000. Mean Reward: 0.5274871794871795. Std of Reward: 0.5941180930115603.
Step: 29176000. Mean Reward: 0.5254157303370787. Std of Reward: 0.5881913681747235.
Step: 29177000. Mean Reward: 0.5272443280977313. Std of Reward: 0.5918094679979218.
Step: 29178000. Mean Reward: 0.5360055147058824. Std of Reward: 0.5869200814036628.
Step: 29179000. Mean Reward: 0.5036895874263262. Std of Reward: 0.5787110448130667.
Step: 29180000. Mean Reward: 0.5248207547169811. Std of Reward: 0.59522142696

Step: 29267000. Mean Reward: 0.5698010849909585. Std of Reward: 0.5885682172254434.
Step: 29268000. Mean Reward: 0.5743018518518518. Std of Reward: 0.5737566909125688.
Step: 29269000. Mean Reward: 0.5157638376383763. Std of Reward: 0.5946244058897494.
Step: 29270000. Mean Reward: 0.5372448979591836. Std of Reward: 0.5959607725654965.
Step: 29271000. Mean Reward: 0.5621149012567325. Std of Reward: 0.5747888077478922.
Step: 29272000. Mean Reward: 0.5206579439252337. Std of Reward: 0.6028956147211374.
Step: 29273000. Mean Reward: 0.5345425925925926. Std of Reward: 0.604361444495989.
Step: 29274000. Mean Reward: 0.48494830659536536. Std of Reward: 0.6043316478915364.
Step: 29275000. Mean Reward: 0.5871007326007326. Std of Reward: 0.5652609521852694.
Step: 29276000. Mean Reward: 0.5661538461538461. Std of Reward: 0.5837339813191841.
Step: 29277000. Mean Reward: 0.533211009174312. Std of Reward: 0.5921092490533971.
Step: 29278000. Mean Reward: 0.5383532710280374. Std of Reward: 0.58407540838

Step: 29365000. Mean Reward: 0.5358479427549194. Std of Reward: 0.5845692664909963.
Step: 29366000. Mean Reward: 0.5499400749063671. Std of Reward: 0.5841627673269502.
Step: 29367000. Mean Reward: 0.5310247349823322. Std of Reward: 0.6005152317450341.
Step: 29368000. Mean Reward: 0.48935883424408017. Std of Reward: 0.6082053421904074.
Step: 29369000. Mean Reward: 0.5317269439421337. Std of Reward: 0.5973746870770987.
Step: 29370000. Mean Reward: 0.546654478976234. Std of Reward: 0.5874146897437444.
Step: 29371000. Mean Reward: 0.6044667896678967. Std of Reward: 0.5738273794820175.
Step: 29372000. Mean Reward: 0.5566472727272729. Std of Reward: 0.5861425665405506.
Step: 29373000. Mean Reward: 0.5290355805243445. Std of Reward: 0.5855114674447522.
Step: 29374000. Mean Reward: 0.5839089253187614. Std of Reward: 0.5666521975752655.
Step: 29375000. Mean Reward: 0.5536720142602496. Std of Reward: 0.5748990233752019.
Step: 29376000. Mean Reward: 0.5121706422018348. Std of Reward: 0.6038930173

Step: 29463000. Mean Reward: 0.5109482142857144. Std of Reward: 0.6101871017187712.
Step: 29464000. Mean Reward: 0.5745185873605948. Std of Reward: 0.5737947256650051.
Step: 29465000. Mean Reward: 0.5447299270072993. Std of Reward: 0.5815378116087135.
Step: 29466000. Mean Reward: 0.5198740601503758. Std of Reward: 0.5960122093447637.
Step: 29467000. Mean Reward: 0.5817025089605735. Std of Reward: 0.56844586376325.
Step: 29468000. Mean Reward: 0.5445415896487985. Std of Reward: 0.5830629974464066.
Step: 29469000. Mean Reward: 0.570110497237569. Std of Reward: 0.5730811868507119.
Step: 29470000. Mean Reward: 0.5736141304347826. Std of Reward: 0.5756121990028621.
Step: 29471000. Mean Reward: 0.5543713235294118. Std of Reward: 0.5844698436602596.
Step: 29472000. Mean Reward: 0.5168531598513011. Std of Reward: 0.5828258926891462.
Step: 29473000. Mean Reward: 0.5622732240437158. Std of Reward: 0.5871637223330745.
Step: 29474000. Mean Reward: 0.5699485294117648. Std of Reward: 0.5811838554376

Step: 29561000. Mean Reward: 0.5454092592592593. Std of Reward: 0.5800248534804409.
Step: 29562000. Mean Reward: 0.5338451730418943. Std of Reward: 0.5927877546916787.
Step: 29563000. Mean Reward: 0.5528692449355432. Std of Reward: 0.5855919636678096.
Step: 29564000. Mean Reward: 0.5303081180811808. Std of Reward: 0.5771275088928032.
Step: 29565000. Mean Reward: 0.5743333333333334. Std of Reward: 0.5755789716669231.
Step: 29566000. Mean Reward: 0.5707943396226416. Std of Reward: 0.5647607250612721.
Step: 29567000. Mean Reward: 0.525980461811723. Std of Reward: 0.5980887777906783.
Step: 29568000. Mean Reward: 0.553135687732342. Std of Reward: 0.575064449426361.
Step: 29569000. Mean Reward: 0.5037406082289804. Std of Reward: 0.600783563175707.
Step: 29570000. Mean Reward: 0.5233141263940521. Std of Reward: 0.5880706181082587.
Step: 29571000. Mean Reward: 0.49726703499079183. Std of Reward: 0.5963962898824067.
Step: 29572000. Mean Reward: 0.5872022471910113. Std of Reward: 0.5689812799414

Step: 29659000. Mean Reward: 0.5986010928961749. Std of Reward: 0.5569714777524707.
Step: 29660000. Mean Reward: 0.5338194174757283. Std of Reward: 0.5751878345895842.
Step: 29661000. Mean Reward: 0.5529245283018867. Std of Reward: 0.5734338358654323.
Step: 29662000. Mean Reward: 0.554935064935065. Std of Reward: 0.5808992816102237.
Step: 29663000. Mean Reward: 0.5947236363636362. Std of Reward: 0.5586231547990881.
Step: 29664000. Mean Reward: 0.5568510242085661. Std of Reward: 0.5811732371881195.
Step: 29665000. Mean Reward: 0.5759925925925925. Std of Reward: 0.5701513898540788.
Step: 29666000. Mean Reward: 0.5167084870848709. Std of Reward: 0.5958477509620166.
Step: 29667000. Mean Reward: 0.5428690909090909. Std of Reward: 0.5845842323232742.
Step: 29668000. Mean Reward: 0.5274671532846715. Std of Reward: 0.5875119355966639.
Step: 29669000. Mean Reward: 0.5408724584103511. Std of Reward: 0.5903277923342879.
Step: 29670000. Mean Reward: 0.5438446428571428. Std of Reward: 0.58648194693

Step: 29757000. Mean Reward: 0.5986131386861314. Std of Reward: 0.5636982225488627.
Step: 29758000. Mean Reward: 0.5748579335793358. Std of Reward: 0.5790973508916718.
Step: 29759000. Mean Reward: 0.5779443378119001. Std of Reward: 0.5688241793363659.
Step: 29760000. Mean Reward: 0.5780353982300885. Std of Reward: 0.5709107477818279.
Step: 29761000. Mean Reward: 0.5497064220183486. Std of Reward: 0.5770996877418886.
Step: 29762000. Mean Reward: 0.5734358974358974. Std of Reward: 0.5683990599148688.
Step: 29763000. Mean Reward: 0.5772844036697248. Std of Reward: 0.5612790977192817.
Step: 29764000. Mean Reward: 0.5309287054409005. Std of Reward: 0.5869217009935809.
Step: 29765000. Mean Reward: 0.5670348623853211. Std of Reward: 0.5661059717594209.
Step: 29766000. Mean Reward: 0.5136641074856045. Std of Reward: 0.5938196444154447.
Step: 29767000. Mean Reward: 0.4982254545454545. Std of Reward: 0.5956391678366669.
Step: 29768000. Mean Reward: 0.5416395759717314. Std of Reward: 0.5998361738

Step: 29855000. Mean Reward: 0.534260147601476. Std of Reward: 0.5939357661530793.
Step: 29856000. Mean Reward: 0.5391375464684015. Std of Reward: 0.5779344675125041.
Step: 29857000. Mean Reward: 0.5616654064272212. Std of Reward: 0.5746806621702804.
Step: 29858000. Mean Reward: 0.5195454545454545. Std of Reward: 0.5935620057019482.
Step: 29859000. Mean Reward: 0.5464018181818181. Std of Reward: 0.5816003754503318.
Step: 29860000. Mean Reward: 0.5416303142329021. Std of Reward: 0.5889604991993025.
Step: 29861000. Mean Reward: 0.542337614678899. Std of Reward: 0.5775256964439123.
Step: 29862000. Mean Reward: 0.5479652014652014. Std of Reward: 0.5830123999504827.
Step: 29863000. Mean Reward: 0.563310606060606. Std of Reward: 0.5733088157536432.
Step: 29864000. Mean Reward: 0.5386064400715562. Std of Reward: 0.5826345375305714.
Step: 29865000. Mean Reward: 0.5583945454545454. Std of Reward: 0.578570173921942.
Step: 29866000. Mean Reward: 0.5580955056179775. Std of Reward: 0.57221074031174

Step: 29953000. Mean Reward: 0.5575398886827458. Std of Reward: 0.5701861914636259.
Step: 29954000. Mean Reward: 0.5191220825852783. Std of Reward: 0.6021809042715249.
Step: 29955000. Mean Reward: 0.5415494699646644. Std of Reward: 0.5865345794782668.
Step: 29956000. Mean Reward: 0.5435295202952029. Std of Reward: 0.5830533369924219.
Step: 29957000. Mean Reward: 0.5668533834586467. Std of Reward: 0.5865401879133469.
Step: 29958000. Mean Reward: 0.5627357142857143. Std of Reward: 0.585651345093823.
Step: 29959000. Mean Reward: 0.5750432432432433. Std of Reward: 0.5801814114586006.
Step: 29960000. Mean Reward: 0.5248839779005524. Std of Reward: 0.5875898578522633.
Step: 29961000. Mean Reward: 0.5682389705882354. Std of Reward: 0.5809996471185516.
Step: 29962000. Mean Reward: 0.5712361623616237. Std of Reward: 0.5798100143274725.
Step: 29963000. Mean Reward: 0.5257541284403671. Std of Reward: 0.5889888942321643.
Step: 29964000. Mean Reward: 0.5618785578747628. Std of Reward: 0.57643488193

Saved Model
Step: 30051000. Mean Reward: 0.5750559566787004. Std of Reward: 0.5727530753444311.
Step: 30052000. Mean Reward: 0.5456535714285714. Std of Reward: 0.5892502239421011.
Step: 30053000. Mean Reward: 0.5416686507936508. Std of Reward: 0.5885719461906285.
Step: 30054000. Mean Reward: 0.5380757855822551. Std of Reward: 0.5828754395154867.
Step: 30055000. Mean Reward: 0.51131931166348. Std of Reward: 0.5968043189957599.
Step: 30056000. Mean Reward: 0.56055938697318. Std of Reward: 0.5746968093859562.
Step: 30057000. Mean Reward: 0.5358021978021978. Std of Reward: 0.5901337941011551.
Step: 30058000. Mean Reward: 0.5422912280701754. Std of Reward: 0.5961269611271598.
Step: 30059000. Mean Reward: 0.5996953405017921. Std of Reward: 0.5556376333403128.
Step: 30060000. Mean Reward: 0.5585601436265709. Std of Reward: 0.5802169736472877.
Step: 30061000. Mean Reward: 0.5395121951219513. Std of Reward: 0.5828217366232554.
Step: 30062000. Mean Reward: 0.5275266055045871. Std of Reward: 0.59

Step: 30149000. Mean Reward: 0.5140148975791433. Std of Reward: 0.5974100804701372.
Step: 30150000. Mean Reward: 0.5534558823529413. Std of Reward: 0.5827580514956829.
Saved Model
Step: 30151000. Mean Reward: 0.5912924187725632. Std of Reward: 0.5603132417957786.
Step: 30152000. Mean Reward: 0.5903045045045044. Std of Reward: 0.567955276025582.
Step: 30153000. Mean Reward: 0.5439214285714286. Std of Reward: 0.587294344769264.
Step: 30154000. Mean Reward: 0.5648867256637167. Std of Reward: 0.5754447125983697.
Step: 30155000. Mean Reward: 0.5909502868068833. Std of Reward: 0.5658951836738598.
Step: 30156000. Mean Reward: 0.5537125. Std of Reward: 0.5825404503314096.
Step: 30157000. Mean Reward: 0.5352883548983365. Std of Reward: 0.589112222544769.
Step: 30158000. Mean Reward: 0.5438. Std of Reward: 0.5902506124135285.
Step: 30159000. Mean Reward: 0.565815693430657. Std of Reward: 0.5763297483253933.
Step: 30160000. Mean Reward: 0.5038836772983114. Std of Reward: 0.6056905704602402.
Step:

Step: 30247000. Mean Reward: 0.5593351851851852. Std of Reward: 0.5830033548145034.
Step: 30248000. Mean Reward: 0.5338502581755593. Std of Reward: 0.5949852425489662.
Step: 30249000. Mean Reward: 0.5792118491921006. Std of Reward: 0.5805677358189648.
Step: 30250000. Mean Reward: 0.5878242753623188. Std of Reward: 0.5702918270796936.
Saved Model
Step: 30251000. Mean Reward: 0.5543241758241757. Std of Reward: 0.58989273505111.
Step: 30252000. Mean Reward: 0.5971971326164874. Std of Reward: 0.5766908073687372.
Step: 30253000. Mean Reward: 0.5656081560283688. Std of Reward: 0.5855305716689874.
Step: 30254000. Mean Reward: 0.540411214953271. Std of Reward: 0.5998718640944654.
Step: 30255000. Mean Reward: 0.5704964788732395. Std of Reward: 0.579557465331441.
Step: 30256000. Mean Reward: 0.5697315315315316. Std of Reward: 0.5768531235270202.
Step: 30257000. Mean Reward: 0.6085716911764706. Std of Reward: 0.55854143232627.
Step: 30258000. Mean Reward: 0.5616559714795009. Std of Reward: 0.5844

Step: 30345000. Mean Reward: 0.5387915129151291. Std of Reward: 0.5892611820457689.
Step: 30346000. Mean Reward: 0.5928608058608058. Std of Reward: 0.5780738818346872.
Step: 30347000. Mean Reward: 0.5816535162950257. Std of Reward: 0.5774558918655545.
Step: 30348000. Mean Reward: 0.5410272727272727. Std of Reward: 0.5916050964666288.
Step: 30349000. Mean Reward: 0.5644199288256226. Std of Reward: 0.594437275239019.
Step: 30350000. Mean Reward: 0.5549609665427508. Std of Reward: 0.5846786970411929.
Saved Model
Step: 30351000. Mean Reward: 0.5531694599627561. Std of Reward: 0.5788987837466565.
Step: 30352000. Mean Reward: 0.5470499040307101. Std of Reward: 0.5794759044215498.
Step: 30353000. Mean Reward: 0.5402900763358778. Std of Reward: 0.5865959210856697.
Step: 30354000. Mean Reward: 0.5673142329020332. Std of Reward: 0.5750974755928633.
Step: 30355000. Mean Reward: 0.5407712177121771. Std of Reward: 0.584332788261332.
Step: 30356000. Mean Reward: 0.5350537634408602. Std of Reward: 0.

Step: 30443000. Mean Reward: 0.5690598911070781. Std of Reward: 0.5807727704809442.
Step: 30444000. Mean Reward: 0.5398713768115943. Std of Reward: 0.5878108210594852.
Step: 30445000. Mean Reward: 0.5402841121495326. Std of Reward: 0.5979894551237213.
Step: 30446000. Mean Reward: 0.5299405405405405. Std of Reward: 0.5906359514076359.
Step: 30447000. Mean Reward: 0.5764748603351956. Std of Reward: 0.5753394203625529.
Step: 30448000. Mean Reward: 0.579901610017889. Std of Reward: 0.5762808144311995.
Step: 30449000. Mean Reward: 0.5873877551020409. Std of Reward: 0.576779019235292.
Step: 30450000. Mean Reward: 0.5322518382352941. Std of Reward: 0.5906845394797839.
Saved Model
Step: 30451000. Mean Reward: 0.5756666666666668. Std of Reward: 0.5920360159720334.
Step: 30452000. Mean Reward: 0.5483339517625232. Std of Reward: 0.5784443020327389.
Step: 30453000. Mean Reward: 0.5488802946593002. Std of Reward: 0.585783389181357.
Step: 30454000. Mean Reward: 0.5322762923351159. Std of Reward: 0.6

Step: 30541000. Mean Reward: 0.577. Std of Reward: 0.575626324018454.
Step: 30542000. Mean Reward: 0.5438615664845172. Std of Reward: 0.5918824334952356.
Step: 30543000. Mean Reward: 0.5807240740740741. Std of Reward: 0.5686959740510921.
Step: 30544000. Mean Reward: 0.5722833638025594. Std of Reward: 0.5708459149758596.
Step: 30545000. Mean Reward: 0.5504758364312268. Std of Reward: 0.5887999770591836.
Step: 30546000. Mean Reward: 0.5402941176470588. Std of Reward: 0.5976595715001.
Step: 30547000. Mean Reward: 0.5892823315118397. Std of Reward: 0.5686609350805906.
Step: 30548000. Mean Reward: 0.5864298401420959. Std of Reward: 0.5750789736154903.
Step: 30549000. Mean Reward: 0.5736588021778585. Std of Reward: 0.576321512807635.
Step: 30550000. Mean Reward: 0.546636197440585. Std of Reward: 0.5851841326541763.
Saved Model
Step: 30551000. Mean Reward: 0.572183970856102. Std of Reward: 0.5756463673901839.
Step: 30552000. Mean Reward: 0.5747446808510639. Std of Reward: 0.5772894557006875.


Step: 30639000. Mean Reward: 0.5408949275362319. Std of Reward: 0.589123000102383.
Step: 30640000. Mean Reward: 0.5136648745519713. Std of Reward: 0.6033546576335238.
Step: 30641000. Mean Reward: 0.5878208409506399. Std of Reward: 0.5715099543012981.
Step: 30642000. Mean Reward: 0.5352287822878228. Std of Reward: 0.580605968650079.
Step: 30643000. Mean Reward: 0.5436805807622505. Std of Reward: 0.5885055233095998.
Step: 30644000. Mean Reward: 0.5589079422382671. Std of Reward: 0.5890412609052393.
Step: 30645000. Mean Reward: 0.5183686131386861. Std of Reward: 0.5922255339838499.
Step: 30646000. Mean Reward: 0.5276014625228519. Std of Reward: 0.606783329828702.
Step: 30647000. Mean Reward: 0.5269730769230769. Std of Reward: 0.5791421135927575.
Step: 30648000. Mean Reward: 0.5705505415162454. Std of Reward: 0.5773736958241782.
Step: 30649000. Mean Reward: 0.5936954128440366. Std of Reward: 0.5751651943612774.
Step: 30650000. Mean Reward: 0.515391150442478. Std of Reward: 0.59646713082262

Step: 30737000. Mean Reward: 0.5778610108303249. Std of Reward: 0.5802259305662798.
Step: 30738000. Mean Reward: 0.5247594696969696. Std of Reward: 0.5932630977315393.
Step: 30739000. Mean Reward: 0.5127046263345195. Std of Reward: 0.594659430280309.
Step: 30740000. Mean Reward: 0.5701473880597014. Std of Reward: 0.5768690960894235.
Step: 30741000. Mean Reward: 0.5445129629629629. Std of Reward: 0.5887554527690549.
Step: 30742000. Mean Reward: 0.5151858407079647. Std of Reward: 0.5916186308774297.
Step: 30743000. Mean Reward: 0.5824558303886925. Std of Reward: 0.5765469288126079.
Step: 30744000. Mean Reward: 0.5931571428571428. Std of Reward: 0.5743739265051814.
Step: 30745000. Mean Reward: 0.5915868372943327. Std of Reward: 0.5730107895542397.
Step: 30746000. Mean Reward: 0.5267145488029465. Std of Reward: 0.5951069185318424.
Step: 30747000. Mean Reward: 0.5548703703703703. Std of Reward: 0.5935385552173443.
Step: 30748000. Mean Reward: 0.5763882783882783. Std of Reward: 0.57385400959

Step: 30835000. Mean Reward: 0.5526494464944648. Std of Reward: 0.5799106519086458.
Step: 30836000. Mean Reward: 0.5484147727272727. Std of Reward: 0.5868617328586758.
Step: 30837000. Mean Reward: 0.5386207513416815. Std of Reward: 0.5958113747438966.
Step: 30838000. Mean Reward: 0.5515837837837838. Std of Reward: 0.5861014222935411.
Step: 30839000. Mean Reward: 0.5994493554327808. Std of Reward: 0.5551902273124661.
Step: 30840000. Mean Reward: 0.5685318352059925. Std of Reward: 0.5815796114825017.
Step: 30841000. Mean Reward: 0.5441029962546816. Std of Reward: 0.5929992157803551.
Step: 30842000. Mean Reward: 0.591011009174312. Std of Reward: 0.565389666608929.
Step: 30843000. Mean Reward: 0.5371072056239017. Std of Reward: 0.5909499404282832.
Step: 30844000. Mean Reward: 0.5787879924953097. Std of Reward: 0.5791483509979976.
Step: 30845000. Mean Reward: 0.557472172351885. Std of Reward: 0.576685534878109.
Step: 30846000. Mean Reward: 0.5225642023346303. Std of Reward: 0.58905288087040

Step: 30933000. Mean Reward: 0.609029520295203. Std of Reward: 0.5549204216870244.
Step: 30934000. Mean Reward: 0.576045540796964. Std of Reward: 0.571404951343086.
Step: 30935000. Mean Reward: 0.5318539325842696. Std of Reward: 0.5843425073792148.
Step: 30936000. Mean Reward: 0.5312760511882998. Std of Reward: 0.5901717986733239.
Step: 30937000. Mean Reward: 0.567888888888889. Std of Reward: 0.5693569314041385.
Step: 30938000. Mean Reward: 0.5110742753623189. Std of Reward: 0.6080493310977226.
Step: 30939000. Mean Reward: 0.5526086142322096. Std of Reward: 0.5960945228321713.
Step: 30940000. Mean Reward: 0.5502788104089219. Std of Reward: 0.5864104785918327.
Step: 30941000. Mean Reward: 0.5645742397137745. Std of Reward: 0.5870260615198373.
Step: 30942000. Mean Reward: 0.5763174311926604. Std of Reward: 0.5687643746898509.
Step: 30943000. Mean Reward: 0.5527471264367816. Std of Reward: 0.5850987712090042.
Step: 30944000. Mean Reward: 0.563515325670498. Std of Reward: 0.573208350413280

Step: 31031000. Mean Reward: 0.5912988929889299. Std of Reward: 0.5571627015303826.
Step: 31032000. Mean Reward: 0.5407288135593219. Std of Reward: 0.5864090776132778.
Step: 31033000. Mean Reward: 0.5861019677996422. Std of Reward: 0.5834615922870366.
Step: 31034000. Mean Reward: 0.5189980952380954. Std of Reward: 0.5971402818323749.
Step: 31035000. Mean Reward: 0.5783947858472998. Std of Reward: 0.5745941719106357.
Step: 31036000. Mean Reward: 0.5274051094890511. Std of Reward: 0.6004253663510851.
Step: 31037000. Mean Reward: 0.5477557251908397. Std of Reward: 0.5917385478673057.
Step: 31038000. Mean Reward: 0.5861202290076335. Std of Reward: 0.5750733332353544.
Step: 31039000. Mean Reward: 0.5227490566037736. Std of Reward: 0.6020378280983465.
Step: 31040000. Mean Reward: 0.5458019417475728. Std of Reward: 0.5858406453497422.
Step: 31041000. Mean Reward: 0.5690072727272726. Std of Reward: 0.5750237876042554.
Step: 31042000. Mean Reward: 0.5275651408450704. Std of Reward: 0.6024826652

Step: 31129000. Mean Reward: 0.5307475915221579. Std of Reward: 0.5903196905581528.
Step: 31130000. Mean Reward: 0.5086247689463955. Std of Reward: 0.6096953768459399.
Step: 31131000. Mean Reward: 0.5479349005424955. Std of Reward: 0.5799634702850989.
Step: 31132000. Mean Reward: 0.5691731123388583. Std of Reward: 0.5805486129062781.
Step: 31133000. Mean Reward: 0.570998254799302. Std of Reward: 0.5836242201652073.
Step: 31134000. Mean Reward: 0.5484022770398481. Std of Reward: 0.5840047677232334.
Step: 31135000. Mean Reward: 0.5610149812734082. Std of Reward: 0.5746083471239781.
Step: 31136000. Mean Reward: 0.5368406593406594. Std of Reward: 0.593076617207215.
Step: 31137000. Mean Reward: 0.5255875912408758. Std of Reward: 0.5871595407292471.
Step: 31138000. Mean Reward: 0.5599084112149534. Std of Reward: 0.5873210310219219.
Step: 31139000. Mean Reward: 0.5901901408450705. Std of Reward: 0.5784777094455172.
Step: 31140000. Mean Reward: 0.5450168224299066. Std of Reward: 0.591949051938

Step: 31227000. Mean Reward: 0.5514256880733943. Std of Reward: 0.5910661852139959.
Step: 31228000. Mean Reward: 0.5208435251798561. Std of Reward: 0.5982923892793451.
Step: 31229000. Mean Reward: 0.5847794117647059. Std of Reward: 0.5713934681622441.
Step: 31230000. Mean Reward: 0.5539838709677419. Std of Reward: 0.5828907643023948.
Step: 31231000. Mean Reward: 0.6024076086956521. Std of Reward: 0.5537887050759016.
Step: 31232000. Mean Reward: 0.5504767025089605. Std of Reward: 0.5900250665915228.
Step: 31233000. Mean Reward: 0.5648071161048689. Std of Reward: 0.5622736800351076.
Step: 31234000. Mean Reward: 0.5439525547445255. Std of Reward: 0.5926490276814106.
Step: 31235000. Mean Reward: 0.5979872029250457. Std of Reward: 0.5627107168356751.
Step: 31236000. Mean Reward: 0.6163714821763602. Std of Reward: 0.55475733154038.
Step: 31237000. Mean Reward: 0.5637150635208711. Std of Reward: 0.5818030804639464.
Step: 31238000. Mean Reward: 0.5691160220994474. Std of Reward: 0.576145523505

Step: 31325000. Mean Reward: 0.5393959854014597. Std of Reward: 0.5933631768309519.
Step: 31326000. Mean Reward: 0.5693704379562043. Std of Reward: 0.5737710379729849.
Step: 31327000. Mean Reward: 0.5879741697416975. Std of Reward: 0.5772635977830385.
Step: 31328000. Mean Reward: 0.5864781818181818. Std of Reward: 0.5760690224399122.
Step: 31329000. Mean Reward: 0.5612996453900709. Std of Reward: 0.5801192228282991.
Step: 31330000. Mean Reward: 0.5247101449275362. Std of Reward: 0.6041180984324193.
Step: 31331000. Mean Reward: 0.5707415094339623. Std of Reward: 0.5743272809430677.
Step: 31332000. Mean Reward: 0.5791793478260869. Std of Reward: 0.5821933715011401.
Step: 31333000. Mean Reward: 0.5630215827338129. Std of Reward: 0.5792208791750476.
Step: 31334000. Mean Reward: 0.559731046931408. Std of Reward: 0.5806431528792417.
Step: 31335000. Mean Reward: 0.5158706099815157. Std of Reward: 0.6047838203813052.
Step: 31336000. Mean Reward: 0.5082036697247706. Std of Reward: 0.59089019516

Step: 31423000. Mean Reward: 0.5508351449275363. Std of Reward: 0.5958254504366379.
Step: 31424000. Mean Reward: 0.5426804835924006. Std of Reward: 0.6027621651931774.
Step: 31425000. Mean Reward: 0.5579180633147113. Std of Reward: 0.5872029117610085.
Step: 31426000. Mean Reward: 0.554259927797834. Std of Reward: 0.5814003612752313.
Step: 31427000. Mean Reward: 0.5708073394495413. Std of Reward: 0.5851531294458409.
Step: 31428000. Mean Reward: 0.5960159574468086. Std of Reward: 0.5663922294194884.
Step: 31429000. Mean Reward: 0.5951791590493601. Std of Reward: 0.5650209852639896.
Step: 31430000. Mean Reward: 0.55121706864564. Std of Reward: 0.5833501950786266.
Step: 31431000. Mean Reward: 0.5416902173913043. Std of Reward: 0.594652318993217.
Step: 31432000. Mean Reward: 0.5589746835443038. Std of Reward: 0.5899370482331691.
Step: 31433000. Mean Reward: 0.5692757417102967. Std of Reward: 0.5809648057045984.
Step: 31434000. Mean Reward: 0.5940364963503649. Std of Reward: 0.56335504562144

Step: 31521000. Mean Reward: 0.56742. Std of Reward: 0.5734713751753042.
Step: 31522000. Mean Reward: 0.546264325323475. Std of Reward: 0.5781675930229547.
Step: 31523000. Mean Reward: 0.5915770609318997. Std of Reward: 0.5741721142840963.
Step: 31524000. Mean Reward: 0.614967213114754. Std of Reward: 0.5557061695089434.
Step: 31525000. Mean Reward: 0.5625137614678899. Std of Reward: 0.5820831621072604.
Step: 31526000. Mean Reward: 0.5331396226415094. Std of Reward: 0.6005189762887402.
Step: 31527000. Mean Reward: 0.557703237410072. Std of Reward: 0.5878088565682068.
Step: 31528000. Mean Reward: 0.5551772388059701. Std of Reward: 0.5846371086497044.
Step: 31529000. Mean Reward: 0.5353092979127135. Std of Reward: 0.6037036051364322.
Step: 31530000. Mean Reward: 0.5516690777576854. Std of Reward: 0.5909842595291902.
Step: 31531000. Mean Reward: 0.5780670289855072. Std of Reward: 0.5711835166017573.
Step: 31532000. Mean Reward: 0.5582421441774492. Std of Reward: 0.5803390227205647.
Step: 

Step: 31619000. Mean Reward: 0.5537080979284368. Std of Reward: 0.5851211726923894.
Step: 31620000. Mean Reward: 0.4936439114391144. Std of Reward: 0.6043246608489198.
Step: 31621000. Mean Reward: 0.5601698113207547. Std of Reward: 0.5886262376627235.
Step: 31622000. Mean Reward: 0.5464387947269304. Std of Reward: 0.5855486791634923.
Step: 31623000. Mean Reward: 0.5317775768535262. Std of Reward: 0.5886081225709643.
Step: 31624000. Mean Reward: 0.5628745583038869. Std of Reward: 0.5797260503476276.
Step: 31625000. Mean Reward: 0.5215925266903915. Std of Reward: 0.6078031143227621.
Step: 31626000. Mean Reward: 0.5831766784452297. Std of Reward: 0.5752209075300015.
Step: 31627000. Mean Reward: 0.6014025735294117. Std of Reward: 0.5584609905875995.
Step: 31628000. Mean Reward: 0.5271527001862197. Std of Reward: 0.5954540127819667.
Step: 31629000. Mean Reward: 0.5548. Std of Reward: 0.5768856141834808.
Step: 31630000. Mean Reward: 0.5238724954462659. Std of Reward: 0.6015706568185809.
Step

Step: 31717000. Mean Reward: 0.57021778584392. Std of Reward: 0.5770719410650699.
Step: 31718000. Mean Reward: 0.546669708029197. Std of Reward: 0.5776159870523451.
Step: 31719000. Mean Reward: 0.5603543165467626. Std of Reward: 0.5834822456035872.
Step: 31720000. Mean Reward: 0.5489911190053286. Std of Reward: 0.580278666068407.
Step: 31721000. Mean Reward: 0.5741201478743069. Std of Reward: 0.5571994688093151.
Step: 31722000. Mean Reward: 0.542963503649635. Std of Reward: 0.5759172076391517.
Step: 31723000. Mean Reward: 0.5174861878453038. Std of Reward: 0.6003063730479059.
Step: 31724000. Mean Reward: 0.5984752293577982. Std of Reward: 0.5583342163013453.
Step: 31725000. Mean Reward: 0.6072517857142857. Std of Reward: 0.5602531690134991.
Step: 31726000. Mean Reward: 0.5826282051282051. Std of Reward: 0.5691052799298485.
Step: 31727000. Mean Reward: 0.5344683301343569. Std of Reward: 0.5799947675444818.
Step: 31728000. Mean Reward: 0.5465807622504537. Std of Reward: 0.593609999493975

Step: 31815000. Mean Reward: 0.5294786476868327. Std of Reward: 0.592543276260551.
Step: 31816000. Mean Reward: 0.5660676156583629. Std of Reward: 0.5779928939560676.
Step: 31817000. Mean Reward: 0.5844809027777778. Std of Reward: 0.5743551785812276.
Step: 31818000. Mean Reward: 0.5978929889298893. Std of Reward: 0.5664806640938183.
Step: 31819000. Mean Reward: 0.5977093862815884. Std of Reward: 0.565010076602887.
Step: 31820000. Mean Reward: 0.5189731182795698. Std of Reward: 0.5983944776569018.
Step: 31821000. Mean Reward: 0.5809855072463769. Std of Reward: 0.5794652049113937.
Step: 31822000. Mean Reward: 0.528747311827957. Std of Reward: 0.5927798000487368.
Step: 31823000. Mean Reward: 0.5649475524475525. Std of Reward: 0.5964984106371205.
Step: 31824000. Mean Reward: 0.5691286764705882. Std of Reward: 0.5770657028052617.
Step: 31825000. Mean Reward: 0.5402785714285714. Std of Reward: 0.5962625089176883.
Step: 31826000. Mean Reward: 0.5639681978798586. Std of Reward: 0.5937449801574

Step: 31913000. Mean Reward: 0.59666. Std of Reward: 0.570425970688891.
Step: 31914000. Mean Reward: 0.5698047016274864. Std of Reward: 0.5806424466294247.
Step: 31915000. Mean Reward: 0.5629648798521256. Std of Reward: 0.594954379854905.
Step: 31916000. Mean Reward: 0.5130148975791434. Std of Reward: 0.6014948663670854.
Step: 31917000. Mean Reward: 0.5510337477797512. Std of Reward: 0.6025897313232744.
Step: 31918000. Mean Reward: 0.564344028520499. Std of Reward: 0.5932273322602676.
Step: 31919000. Mean Reward: 0.5959061371841154. Std of Reward: 0.5667569964233578.
Step: 31920000. Mean Reward: 0.5683712686567165. Std of Reward: 0.5903205825182273.
Step: 31921000. Mean Reward: 0.575263986013986. Std of Reward: 0.5781658440693702.
Step: 31922000. Mean Reward: 0.5339673913043478. Std of Reward: 0.5933159022023918.
Step: 31923000. Mean Reward: 0.5492793296089384. Std of Reward: 0.5898437505764982.
Step: 31924000. Mean Reward: 0.5741516245487365. Std of Reward: 0.5758227942408992.
Step: 3

Step: 32011000. Mean Reward: 0.5750585009140768. Std of Reward: 0.5720600416704507.
Step: 32012000. Mean Reward: 0.5548069216757741. Std of Reward: 0.5876581375105149.
Step: 32013000. Mean Reward: 0.5932254025044723. Std of Reward: 0.5704226183322161.
Step: 32014000. Mean Reward: 0.5588953900709219. Std of Reward: 0.5944736673599784.
Step: 32015000. Mean Reward: 0.558508078994614. Std of Reward: 0.5951342005977184.
Step: 32016000. Mean Reward: 0.5646406533575317. Std of Reward: 0.5798133388372365.
Step: 32017000. Mean Reward: 0.5394385026737968. Std of Reward: 0.5970752633496023.
Step: 32018000. Mean Reward: 0.5985385964912281. Std of Reward: 0.5602600487224851.
Step: 32019000. Mean Reward: 0.5419714285714285. Std of Reward: 0.5957461660990936.
Step: 32020000. Mean Reward: 0.5682421602787456. Std of Reward: 0.5921620213057118.
Step: 32021000. Mean Reward: 0.5365220458553792. Std of Reward: 0.5949511228489581.
Step: 32022000. Mean Reward: 0.5882351885098743. Std of Reward: 0.57251551167

Step: 32109000. Mean Reward: 0.5302828467153285. Std of Reward: 0.5930549160924183.
Step: 32110000. Mean Reward: 0.5430751341681574. Std of Reward: 0.5854273286630497.
Step: 32111000. Mean Reward: 0.578414364640884. Std of Reward: 0.5720963449748829.
Step: 32112000. Mean Reward: 0.5815652951699463. Std of Reward: 0.5700536919863551.
Step: 32113000. Mean Reward: 0.5203406593406593. Std of Reward: 0.6032996640839562.
Step: 32114000. Mean Reward: 0.5519796296296295. Std of Reward: 0.58450391560949.
Step: 32115000. Mean Reward: 0.5888025594149908. Std of Reward: 0.5699573457795545.
Step: 32116000. Mean Reward: 0.587090573012939. Std of Reward: 0.5721045019160491.
Step: 32117000. Mean Reward: 0.5360608856088561. Std of Reward: 0.5933710950497539.
Step: 32118000. Mean Reward: 0.5644014466546112. Std of Reward: 0.5779819053945799.
Step: 32119000. Mean Reward: 0.5532787769784172. Std of Reward: 0.5889543987391449.
Step: 32120000. Mean Reward: 0.538616513761468. Std of Reward: 0.588237680308423

Step: 32207000. Mean Reward: 0.5065394736842105. Std of Reward: 0.5951963790589084.
Step: 32208000. Mean Reward: 0.5528218181818182. Std of Reward: 0.5871121381786397.
Step: 32209000. Mean Reward: 0.5821567567567567. Std of Reward: 0.5773504211141207.
Step: 32210000. Mean Reward: 0.5669524680073126. Std of Reward: 0.582528091709765.
Step: 32211000. Mean Reward: 0.5647546816479401. Std of Reward: 0.5838565778419544.
Step: 32212000. Mean Reward: 0.5242801484230055. Std of Reward: 0.5851853213234003.
Step: 32213000. Mean Reward: 0.5384101123595505. Std of Reward: 0.58967613656995.
Step: 32214000. Mean Reward: 0.5092950819672132. Std of Reward: 0.5942841497394581.
Step: 32215000. Mean Reward: 0.5690614525139666. Std of Reward: 0.5825893350219489.
Step: 32216000. Mean Reward: 0.5439247311827957. Std of Reward: 0.5917918705005326.
Step: 32217000. Mean Reward: 0.5884803001876173. Std of Reward: 0.5714132426620782.
Step: 32218000. Mean Reward: 0.5550455373406192. Std of Reward: 0.5969062923004

Step: 32305000. Mean Reward: 0.5766924528301887. Std of Reward: 0.5802880853666668.
Step: 32306000. Mean Reward: 0.5354611307420495. Std of Reward: 0.5954752632353616.
Step: 32307000. Mean Reward: 0.5717481884057971. Std of Reward: 0.5757255032297787.
Step: 32308000. Mean Reward: 0.5794401408450705. Std of Reward: 0.5868046588810344.
Step: 32309000. Mean Reward: 0.5783393177737881. Std of Reward: 0.5840128943833327.
Step: 32310000. Mean Reward: 0.5697236842105263. Std of Reward: 0.579981767249035.
Step: 32311000. Mean Reward: 0.5579259927797834. Std of Reward: 0.5756576775054302.
Step: 32312000. Mean Reward: 0.4891336996336996. Std of Reward: 0.6141554271662952.
Step: 32313000. Mean Reward: 0.545051376146789. Std of Reward: 0.5874473133689053.
Step: 32314000. Mean Reward: 0.572111111111111. Std of Reward: 0.5840976916042222.
Step: 32315000. Mean Reward: 0.600386404293381. Std of Reward: 0.5694570406753502.
Step: 32316000. Mean Reward: 0.5844583333333333. Std of Reward: 0.56888340136696

Step: 32403000. Mean Reward: 0.5878180212014134. Std of Reward: 0.580328507942757.
Step: 32404000. Mean Reward: 0.5833339285714285. Std of Reward: 0.5864426542348321.
Step: 32405000. Mean Reward: 0.5560648648648648. Std of Reward: 0.5822706526261361.
Step: 32406000. Mean Reward: 0.579032667876588. Std of Reward: 0.5759787904002074.
Step: 32407000. Mean Reward: 0.5306925925925926. Std of Reward: 0.5969712645394845.
Step: 32408000. Mean Reward: 0.6146556776556776. Std of Reward: 0.5614359260187584.
Step: 32409000. Mean Reward: 0.5505921787709497. Std of Reward: 0.5844832056329314.
Step: 32410000. Mean Reward: 0.528166037735849. Std of Reward: 0.5843928689346397.
Step: 32411000. Mean Reward: 0.5571062618595826. Std of Reward: 0.5767409154768521.
Step: 32412000. Mean Reward: 0.574325. Std of Reward: 0.5830174416191534.
Step: 32413000. Mean Reward: 0.5588454376163874. Std of Reward: 0.5782830333091772.
Step: 32414000. Mean Reward: 0.5446963946869069. Std of Reward: 0.5888817811972883.
Step:

KeyboardInterrupt: 

### Export the trained TensorFlow graph
Once the model has been trained and saved, we can export it as a .bytes file which Unity can embed.

In [5]:
# Freeze the saved checkpoint and export it so Unity can embed the model.
# env_name comes from the hyperparameter cell; model_path is not defined in
# this cell and must already exist in the kernel from an earlier cell
# (the log below restores from ./models/ppo — presumably './models/' + run_path; confirm).
# NOTE(review): export_graph is not imported by name; it presumably arrives via
# `from ppo.models import *` — confirm.
export_graph(model_path, env_name)

INFO:tensorflow:Restoring parameters from ./models/ppo\model-32400000.cptk


INFO:tensorflow:Restoring parameters from ./models/ppo\model-32400000.cptk


INFO:tensorflow:Froze 7 variables.


INFO:tensorflow:Froze 7 variables.


Converted 7 variables to const ops.
