# Lab 3-1: REINFORCE
    In this lab, you need to implement a REINFORCE algorithm with Tensorflow and solve OpenAI Gym CartPole-v0

In [1]:
from cartpole_env import *

import numpy as np
import tensorflow as tf

from collections import namedtuple, deque

# Define the data structure of experience
Experience = namedtuple('Experience', 'state action reward next_state done')

## Implement ```discount``` function to compute discounted reward

In [2]:
def discount(rewards, gamma):
    '''
    param rewards: a rewards numpy array
    param gamma: discount factor
    '''
    discounted_rewards = np.zeros_like(rewards)
    running_add = 0
    for t in reversed(range(len(rewards))):
        running_add = running_add * gamma + rewards[t]
        discounted_rewards[t] = running_add
    return discounted_rewards

## Implement ```do_rollout``` function to collect rollout

In [3]:
def do_rollout(env, policy, render=False):
    '''
    Collect a rollout from env with policy
    
    param env: RL Environment
    param policy: a function parameterized by environment state, return a action
    return a list of (state, action, reward, next_state, done)
    '''
    # Initialize done as False
    done = False
    
    # Reset the environment and get the initial state
    state = env.reset()
    
    # Empty list
    rollout = []
    
    while not done:
        action = policy(state)
        next_state, reward, done, info = env.step(action)
        
        # Render the environment (slow)
        if render:
            env.render()
        
        rollout.append(Experience(state, action, reward, next_state, done))
        state = next_state
        
    return rollout

## Implement ```ReinforceAgent``` following ```TODO```

In [4]:
class ReinforceAgent(object):
    def __init__(self, sess, n_states, n_actions, n_hiddens, lr, gamma):
        '''
        param sess: tf session
        param n_states: dim of states
        param n_actions: dim of actions space
        param n_hiddens: dim of hidden state
        '''
        self.sess = sess
        self.n_states = n_states
        self.n_actions = n_actions
        
        # Learning rate
        self.lr = lr
        
        # Discount factor
        self.gamma = gamma
       
        self.state = tf.placeholder(shape=[None, n_states], dtype=tf.float32)
        self.value = tf.placeholder(shape=[None], dtype=tf.float32)
        self.action = tf.placeholder(shape=[None], dtype=tf.int32)
        
        # Declare 1-st hidden layer
        # Define a fully-connected layer with:
        # input = self.state
        # n_units = self.n_hiddens
        # activation = relu
        # weight_initializer = random_normal(0.0, 0.1)
        # bias_initializer = constant (0.1)
        self.h = tf.layers.dense(
                inputs=self.state,
                units=n_hiddens,   
                activation=tf.nn.relu,
                kernel_initializer=tf.random_normal_initializer(0., .1), 
                bias_initializer=tf.constant_initializer(0.1), 
                name='h')
        
        # Declare 1-st hidden layer
        # Define a fully-connected layer with:
        # input = 1-st hidden layer
        # n_units = self.n_actions
        # activation = relu
        # weight_initializer = random_normal(0.0, 0.1)
        # bias_initializer = constant (0.1)
        self.policy = tf.layers.dense(
                inputs=self.h,
                units=n_actions,    
                activation=tf.nn.softmax,  
                kernel_initializer=tf.random_normal_initializer(0., .1), 
                bias_initializer=tf.constant_initializer(0.1),
                name='policy')
        
        # negative log probability 
        neglog_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.policy, labels=self.action)
         
        # TODO: policy gradient loss function
        self.loss = tf.reduce_mean(neglog_prob * self.value) 
        
        # TODO: Optimizer
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)
        self.train_op = self.optimizer.minimize(self.loss)
        
    def act(self, s):
        '''
        param s: a np.ndarray with shape [n_batches, n_states]
        return a batch of actions with shape [n_batches,]
        '''
        policy = self.sess.run(self.policy, feed_dict={self.state: s})
        return np.array([np.random.choice(range(self.n_actions), p=p) for p in policy])
    
    def train(self, rollout):
        '''
        param rollout: a list o
        '''
        states = np.array([ np.asarray(e.state) for e in rollout ])
        actions = np.squeeze(np.array([ e.action for e in rollout ]))
        rewards = np.array([ e.reward for e in rollout ])
        discounted_rewards = discount(rewards, gamma=self.gamma)
        
        self.sess.run(self.train_op, feed_dict={self.state: states,
                                                self.action: actions,
                                                self.value: discounted_rewards})
   
        

In [5]:
LR = 0.001
GAMMA = 0.99

sess = tf.InteractiveSession()
env = CartpoleEnvironment()
agent = ReinforceAgent(sess=sess, 
                       n_states=env.observation_space.shape[0],
                       n_actions=env.action_space.n,
                       n_hiddens=20,
                       lr=LR,
                       gamma=GAMMA)
init = tf.global_variables_initializer()
sess.run(init)

[2017-10-16 18:24:02,574] Making new env: CartPole-v0


In [6]:
def policy(s):
    return agent.act([s])[0]

In [7]:
def calculate_episode_reward(rollout):
    rewards = [ e.reward for e in rollout ]
    return sum(rewards)

In [None]:
def eval_history_reward(history):
    arr = np.asarray(history)
    return arr.mean()

In [None]:
MAX_ITERATIONS = 100000

episode_reward = 0.0
history_episode_rewards = deque(maxlen=100)

plot_history_episode_rewards = []

for iter in range(MAX_ITERATIONS):
    rollout = do_rollout(env=env, policy=policy, render=False)
    agent.train(rollout=rollout)
    
    episode_reward = calculate_episode_reward(rollout)
    history_episode_rewards.append(episode_reward)
    plot_history_episode_rewards.append(episode_reward)
    mean_rewards = eval_history_reward(history_episode_rewards)
    
    print('Episode %d: Reward = %f, Mean reward (over %d episodes) = %f' % (iter, 
                                                                            episode_reward,
                                                                            len(history_episode_rewards),
                                                                            mean_rewards))
    if mean_rewards > 195.0:
        print('Pass')
        break

Episode 0: Reward = -6.000000, Mean reward (over 1 episodes) = -6.000000
Episode 1: Reward = -10.000000, Mean reward (over 2 episodes) = -8.000000
Episode 2: Reward = 2.000000, Mean reward (over 3 episodes) = -4.666667
Episode 3: Reward = 1.000000, Mean reward (over 4 episodes) = -3.250000
Episode 4: Reward = -8.000000, Mean reward (over 5 episodes) = -4.200000
Episode 5: Reward = -4.000000, Mean reward (over 6 episodes) = -4.166667
Episode 6: Reward = 17.000000, Mean reward (over 7 episodes) = -1.142857
Episode 7: Reward = -2.000000, Mean reward (over 8 episodes) = -1.250000
Episode 8: Reward = 4.000000, Mean reward (over 9 episodes) = -0.666667
Episode 9: Reward = -9.000000, Mean reward (over 10 episodes) = -1.500000
Episode 10: Reward = -7.000000, Mean reward (over 11 episodes) = -2.000000
Episode 11: Reward = -6.000000, Mean reward (over 12 episodes) = -2.333333
Episode 12: Reward = 75.000000, Mean reward (over 13 episodes) = 3.615385
Episode 13: Reward = -6.000000, Mean reward (ov

Episode 111: Reward = -10.000000, Mean reward (over 100 episodes) = 2.180000
Episode 112: Reward = 7.000000, Mean reward (over 100 episodes) = 1.500000
Episode 113: Reward = 9.000000, Mean reward (over 100 episodes) = 1.650000
Episode 114: Reward = 15.000000, Mean reward (over 100 episodes) = 1.840000
Episode 115: Reward = -9.000000, Mean reward (over 100 episodes) = 1.690000
Episode 116: Reward = 11.000000, Mean reward (over 100 episodes) = 1.570000
Episode 117: Reward = 24.000000, Mean reward (over 100 episodes) = 1.700000
Episode 118: Reward = -6.000000, Mean reward (over 100 episodes) = 1.740000
Episode 119: Reward = -4.000000, Mean reward (over 100 episodes) = 1.790000
Episode 120: Reward = 31.000000, Mean reward (over 100 episodes) = 1.970000
Episode 121: Reward = -2.000000, Mean reward (over 100 episodes) = 1.850000
Episode 122: Reward = -10.000000, Mean reward (over 100 episodes) = 1.790000
Episode 123: Reward = 4.000000, Mean reward (over 100 episodes) = 1.890000
Episode 124: 

Episode 229: Reward = 3.000000, Mean reward (over 100 episodes) = 5.780000
Episode 230: Reward = 55.000000, Mean reward (over 100 episodes) = 6.290000
Episode 231: Reward = 18.000000, Mean reward (over 100 episodes) = 6.410000
Episode 232: Reward = 0.000000, Mean reward (over 100 episodes) = 6.380000
Episode 233: Reward = -3.000000, Mean reward (over 100 episodes) = 6.340000
Episode 234: Reward = -8.000000, Mean reward (over 100 episodes) = 6.320000
Episode 235: Reward = -8.000000, Mean reward (over 100 episodes) = 5.410000
Episode 236: Reward = 5.000000, Mean reward (over 100 episodes) = 5.570000
Episode 237: Reward = 15.000000, Mean reward (over 100 episodes) = 5.610000
Episode 238: Reward = 38.000000, Mean reward (over 100 episodes) = 6.000000
Episode 239: Reward = -7.000000, Mean reward (over 100 episodes) = 5.790000
Episode 240: Reward = -4.000000, Mean reward (over 100 episodes) = 5.510000
Episode 241: Reward = 48.000000, Mean reward (over 100 episodes) = 6.030000
Episode 242: Re

Episode 339: Reward = 2.000000, Mean reward (over 100 episodes) = 10.090000
Episode 340: Reward = 2.000000, Mean reward (over 100 episodes) = 10.150000
Episode 341: Reward = 10.000000, Mean reward (over 100 episodes) = 9.770000
Episode 342: Reward = -5.000000, Mean reward (over 100 episodes) = 9.640000
Episode 343: Reward = 5.000000, Mean reward (over 100 episodes) = 9.770000
Episode 344: Reward = 23.000000, Mean reward (over 100 episodes) = 10.010000
Episode 345: Reward = -8.000000, Mean reward (over 100 episodes) = 9.840000
Episode 346: Reward = 28.000000, Mean reward (over 100 episodes) = 9.760000
Episode 347: Reward = -1.000000, Mean reward (over 100 episodes) = 9.360000
Episode 348: Reward = 69.000000, Mean reward (over 100 episodes) = 10.070000
Episode 349: Reward = 32.000000, Mean reward (over 100 episodes) = 10.440000
Episode 350: Reward = -7.000000, Mean reward (over 100 episodes) = 9.980000
Episode 351: Reward = 22.000000, Mean reward (over 100 episodes) = 9.790000
Episode 35

Episode 446: Reward = 32.000000, Mean reward (over 100 episodes) = 22.880000
Episode 447: Reward = 0.000000, Mean reward (over 100 episodes) = 22.890000
Episode 448: Reward = 58.000000, Mean reward (over 100 episodes) = 22.780000
Episode 449: Reward = 23.000000, Mean reward (over 100 episodes) = 22.690000
Episode 450: Reward = 44.000000, Mean reward (over 100 episodes) = 23.200000
Episode 451: Reward = 24.000000, Mean reward (over 100 episodes) = 23.220000
Episode 452: Reward = 34.000000, Mean reward (over 100 episodes) = 23.510000
Episode 453: Reward = 101.000000, Mean reward (over 100 episodes) = 24.590000
Episode 454: Reward = 34.000000, Mean reward (over 100 episodes) = 24.730000
Episode 455: Reward = 22.000000, Mean reward (over 100 episodes) = 24.890000
Episode 456: Reward = 22.000000, Mean reward (over 100 episodes) = 24.820000
Episode 457: Reward = 103.000000, Mean reward (over 100 episodes) = 25.930000
Episode 458: Reward = 75.000000, Mean reward (over 100 episodes) = 26.73000

Episode 553: Reward = 36.000000, Mean reward (over 100 episodes) = 33.710000
Episode 554: Reward = 41.000000, Mean reward (over 100 episodes) = 33.780000
Episode 555: Reward = 15.000000, Mean reward (over 100 episodes) = 33.710000
Episode 556: Reward = 10.000000, Mean reward (over 100 episodes) = 33.590000
Episode 557: Reward = 13.000000, Mean reward (over 100 episodes) = 32.690000
Episode 558: Reward = 13.000000, Mean reward (over 100 episodes) = 32.070000
Episode 559: Reward = 90.000000, Mean reward (over 100 episodes) = 32.950000
Episode 560: Reward = 37.000000, Mean reward (over 100 episodes) = 32.980000
Episode 561: Reward = 19.000000, Mean reward (over 100 episodes) = 33.200000
Episode 562: Reward = 30.000000, Mean reward (over 100 episodes) = 33.480000
Episode 563: Reward = 6.000000, Mean reward (over 100 episodes) = 33.560000
Episode 564: Reward = 15.000000, Mean reward (over 100 episodes) = 33.020000
Episode 565: Reward = 31.000000, Mean reward (over 100 episodes) = 33.160000


Episode 663: Reward = 56.000000, Mean reward (over 100 episodes) = 44.670000
Episode 664: Reward = 40.000000, Mean reward (over 100 episodes) = 44.920000
Episode 665: Reward = 77.000000, Mean reward (over 100 episodes) = 45.380000
Episode 666: Reward = 24.000000, Mean reward (over 100 episodes) = 44.910000
Episode 667: Reward = 74.000000, Mean reward (over 100 episodes) = 45.130000
Episode 668: Reward = 57.000000, Mean reward (over 100 episodes) = 45.310000
Episode 669: Reward = 57.000000, Mean reward (over 100 episodes) = 45.740000
Episode 670: Reward = 62.000000, Mean reward (over 100 episodes) = 46.390000
Episode 671: Reward = 34.000000, Mean reward (over 100 episodes) = 46.000000
Episode 672: Reward = 56.000000, Mean reward (over 100 episodes) = 46.180000
Episode 673: Reward = 26.000000, Mean reward (over 100 episodes) = 45.740000
Episode 674: Reward = 47.000000, Mean reward (over 100 episodes) = 45.610000
Episode 675: Reward = 154.000000, Mean reward (over 100 episodes) = 46.52000

Episode 773: Reward = 32.000000, Mean reward (over 100 episodes) = 60.220000
Episode 774: Reward = 27.000000, Mean reward (over 100 episodes) = 60.020000
Episode 775: Reward = 41.000000, Mean reward (over 100 episodes) = 58.890000
Episode 776: Reward = 62.000000, Mean reward (over 100 episodes) = 58.980000
Episode 777: Reward = 54.000000, Mean reward (over 100 episodes) = 58.800000
Episode 778: Reward = 56.000000, Mean reward (over 100 episodes) = 58.870000
Episode 779: Reward = 41.000000, Mean reward (over 100 episodes) = 58.930000
Episode 780: Reward = 33.000000, Mean reward (over 100 episodes) = 58.780000
Episode 781: Reward = 49.000000, Mean reward (over 100 episodes) = 58.660000
Episode 782: Reward = 70.000000, Mean reward (over 100 episodes) = 59.120000
Episode 783: Reward = 83.000000, Mean reward (over 100 episodes) = 59.970000
Episode 784: Reward = 93.000000, Mean reward (over 100 episodes) = 60.470000
Episode 785: Reward = 43.000000, Mean reward (over 100 episodes) = 59.990000

Episode 881: Reward = 147.000000, Mean reward (over 100 episodes) = 71.320000
Episode 882: Reward = 91.000000, Mean reward (over 100 episodes) = 71.530000
Episode 883: Reward = 69.000000, Mean reward (over 100 episodes) = 71.390000
Episode 884: Reward = 104.000000, Mean reward (over 100 episodes) = 71.500000
Episode 885: Reward = 72.000000, Mean reward (over 100 episodes) = 71.790000
Episode 886: Reward = 55.000000, Mean reward (over 100 episodes) = 71.800000
Episode 887: Reward = 58.000000, Mean reward (over 100 episodes) = 71.130000
Episode 888: Reward = 170.000000, Mean reward (over 100 episodes) = 72.110000
Episode 889: Reward = 78.000000, Mean reward (over 100 episodes) = 71.750000
Episode 890: Reward = 78.000000, Mean reward (over 100 episodes) = 71.750000
Episode 891: Reward = 58.000000, Mean reward (over 100 episodes) = 71.800000
Episode 892: Reward = 123.000000, Mean reward (over 100 episodes) = 72.450000
Episode 893: Reward = 71.000000, Mean reward (over 100 episodes) = 72.57

Episode 991: Reward = 81.000000, Mean reward (over 100 episodes) = 74.200000
Episode 992: Reward = 61.000000, Mean reward (over 100 episodes) = 73.580000
Episode 993: Reward = 47.000000, Mean reward (over 100 episodes) = 73.340000
Episode 994: Reward = 80.000000, Mean reward (over 100 episodes) = 73.480000
Episode 995: Reward = 65.000000, Mean reward (over 100 episodes) = 73.310000
Episode 996: Reward = 74.000000, Mean reward (over 100 episodes) = 73.390000
Episode 997: Reward = 100.000000, Mean reward (over 100 episodes) = 73.920000
Episode 998: Reward = 79.000000, Mean reward (over 100 episodes) = 73.830000
Episode 999: Reward = 110.000000, Mean reward (over 100 episodes) = 73.900000
Episode 1000: Reward = 123.000000, Mean reward (over 100 episodes) = 74.520000
Episode 1001: Reward = 87.000000, Mean reward (over 100 episodes) = 74.180000
Episode 1002: Reward = 60.000000, Mean reward (over 100 episodes) = 74.080000
Episode 1003: Reward = 99.000000, Mean reward (over 100 episodes) = 74

Episode 1098: Reward = 37.000000, Mean reward (over 100 episodes) = 73.210000
Episode 1099: Reward = 77.000000, Mean reward (over 100 episodes) = 72.880000
Episode 1100: Reward = 85.000000, Mean reward (over 100 episodes) = 72.500000
Episode 1101: Reward = 56.000000, Mean reward (over 100 episodes) = 72.190000
Episode 1102: Reward = 172.000000, Mean reward (over 100 episodes) = 73.310000
Episode 1103: Reward = 87.000000, Mean reward (over 100 episodes) = 73.190000
Episode 1104: Reward = 65.000000, Mean reward (over 100 episodes) = 72.830000
Episode 1105: Reward = 200.000000, Mean reward (over 100 episodes) = 73.890000
Episode 1106: Reward = 69.000000, Mean reward (over 100 episodes) = 73.860000
Episode 1107: Reward = 86.000000, Mean reward (over 100 episodes) = 73.890000
Episode 1108: Reward = 78.000000, Mean reward (over 100 episodes) = 73.920000
Episode 1109: Reward = 59.000000, Mean reward (over 100 episodes) = 73.460000
Episode 1110: Reward = 33.000000, Mean reward (over 100 episod

Episode 1209: Reward = 52.000000, Mean reward (over 100 episodes) = 69.000000
Episode 1210: Reward = 32.000000, Mean reward (over 100 episodes) = 68.990000
Episode 1211: Reward = 41.000000, Mean reward (over 100 episodes) = 68.880000
Episode 1212: Reward = 42.000000, Mean reward (over 100 episodes) = 67.850000
Episode 1213: Reward = 47.000000, Mean reward (over 100 episodes) = 67.970000
Episode 1214: Reward = 60.000000, Mean reward (over 100 episodes) = 67.250000
Episode 1215: Reward = 70.000000, Mean reward (over 100 episodes) = 67.670000
Episode 1216: Reward = 48.000000, Mean reward (over 100 episodes) = 67.680000
Episode 1217: Reward = 62.000000, Mean reward (over 100 episodes) = 67.710000
Episode 1218: Reward = 51.000000, Mean reward (over 100 episodes) = 67.010000
Episode 1219: Reward = 38.000000, Mean reward (over 100 episodes) = 66.890000
Episode 1220: Reward = 130.000000, Mean reward (over 100 episodes) = 67.490000
Episode 1221: Reward = 46.000000, Mean reward (over 100 episode

Episode 1314: Reward = 42.000000, Mean reward (over 100 episodes) = 63.430000
Episode 1315: Reward = 52.000000, Mean reward (over 100 episodes) = 63.250000
Episode 1316: Reward = 35.000000, Mean reward (over 100 episodes) = 63.120000
Episode 1317: Reward = 86.000000, Mean reward (over 100 episodes) = 63.360000
Episode 1318: Reward = 34.000000, Mean reward (over 100 episodes) = 63.190000
Episode 1319: Reward = 41.000000, Mean reward (over 100 episodes) = 63.220000
Episode 1320: Reward = 25.000000, Mean reward (over 100 episodes) = 62.170000
Episode 1321: Reward = 74.000000, Mean reward (over 100 episodes) = 62.450000
Episode 1322: Reward = 60.000000, Mean reward (over 100 episodes) = 62.670000
Episode 1323: Reward = 129.000000, Mean reward (over 100 episodes) = 63.500000
Episode 1324: Reward = 52.000000, Mean reward (over 100 episodes) = 63.440000
Episode 1325: Reward = 140.000000, Mean reward (over 100 episodes) = 63.080000
Episode 1326: Reward = 26.000000, Mean reward (over 100 episod

Episode 1419: Reward = 100.000000, Mean reward (over 100 episodes) = 63.320000
Episode 1420: Reward = 122.000000, Mean reward (over 100 episodes) = 64.290000
Episode 1421: Reward = 114.000000, Mean reward (over 100 episodes) = 64.690000
Episode 1422: Reward = 43.000000, Mean reward (over 100 episodes) = 64.520000
Episode 1423: Reward = 48.000000, Mean reward (over 100 episodes) = 63.710000
Episode 1424: Reward = 74.000000, Mean reward (over 100 episodes) = 63.930000
Episode 1425: Reward = 36.000000, Mean reward (over 100 episodes) = 62.890000
Episode 1426: Reward = 27.000000, Mean reward (over 100 episodes) = 62.900000
Episode 1427: Reward = 51.000000, Mean reward (over 100 episodes) = 63.120000
Episode 1428: Reward = 32.000000, Mean reward (over 100 episodes) = 62.680000
Episode 1429: Reward = 44.000000, Mean reward (over 100 episodes) = 61.120000
Episode 1430: Reward = 58.000000, Mean reward (over 100 episodes) = 60.730000
Episode 1431: Reward = 60.000000, Mean reward (over 100 episo

Episode 1524: Reward = 53.000000, Mean reward (over 100 episodes) = 55.990000
Episode 1525: Reward = 17.000000, Mean reward (over 100 episodes) = 55.800000
Episode 1526: Reward = 23.000000, Mean reward (over 100 episodes) = 55.760000
Episode 1527: Reward = 22.000000, Mean reward (over 100 episodes) = 55.470000
Episode 1528: Reward = 29.000000, Mean reward (over 100 episodes) = 55.440000
Episode 1529: Reward = 57.000000, Mean reward (over 100 episodes) = 55.570000
Episode 1530: Reward = 29.000000, Mean reward (over 100 episodes) = 55.280000
Episode 1531: Reward = 42.000000, Mean reward (over 100 episodes) = 55.100000
Episode 1532: Reward = 51.000000, Mean reward (over 100 episodes) = 55.030000
Episode 1533: Reward = 52.000000, Mean reward (over 100 episodes) = 54.690000
Episode 1534: Reward = 61.000000, Mean reward (over 100 episodes) = 54.710000
Episode 1535: Reward = 58.000000, Mean reward (over 100 episodes) = 54.730000
Episode 1536: Reward = 146.000000, Mean reward (over 100 episode

Episode 1631: Reward = 39.000000, Mean reward (over 100 episodes) = 54.510000
Episode 1632: Reward = 71.000000, Mean reward (over 100 episodes) = 54.710000
Episode 1633: Reward = 41.000000, Mean reward (over 100 episodes) = 54.600000
Episode 1634: Reward = 28.000000, Mean reward (over 100 episodes) = 54.270000
Episode 1635: Reward = 41.000000, Mean reward (over 100 episodes) = 54.100000
Episode 1636: Reward = 34.000000, Mean reward (over 100 episodes) = 52.980000
Episode 1637: Reward = 88.000000, Mean reward (over 100 episodes) = 53.280000
Episode 1638: Reward = 46.000000, Mean reward (over 100 episodes) = 53.080000
Episode 1639: Reward = 87.000000, Mean reward (over 100 episodes) = 53.590000
Episode 1640: Reward = 62.000000, Mean reward (over 100 episodes) = 53.840000
Episode 1641: Reward = 40.000000, Mean reward (over 100 episodes) = 53.840000
Episode 1642: Reward = 40.000000, Mean reward (over 100 episodes) = 53.750000
Episode 1643: Reward = 30.000000, Mean reward (over 100 episodes

Episode 1740: Reward = 35.000000, Mean reward (over 100 episodes) = 46.850000
Episode 1741: Reward = 79.000000, Mean reward (over 100 episodes) = 47.240000
Episode 1742: Reward = 32.000000, Mean reward (over 100 episodes) = 47.160000
Episode 1743: Reward = 23.000000, Mean reward (over 100 episodes) = 47.090000
Episode 1744: Reward = 105.000000, Mean reward (over 100 episodes) = 47.570000
Episode 1745: Reward = 58.000000, Mean reward (over 100 episodes) = 47.880000
Episode 1746: Reward = 56.000000, Mean reward (over 100 episodes) = 47.330000
Episode 1747: Reward = 47.000000, Mean reward (over 100 episodes) = 47.410000
Episode 1748: Reward = 86.000000, Mean reward (over 100 episodes) = 47.600000
Episode 1749: Reward = 28.000000, Mean reward (over 100 episodes) = 47.510000
Episode 1750: Reward = 34.000000, Mean reward (over 100 episodes) = 47.640000
Episode 1751: Reward = 44.000000, Mean reward (over 100 episodes) = 47.750000
Episode 1752: Reward = 45.000000, Mean reward (over 100 episode

Episode 1851: Reward = 110.000000, Mean reward (over 100 episodes) = 44.300000
Episode 1852: Reward = 50.000000, Mean reward (over 100 episodes) = 44.350000
Episode 1853: Reward = 38.000000, Mean reward (over 100 episodes) = 44.450000
Episode 1854: Reward = 51.000000, Mean reward (over 100 episodes) = 44.550000
Episode 1855: Reward = 69.000000, Mean reward (over 100 episodes) = 44.460000
Episode 1856: Reward = 79.000000, Mean reward (over 100 episodes) = 44.850000
Episode 1857: Reward = 25.000000, Mean reward (over 100 episodes) = 44.640000
Episode 1858: Reward = 42.000000, Mean reward (over 100 episodes) = 44.760000
Episode 1859: Reward = 27.000000, Mean reward (over 100 episodes) = 44.730000
Episode 1860: Reward = 89.000000, Mean reward (over 100 episodes) = 45.360000
Episode 1861: Reward = 92.000000, Mean reward (over 100 episodes) = 45.420000
Episode 1862: Reward = 72.000000, Mean reward (over 100 episodes) = 45.650000
Episode 1863: Reward = 38.000000, Mean reward (over 100 episode

Episode 1959: Reward = 200.000000, Mean reward (over 100 episodes) = 54.580000
Episode 1960: Reward = 28.000000, Mean reward (over 100 episodes) = 53.970000
Episode 1961: Reward = 96.000000, Mean reward (over 100 episodes) = 54.010000
Episode 1962: Reward = 98.000000, Mean reward (over 100 episodes) = 54.270000
Episode 1963: Reward = 43.000000, Mean reward (over 100 episodes) = 54.320000
Episode 1964: Reward = 99.000000, Mean reward (over 100 episodes) = 54.600000
Episode 1965: Reward = 38.000000, Mean reward (over 100 episodes) = 53.830000
Episode 1966: Reward = 59.000000, Mean reward (over 100 episodes) = 54.150000
Episode 1967: Reward = 40.000000, Mean reward (over 100 episodes) = 53.630000
Episode 1968: Reward = 50.000000, Mean reward (over 100 episodes) = 53.830000
Episode 1969: Reward = 66.000000, Mean reward (over 100 episodes) = 53.990000
Episode 1970: Reward = 53.000000, Mean reward (over 100 episodes) = 54.090000
Episode 1971: Reward = 38.000000, Mean reward (over 100 episode

Episode 2065: Reward = 153.000000, Mean reward (over 100 episodes) = 54.440000
Episode 2066: Reward = 40.000000, Mean reward (over 100 episodes) = 54.250000
Episode 2067: Reward = 52.000000, Mean reward (over 100 episodes) = 54.370000
Episode 2068: Reward = 59.000000, Mean reward (over 100 episodes) = 54.460000
Episode 2069: Reward = 45.000000, Mean reward (over 100 episodes) = 54.250000
Episode 2070: Reward = 34.000000, Mean reward (over 100 episodes) = 54.060000
Episode 2071: Reward = 83.000000, Mean reward (over 100 episodes) = 54.510000
Episode 2072: Reward = 64.000000, Mean reward (over 100 episodes) = 54.540000
Episode 2073: Reward = 51.000000, Mean reward (over 100 episodes) = 54.640000
Episode 2074: Reward = 31.000000, Mean reward (over 100 episodes) = 54.440000
Episode 2075: Reward = 92.000000, Mean reward (over 100 episodes) = 55.010000
Episode 2076: Reward = 49.000000, Mean reward (over 100 episodes) = 55.190000
Episode 2077: Reward = 49.000000, Mean reward (over 100 episode

Episode 2173: Reward = 45.000000, Mean reward (over 100 episodes) = 54.390000
Episode 2174: Reward = 28.000000, Mean reward (over 100 episodes) = 54.360000
Episode 2175: Reward = 39.000000, Mean reward (over 100 episodes) = 53.830000
Episode 2176: Reward = 34.000000, Mean reward (over 100 episodes) = 53.680000
Episode 2177: Reward = 72.000000, Mean reward (over 100 episodes) = 53.910000
Episode 2178: Reward = 55.000000, Mean reward (over 100 episodes) = 54.010000
Episode 2179: Reward = 53.000000, Mean reward (over 100 episodes) = 54.060000
Episode 2180: Reward = 131.000000, Mean reward (over 100 episodes) = 55.110000
Episode 2181: Reward = 50.000000, Mean reward (over 100 episodes) = 55.060000
Episode 2182: Reward = 113.000000, Mean reward (over 100 episodes) = 55.660000
Episode 2183: Reward = 47.000000, Mean reward (over 100 episodes) = 55.780000
Episode 2184: Reward = 54.000000, Mean reward (over 100 episodes) = 56.000000
Episode 2185: Reward = 99.000000, Mean reward (over 100 episod

Episode 2278: Reward = 96.000000, Mean reward (over 100 episodes) = 57.910000
Episode 2279: Reward = 32.000000, Mean reward (over 100 episodes) = 57.700000
Episode 2280: Reward = 99.000000, Mean reward (over 100 episodes) = 57.380000
Episode 2281: Reward = 113.000000, Mean reward (over 100 episodes) = 58.010000
Episode 2282: Reward = 49.000000, Mean reward (over 100 episodes) = 57.370000
Episode 2283: Reward = 54.000000, Mean reward (over 100 episodes) = 57.440000
Episode 2284: Reward = 55.000000, Mean reward (over 100 episodes) = 57.450000
Episode 2285: Reward = 29.000000, Mean reward (over 100 episodes) = 56.750000
Episode 2286: Reward = 47.000000, Mean reward (over 100 episodes) = 56.400000
Episode 2287: Reward = 37.000000, Mean reward (over 100 episodes) = 56.150000
Episode 2288: Reward = 35.000000, Mean reward (over 100 episodes) = 56.030000
Episode 2289: Reward = 56.000000, Mean reward (over 100 episodes) = 56.220000
Episode 2290: Reward = 33.000000, Mean reward (over 100 episode

Episode 2385: Reward = 47.000000, Mean reward (over 100 episodes) = 53.510000
Episode 2386: Reward = 35.000000, Mean reward (over 100 episodes) = 53.390000
Episode 2387: Reward = 35.000000, Mean reward (over 100 episodes) = 53.370000
Episode 2388: Reward = 57.000000, Mean reward (over 100 episodes) = 53.590000
Episode 2389: Reward = 37.000000, Mean reward (over 100 episodes) = 53.400000
Episode 2390: Reward = 148.000000, Mean reward (over 100 episodes) = 54.550000
Episode 2391: Reward = 65.000000, Mean reward (over 100 episodes) = 54.830000
Episode 2392: Reward = 35.000000, Mean reward (over 100 episodes) = 54.600000
Episode 2393: Reward = 42.000000, Mean reward (over 100 episodes) = 54.700000
Episode 2394: Reward = 58.000000, Mean reward (over 100 episodes) = 54.850000
Episode 2395: Reward = 29.000000, Mean reward (over 100 episodes) = 54.670000
Episode 2396: Reward = 43.000000, Mean reward (over 100 episodes) = 54.750000
Episode 2397: Reward = 47.000000, Mean reward (over 100 episode

Episode 2490: Reward = 60.000000, Mean reward (over 100 episodes) = 62.470000
Episode 2491: Reward = 107.000000, Mean reward (over 100 episodes) = 62.890000
Episode 2492: Reward = 129.000000, Mean reward (over 100 episodes) = 63.830000
Episode 2493: Reward = 65.000000, Mean reward (over 100 episodes) = 64.060000
Episode 2494: Reward = 67.000000, Mean reward (over 100 episodes) = 64.150000
Episode 2495: Reward = 67.000000, Mean reward (over 100 episodes) = 64.530000
Episode 2496: Reward = 49.000000, Mean reward (over 100 episodes) = 64.590000
Episode 2497: Reward = 35.000000, Mean reward (over 100 episodes) = 64.470000
Episode 2498: Reward = 51.000000, Mean reward (over 100 episodes) = 64.310000
Episode 2499: Reward = 147.000000, Mean reward (over 100 episodes) = 65.210000
Episode 2500: Reward = 154.000000, Mean reward (over 100 episodes) = 65.520000
Episode 2501: Reward = 71.000000, Mean reward (over 100 episodes) = 65.770000
Episode 2502: Reward = 60.000000, Mean reward (over 100 epis

Episode 2600: Reward = 48.000000, Mean reward (over 100 episodes) = 73.400000
Episode 2601: Reward = 50.000000, Mean reward (over 100 episodes) = 73.190000
Episode 2602: Reward = 59.000000, Mean reward (over 100 episodes) = 73.180000
Episode 2603: Reward = 93.000000, Mean reward (over 100 episodes) = 73.600000
Episode 2604: Reward = 68.000000, Mean reward (over 100 episodes) = 73.890000
Episode 2605: Reward = 64.000000, Mean reward (over 100 episodes) = 73.980000
Episode 2606: Reward = 75.000000, Mean reward (over 100 episodes) = 74.160000
Episode 2607: Reward = 50.000000, Mean reward (over 100 episodes) = 73.970000
Episode 2608: Reward = 123.000000, Mean reward (over 100 episodes) = 74.550000
Episode 2609: Reward = 69.000000, Mean reward (over 100 episodes) = 73.790000
Episode 2610: Reward = 46.000000, Mean reward (over 100 episodes) = 73.760000
Episode 2611: Reward = 41.000000, Mean reward (over 100 episodes) = 73.760000
Episode 2612: Reward = 93.000000, Mean reward (over 100 episode

Episode 2710: Reward = 61.000000, Mean reward (over 100 episodes) = 74.490000
Episode 2711: Reward = 81.000000, Mean reward (over 100 episodes) = 74.890000
Episode 2712: Reward = 69.000000, Mean reward (over 100 episodes) = 74.650000
Episode 2713: Reward = 123.000000, Mean reward (over 100 episodes) = 75.270000
Episode 2714: Reward = 66.000000, Mean reward (over 100 episodes) = 75.300000
Episode 2715: Reward = 76.000000, Mean reward (over 100 episodes) = 75.390000
Episode 2716: Reward = 40.000000, Mean reward (over 100 episodes) = 74.940000
Episode 2717: Reward = 63.000000, Mean reward (over 100 episodes) = 74.950000
Episode 2718: Reward = 70.000000, Mean reward (over 100 episodes) = 74.680000
Episode 2719: Reward = 59.000000, Mean reward (over 100 episodes) = 74.300000
Episode 2720: Reward = 148.000000, Mean reward (over 100 episodes) = 75.000000
Episode 2721: Reward = 84.000000, Mean reward (over 100 episodes) = 75.330000
Episode 2722: Reward = 41.000000, Mean reward (over 100 episod

Episode 2821: Reward = 99.000000, Mean reward (over 100 episodes) = 71.970000
Episode 2822: Reward = 61.000000, Mean reward (over 100 episodes) = 72.170000
Episode 2823: Reward = 59.000000, Mean reward (over 100 episodes) = 71.900000
Episode 2824: Reward = 89.000000, Mean reward (over 100 episodes) = 72.350000
Episode 2825: Reward = 62.000000, Mean reward (over 100 episodes) = 72.330000
Episode 2826: Reward = 83.000000, Mean reward (over 100 episodes) = 72.530000
Episode 2827: Reward = 65.000000, Mean reward (over 100 episodes) = 72.390000
Episode 2828: Reward = 88.000000, Mean reward (over 100 episodes) = 72.530000
Episode 2829: Reward = 89.000000, Mean reward (over 100 episodes) = 72.400000
Episode 2830: Reward = 70.000000, Mean reward (over 100 episodes) = 72.420000
Episode 2831: Reward = 117.000000, Mean reward (over 100 episodes) = 72.990000
Episode 2832: Reward = 79.000000, Mean reward (over 100 episodes) = 72.710000
Episode 2833: Reward = 68.000000, Mean reward (over 100 episode

Episode 2928: Reward = 105.000000, Mean reward (over 100 episodes) = 84.480000
Episode 2929: Reward = 156.000000, Mean reward (over 100 episodes) = 85.150000
Episode 2930: Reward = 124.000000, Mean reward (over 100 episodes) = 85.690000
Episode 2931: Reward = 79.000000, Mean reward (over 100 episodes) = 85.310000
Episode 2932: Reward = 88.000000, Mean reward (over 100 episodes) = 85.400000
Episode 2933: Reward = 106.000000, Mean reward (over 100 episodes) = 85.780000
Episode 2934: Reward = 90.000000, Mean reward (over 100 episodes) = 85.840000
Episode 2935: Reward = 83.000000, Mean reward (over 100 episodes) = 86.070000
Episode 2936: Reward = 115.000000, Mean reward (over 100 episodes) = 85.900000
Episode 2937: Reward = 71.000000, Mean reward (over 100 episodes) = 85.710000
Episode 2938: Reward = 118.000000, Mean reward (over 100 episodes) = 86.220000
Episode 2939: Reward = 72.000000, Mean reward (over 100 episodes) = 86.280000
Episode 2940: Reward = 87.000000, Mean reward (over 100 ep

Episode 3033: Reward = 105.000000, Mean reward (over 100 episodes) = 110.350000
Episode 3034: Reward = 137.000000, Mean reward (over 100 episodes) = 110.820000
Episode 3035: Reward = 127.000000, Mean reward (over 100 episodes) = 111.260000
Episode 3036: Reward = 120.000000, Mean reward (over 100 episodes) = 111.310000
Episode 3037: Reward = 133.000000, Mean reward (over 100 episodes) = 111.930000
Episode 3038: Reward = 74.000000, Mean reward (over 100 episodes) = 111.490000
Episode 3039: Reward = 89.000000, Mean reward (over 100 episodes) = 111.660000
Episode 3040: Reward = 112.000000, Mean reward (over 100 episodes) = 111.910000
Episode 3041: Reward = 95.000000, Mean reward (over 100 episodes) = 111.910000
Episode 3042: Reward = 93.000000, Mean reward (over 100 episodes) = 111.730000
Episode 3043: Reward = 111.000000, Mean reward (over 100 episodes) = 111.770000
Episode 3044: Reward = 119.000000, Mean reward (over 100 episodes) = 111.850000
Episode 3045: Reward = 89.000000, Mean rewar

Episode 3138: Reward = 90.000000, Mean reward (over 100 episodes) = 106.300000
Episode 3139: Reward = 93.000000, Mean reward (over 100 episodes) = 106.340000
Episode 3140: Reward = 122.000000, Mean reward (over 100 episodes) = 106.440000
Episode 3141: Reward = 149.000000, Mean reward (over 100 episodes) = 106.980000
Episode 3142: Reward = 157.000000, Mean reward (over 100 episodes) = 107.620000
Episode 3143: Reward = 123.000000, Mean reward (over 100 episodes) = 107.740000
Episode 3144: Reward = 113.000000, Mean reward (over 100 episodes) = 107.680000
Episode 3145: Reward = 91.000000, Mean reward (over 100 episodes) = 107.700000
Episode 3146: Reward = 135.000000, Mean reward (over 100 episodes) = 107.890000
Episode 3147: Reward = 109.000000, Mean reward (over 100 episodes) = 108.110000
Episode 3148: Reward = 112.000000, Mean reward (over 100 episodes) = 108.250000
Episode 3149: Reward = 125.000000, Mean reward (over 100 episodes) = 108.520000
Episode 3150: Reward = 96.000000, Mean rewa

Episode 3243: Reward = 123.000000, Mean reward (over 100 episodes) = 127.080000
Episode 3244: Reward = 115.000000, Mean reward (over 100 episodes) = 127.100000
Episode 3245: Reward = 113.000000, Mean reward (over 100 episodes) = 127.320000
Episode 3246: Reward = 146.000000, Mean reward (over 100 episodes) = 127.430000
Episode 3247: Reward = 134.000000, Mean reward (over 100 episodes) = 127.680000
Episode 3248: Reward = 164.000000, Mean reward (over 100 episodes) = 128.200000
Episode 3249: Reward = 200.000000, Mean reward (over 100 episodes) = 128.950000
Episode 3250: Reward = 128.000000, Mean reward (over 100 episodes) = 129.270000
Episode 3251: Reward = 137.000000, Mean reward (over 100 episodes) = 128.930000
Episode 3252: Reward = 132.000000, Mean reward (over 100 episodes) = 129.170000
Episode 3253: Reward = 130.000000, Mean reward (over 100 episodes) = 129.390000
Episode 3254: Reward = 131.000000, Mean reward (over 100 episodes) = 129.570000
Episode 3255: Reward = 173.000000, Mean 

Episode 3346: Reward = 167.000000, Mean reward (over 100 episodes) = 137.250000
Episode 3347: Reward = 96.000000, Mean reward (over 100 episodes) = 136.870000
Episode 3348: Reward = 159.000000, Mean reward (over 100 episodes) = 136.820000
Episode 3349: Reward = 159.000000, Mean reward (over 100 episodes) = 136.410000
Episode 3350: Reward = 135.000000, Mean reward (over 100 episodes) = 136.480000
Episode 3351: Reward = 164.000000, Mean reward (over 100 episodes) = 136.750000
Episode 3352: Reward = 200.000000, Mean reward (over 100 episodes) = 137.430000
Episode 3353: Reward = 109.000000, Mean reward (over 100 episodes) = 137.220000
Episode 3354: Reward = 200.000000, Mean reward (over 100 episodes) = 137.910000
Episode 3355: Reward = 131.000000, Mean reward (over 100 episodes) = 137.490000
Episode 3356: Reward = 149.000000, Mean reward (over 100 episodes) = 137.380000
Episode 3357: Reward = 200.000000, Mean reward (over 100 episodes) = 137.850000
Episode 3358: Reward = 156.000000, Mean r

Episode 3452: Reward = 126.000000, Mean reward (over 100 episodes) = 154.350000
Episode 3453: Reward = 120.000000, Mean reward (over 100 episodes) = 154.460000
Episode 3454: Reward = 200.000000, Mean reward (over 100 episodes) = 154.460000
Episode 3455: Reward = 154.000000, Mean reward (over 100 episodes) = 154.690000
Episode 3456: Reward = 200.000000, Mean reward (over 100 episodes) = 155.200000
Episode 3457: Reward = 131.000000, Mean reward (over 100 episodes) = 154.510000
Episode 3458: Reward = 143.000000, Mean reward (over 100 episodes) = 154.380000
Episode 3459: Reward = 117.000000, Mean reward (over 100 episodes) = 153.550000
Episode 3460: Reward = 133.000000, Mean reward (over 100 episodes) = 153.800000
Episode 3461: Reward = 164.000000, Mean reward (over 100 episodes) = 154.090000
Episode 3462: Reward = 136.000000, Mean reward (over 100 episodes) = 154.040000
Episode 3463: Reward = 119.000000, Mean reward (over 100 episodes) = 154.160000
Episode 3464: Reward = 118.000000, Mean 

Episode 3557: Reward = 165.000000, Mean reward (over 100 episodes) = 148.320000
Episode 3558: Reward = 139.000000, Mean reward (over 100 episodes) = 148.280000
Episode 3559: Reward = 200.000000, Mean reward (over 100 episodes) = 149.110000
Episode 3560: Reward = 133.000000, Mean reward (over 100 episodes) = 149.110000
Episode 3561: Reward = 124.000000, Mean reward (over 100 episodes) = 148.710000
Episode 3562: Reward = 173.000000, Mean reward (over 100 episodes) = 149.080000
Episode 3563: Reward = 101.000000, Mean reward (over 100 episodes) = 148.900000
Episode 3564: Reward = 200.000000, Mean reward (over 100 episodes) = 149.720000
Episode 3565: Reward = 135.000000, Mean reward (over 100 episodes) = 149.570000
Episode 3566: Reward = 148.000000, Mean reward (over 100 episodes) = 149.320000
Episode 3567: Reward = 128.000000, Mean reward (over 100 episodes) = 149.190000
Episode 3568: Reward = 125.000000, Mean reward (over 100 episodes) = 148.860000
Episode 3569: Reward = 200.000000, Mean 

Episode 3660: Reward = 148.000000, Mean reward (over 100 episodes) = 150.080000
Episode 3661: Reward = 112.000000, Mean reward (over 100 episodes) = 149.960000
Episode 3662: Reward = 200.000000, Mean reward (over 100 episodes) = 150.230000
Episode 3663: Reward = 171.000000, Mean reward (over 100 episodes) = 150.930000
Episode 3664: Reward = 129.000000, Mean reward (over 100 episodes) = 150.220000
Episode 3665: Reward = 138.000000, Mean reward (over 100 episodes) = 150.250000
Episode 3666: Reward = 152.000000, Mean reward (over 100 episodes) = 150.290000
Episode 3667: Reward = 200.000000, Mean reward (over 100 episodes) = 151.010000
Episode 3668: Reward = 200.000000, Mean reward (over 100 episodes) = 151.760000
Episode 3669: Reward = 145.000000, Mean reward (over 100 episodes) = 151.210000
Episode 3670: Reward = 142.000000, Mean reward (over 100 episodes) = 151.370000
Episode 3671: Reward = 200.000000, Mean reward (over 100 episodes) = 152.160000
Episode 3672: Reward = 200.000000, Mean 

Episode 3764: Reward = 200.000000, Mean reward (over 100 episodes) = 157.680000
Episode 3765: Reward = 122.000000, Mean reward (over 100 episodes) = 157.520000
Episode 3766: Reward = 200.000000, Mean reward (over 100 episodes) = 158.000000
Episode 3767: Reward = 147.000000, Mean reward (over 100 episodes) = 157.470000
Episode 3768: Reward = 133.000000, Mean reward (over 100 episodes) = 156.800000
Episode 3769: Reward = 153.000000, Mean reward (over 100 episodes) = 156.880000
Episode 3770: Reward = 139.000000, Mean reward (over 100 episodes) = 156.850000
Episode 3771: Reward = 200.000000, Mean reward (over 100 episodes) = 156.850000
Episode 3772: Reward = 134.000000, Mean reward (over 100 episodes) = 156.190000
Episode 3773: Reward = 200.000000, Mean reward (over 100 episodes) = 156.860000
Episode 3774: Reward = 200.000000, Mean reward (over 100 episodes) = 157.290000
Episode 3775: Reward = 144.000000, Mean reward (over 100 episodes) = 157.190000
Episode 3776: Reward = 200.000000, Mean 

Episode 3868: Reward = 145.000000, Mean reward (over 100 episodes) = 173.140000
Episode 3869: Reward = 200.000000, Mean reward (over 100 episodes) = 173.610000
Episode 3870: Reward = 200.000000, Mean reward (over 100 episodes) = 174.220000
Episode 3871: Reward = 200.000000, Mean reward (over 100 episodes) = 174.220000
Episode 3872: Reward = 200.000000, Mean reward (over 100 episodes) = 174.880000
Episode 3873: Reward = 200.000000, Mean reward (over 100 episodes) = 174.880000
Episode 3874: Reward = 200.000000, Mean reward (over 100 episodes) = 174.880000
Episode 3875: Reward = 133.000000, Mean reward (over 100 episodes) = 174.770000
Episode 3876: Reward = 174.000000, Mean reward (over 100 episodes) = 174.510000
Episode 3877: Reward = 137.000000, Mean reward (over 100 episodes) = 174.440000
Episode 3878: Reward = 200.000000, Mean reward (over 100 episodes) = 175.190000
Episode 3879: Reward = 200.000000, Mean reward (over 100 episodes) = 175.190000
Episode 3880: Reward = 167.000000, Mean 

Episode 3973: Reward = 200.000000, Mean reward (over 100 episodes) = 184.010000
Episode 3974: Reward = 138.000000, Mean reward (over 100 episodes) = 183.390000
Episode 3975: Reward = 200.000000, Mean reward (over 100 episodes) = 184.060000
Episode 3976: Reward = 200.000000, Mean reward (over 100 episodes) = 184.320000
Episode 3977: Reward = 200.000000, Mean reward (over 100 episodes) = 184.950000
Episode 3978: Reward = 141.000000, Mean reward (over 100 episodes) = 184.360000
Episode 3979: Reward = 143.000000, Mean reward (over 100 episodes) = 183.790000
Episode 3980: Reward = 200.000000, Mean reward (over 100 episodes) = 184.120000
Episode 3981: Reward = 200.000000, Mean reward (over 100 episodes) = 184.480000
Episode 3982: Reward = 200.000000, Mean reward (over 100 episodes) = 184.480000
Episode 3983: Reward = 200.000000, Mean reward (over 100 episodes) = 184.480000
Episode 3984: Reward = 157.000000, Mean reward (over 100 episodes) = 184.050000
Episode 3985: Reward = 200.000000, Mean 

Episode 4076: Reward = 157.000000, Mean reward (over 100 episodes) = 185.700000
Episode 4077: Reward = 200.000000, Mean reward (over 100 episodes) = 185.700000
Episode 4078: Reward = 134.000000, Mean reward (over 100 episodes) = 185.630000
Episode 4079: Reward = 200.000000, Mean reward (over 100 episodes) = 186.200000
Episode 4080: Reward = 136.000000, Mean reward (over 100 episodes) = 185.560000
Episode 4081: Reward = 200.000000, Mean reward (over 100 episodes) = 185.560000
Episode 4082: Reward = 200.000000, Mean reward (over 100 episodes) = 185.560000
Episode 4083: Reward = 143.000000, Mean reward (over 100 episodes) = 184.990000
Episode 4084: Reward = 200.000000, Mean reward (over 100 episodes) = 185.420000
Episode 4085: Reward = 200.000000, Mean reward (over 100 episodes) = 185.420000
Episode 4086: Reward = 150.000000, Mean reward (over 100 episodes) = 184.920000
Episode 4087: Reward = 200.000000, Mean reward (over 100 episodes) = 184.920000
Episode 4088: Reward = 150.000000, Mean 

Episode 4181: Reward = 200.000000, Mean reward (over 100 episodes) = 185.900000
Episode 4182: Reward = 141.000000, Mean reward (over 100 episodes) = 185.310000
Episode 4183: Reward = 200.000000, Mean reward (over 100 episodes) = 185.880000
Episode 4184: Reward = 200.000000, Mean reward (over 100 episodes) = 185.880000
Episode 4185: Reward = 200.000000, Mean reward (over 100 episodes) = 185.880000
Episode 4186: Reward = 200.000000, Mean reward (over 100 episodes) = 186.380000
Episode 4187: Reward = 200.000000, Mean reward (over 100 episodes) = 186.380000
Episode 4188: Reward = 200.000000, Mean reward (over 100 episodes) = 186.880000
Episode 4189: Reward = 200.000000, Mean reward (over 100 episodes) = 187.560000
Episode 4190: Reward = 200.000000, Mean reward (over 100 episodes) = 187.970000
Episode 4191: Reward = 200.000000, Mean reward (over 100 episodes) = 188.190000
Episode 4192: Reward = 153.000000, Mean reward (over 100 episodes) = 187.720000
Episode 4193: Reward = 200.000000, Mean 

Episode 4286: Reward = 200.000000, Mean reward (over 100 episodes) = 187.700000
Episode 4287: Reward = 200.000000, Mean reward (over 100 episodes) = 187.700000
Episode 4288: Reward = 200.000000, Mean reward (over 100 episodes) = 187.700000
Episode 4289: Reward = 155.000000, Mean reward (over 100 episodes) = 187.250000
Episode 4290: Reward = 200.000000, Mean reward (over 100 episodes) = 187.250000
Episode 4291: Reward = 200.000000, Mean reward (over 100 episodes) = 187.250000
Episode 4292: Reward = 200.000000, Mean reward (over 100 episodes) = 187.720000
Episode 4293: Reward = 200.000000, Mean reward (over 100 episodes) = 187.720000
Episode 4294: Reward = 200.000000, Mean reward (over 100 episodes) = 188.180000
Episode 4295: Reward = 150.000000, Mean reward (over 100 episodes) = 187.680000
Episode 4296: Reward = 200.000000, Mean reward (over 100 episodes) = 188.220000
Episode 4297: Reward = 200.000000, Mean reward (over 100 episodes) = 188.890000
Episode 4298: Reward = 200.000000, Mean 

Episode 4391: Reward = 200.000000, Mean reward (over 100 episodes) = 189.060000
Episode 4392: Reward = 200.000000, Mean reward (over 100 episodes) = 189.060000
Episode 4393: Reward = 152.000000, Mean reward (over 100 episodes) = 188.580000
Episode 4394: Reward = 200.000000, Mean reward (over 100 episodes) = 188.580000
Episode 4395: Reward = 200.000000, Mean reward (over 100 episodes) = 189.080000
Episode 4396: Reward = 167.000000, Mean reward (over 100 episodes) = 188.750000
Episode 4397: Reward = 200.000000, Mean reward (over 100 episodes) = 188.750000
Episode 4398: Reward = 200.000000, Mean reward (over 100 episodes) = 188.750000
Episode 4399: Reward = 200.000000, Mean reward (over 100 episodes) = 188.750000
Episode 4400: Reward = 161.000000, Mean reward (over 100 episodes) = 188.360000
Episode 4401: Reward = 200.000000, Mean reward (over 100 episodes) = 188.360000
Episode 4402: Reward = 200.000000, Mean reward (over 100 episodes) = 188.610000
Episode 4403: Reward = 200.000000, Mean 

Episode 4496: Reward = 200.000000, Mean reward (over 100 episodes) = 190.160000
Episode 4497: Reward = 200.000000, Mean reward (over 100 episodes) = 190.160000
Episode 4498: Reward = 200.000000, Mean reward (over 100 episodes) = 190.160000
Episode 4499: Reward = 135.000000, Mean reward (over 100 episodes) = 189.510000
Episode 4500: Reward = 151.000000, Mean reward (over 100 episodes) = 189.410000
Episode 4501: Reward = 168.000000, Mean reward (over 100 episodes) = 189.090000
Episode 4502: Reward = 200.000000, Mean reward (over 100 episodes) = 189.090000
Episode 4503: Reward = 200.000000, Mean reward (over 100 episodes) = 189.090000
Episode 4504: Reward = 200.000000, Mean reward (over 100 episodes) = 189.090000
Episode 4505: Reward = 200.000000, Mean reward (over 100 episodes) = 189.660000
Episode 4506: Reward = 200.000000, Mean reward (over 100 episodes) = 189.660000
Episode 4507: Reward = 155.000000, Mean reward (over 100 episodes) = 189.590000
Episode 4508: Reward = 200.000000, Mean 

Episode 4601: Reward = 200.000000, Mean reward (over 100 episodes) = 190.000000
Episode 4602: Reward = 161.000000, Mean reward (over 100 episodes) = 189.610000
Episode 4603: Reward = 200.000000, Mean reward (over 100 episodes) = 189.610000
Episode 4604: Reward = 170.000000, Mean reward (over 100 episodes) = 189.310000
Episode 4605: Reward = 200.000000, Mean reward (over 100 episodes) = 189.310000
Episode 4606: Reward = 200.000000, Mean reward (over 100 episodes) = 189.310000
Episode 4607: Reward = 200.000000, Mean reward (over 100 episodes) = 189.760000
Episode 4608: Reward = 200.000000, Mean reward (over 100 episodes) = 189.760000
Episode 4609: Reward = 200.000000, Mean reward (over 100 episodes) = 190.270000
Episode 4610: Reward = 167.000000, Mean reward (over 100 episodes) = 189.940000
Episode 4611: Reward = 200.000000, Mean reward (over 100 episodes) = 189.940000
Episode 4612: Reward = 200.000000, Mean reward (over 100 episodes) = 189.940000
Episode 4613: Reward = 200.000000, Mean 

Episode 4706: Reward = 200.000000, Mean reward (over 100 episodes) = 188.300000
Episode 4707: Reward = 200.000000, Mean reward (over 100 episodes) = 188.300000
Episode 4708: Reward = 200.000000, Mean reward (over 100 episodes) = 188.300000
Episode 4709: Reward = 200.000000, Mean reward (over 100 episodes) = 188.300000
Episode 4710: Reward = 157.000000, Mean reward (over 100 episodes) = 188.200000
Episode 4711: Reward = 200.000000, Mean reward (over 100 episodes) = 188.200000
Episode 4712: Reward = 200.000000, Mean reward (over 100 episodes) = 188.200000
Episode 4713: Reward = 177.000000, Mean reward (over 100 episodes) = 187.970000
Episode 4714: Reward = 200.000000, Mean reward (over 100 episodes) = 187.970000
Episode 4715: Reward = 200.000000, Mean reward (over 100 episodes) = 188.550000
Episode 4716: Reward = 136.000000, Mean reward (over 100 episodes) = 188.470000
Episode 4717: Reward = 172.000000, Mean reward (over 100 episodes) = 188.560000
Episode 4718: Reward = 200.000000, Mean 

Episode 4810: Reward = 200.000000, Mean reward (over 100 episodes) = 184.690000
Episode 4811: Reward = 200.000000, Mean reward (over 100 episodes) = 184.690000
Episode 4812: Reward = 200.000000, Mean reward (over 100 episodes) = 184.690000
Episode 4813: Reward = 200.000000, Mean reward (over 100 episodes) = 184.920000
Episode 4814: Reward = 157.000000, Mean reward (over 100 episodes) = 184.490000
Episode 4815: Reward = 166.000000, Mean reward (over 100 episodes) = 184.150000
Episode 4816: Reward = 200.000000, Mean reward (over 100 episodes) = 184.790000
Episode 4817: Reward = 200.000000, Mean reward (over 100 episodes) = 185.070000
Episode 4818: Reward = 137.000000, Mean reward (over 100 episodes) = 184.440000
Episode 4819: Reward = 200.000000, Mean reward (over 100 episodes) = 184.440000
Episode 4820: Reward = 200.000000, Mean reward (over 100 episodes) = 184.750000
Episode 4821: Reward = 178.000000, Mean reward (over 100 episodes) = 185.040000
Episode 4822: Reward = 167.000000, Mean 

Episode 4915: Reward = 200.000000, Mean reward (over 100 episodes) = 178.900000
Episode 4916: Reward = 200.000000, Mean reward (over 100 episodes) = 178.900000
Episode 4917: Reward = 172.000000, Mean reward (over 100 episodes) = 178.620000
Episode 4918: Reward = 200.000000, Mean reward (over 100 episodes) = 179.250000
Episode 4919: Reward = 200.000000, Mean reward (over 100 episodes) = 179.250000
Episode 4920: Reward = 151.000000, Mean reward (over 100 episodes) = 178.760000
Episode 4921: Reward = 200.000000, Mean reward (over 100 episodes) = 178.980000
Episode 4922: Reward = 200.000000, Mean reward (over 100 episodes) = 179.310000
Episode 4923: Reward = 200.000000, Mean reward (over 100 episodes) = 179.570000
Episode 4924: Reward = 132.000000, Mean reward (over 100 episodes) = 178.890000
Episode 4925: Reward = 200.000000, Mean reward (over 100 episodes) = 179.570000
Episode 4926: Reward = 200.000000, Mean reward (over 100 episodes) = 179.570000
Episode 4927: Reward = 200.000000, Mean 

Episode 5019: Reward = 200.000000, Mean reward (over 100 episodes) = 172.700000
Episode 5020: Reward = 200.000000, Mean reward (over 100 episodes) = 173.190000
Episode 5021: Reward = 173.000000, Mean reward (over 100 episodes) = 172.920000
Episode 5022: Reward = 200.000000, Mean reward (over 100 episodes) = 172.920000
Episode 5023: Reward = 162.000000, Mean reward (over 100 episodes) = 172.540000
Episode 5024: Reward = 200.000000, Mean reward (over 100 episodes) = 173.220000
Episode 5025: Reward = 200.000000, Mean reward (over 100 episodes) = 173.220000
Episode 5026: Reward = 167.000000, Mean reward (over 100 episodes) = 172.890000
Episode 5027: Reward = 200.000000, Mean reward (over 100 episodes) = 172.890000
Episode 5028: Reward = 200.000000, Mean reward (over 100 episodes) = 173.380000
Episode 5029: Reward = 200.000000, Mean reward (over 100 episodes) = 173.620000
Episode 5030: Reward = 144.000000, Mean reward (over 100 episodes) = 173.470000
Episode 5031: Reward = 200.000000, Mean 

Episode 5124: Reward = 200.000000, Mean reward (over 100 episodes) = 174.040000
Episode 5125: Reward = 157.000000, Mean reward (over 100 episodes) = 173.610000
Episode 5126: Reward = 200.000000, Mean reward (over 100 episodes) = 173.940000
Episode 5127: Reward = 146.000000, Mean reward (over 100 episodes) = 173.400000
Episode 5128: Reward = 156.000000, Mean reward (over 100 episodes) = 172.960000
Episode 5129: Reward = 200.000000, Mean reward (over 100 episodes) = 172.960000
Episode 5130: Reward = 200.000000, Mean reward (over 100 episodes) = 173.520000
Episode 5131: Reward = 200.000000, Mean reward (over 100 episodes) = 173.520000
Episode 5132: Reward = 162.000000, Mean reward (over 100 episodes) = 173.140000
Episode 5133: Reward = 120.000000, Mean reward (over 100 episodes) = 172.930000
Episode 5134: Reward = 150.000000, Mean reward (over 100 episodes) = 172.430000
Episode 5135: Reward = 146.000000, Mean reward (over 100 episodes) = 172.140000
Episode 5136: Reward = 141.000000, Mean 

Episode 5227: Reward = 200.000000, Mean reward (over 100 episodes) = 174.750000
Episode 5228: Reward = 159.000000, Mean reward (over 100 episodes) = 174.780000
Episode 5229: Reward = 132.000000, Mean reward (over 100 episodes) = 174.100000
Episode 5230: Reward = 159.000000, Mean reward (over 100 episodes) = 173.690000
Episode 5231: Reward = 148.000000, Mean reward (over 100 episodes) = 173.170000
Episode 5232: Reward = 200.000000, Mean reward (over 100 episodes) = 173.550000
Episode 5233: Reward = 200.000000, Mean reward (over 100 episodes) = 174.350000
Episode 5234: Reward = 165.000000, Mean reward (over 100 episodes) = 174.500000
Episode 5235: Reward = 157.000000, Mean reward (over 100 episodes) = 174.610000
Episode 5236: Reward = 175.000000, Mean reward (over 100 episodes) = 174.950000
Episode 5237: Reward = 200.000000, Mean reward (over 100 episodes) = 175.410000
Episode 5238: Reward = 200.000000, Mean reward (over 100 episodes) = 175.410000
Episode 5239: Reward = 200.000000, Mean 

Episode 5330: Reward = 146.000000, Mean reward (over 100 episodes) = 175.800000
Episode 5331: Reward = 174.000000, Mean reward (over 100 episodes) = 176.060000
Episode 5332: Reward = 118.000000, Mean reward (over 100 episodes) = 175.240000
Episode 5333: Reward = 200.000000, Mean reward (over 100 episodes) = 175.240000
Episode 5334: Reward = 158.000000, Mean reward (over 100 episodes) = 175.170000
Episode 5335: Reward = 139.000000, Mean reward (over 100 episodes) = 174.990000
Episode 5336: Reward = 200.000000, Mean reward (over 100 episodes) = 175.240000
Episode 5337: Reward = 142.000000, Mean reward (over 100 episodes) = 174.660000
Episode 5338: Reward = 200.000000, Mean reward (over 100 episodes) = 174.660000
Episode 5339: Reward = 156.000000, Mean reward (over 100 episodes) = 174.220000
Episode 5340: Reward = 200.000000, Mean reward (over 100 episodes) = 174.490000
Episode 5341: Reward = 200.000000, Mean reward (over 100 episodes) = 174.490000
Episode 5342: Reward = 200.000000, Mean 

Episode 5434: Reward = 169.000000, Mean reward (over 100 episodes) = 164.900000
Episode 5435: Reward = 200.000000, Mean reward (over 100 episodes) = 165.510000
Episode 5436: Reward = 135.000000, Mean reward (over 100 episodes) = 164.860000
Episode 5437: Reward = 200.000000, Mean reward (over 100 episodes) = 165.440000
Episode 5438: Reward = 150.000000, Mean reward (over 100 episodes) = 164.940000
Episode 5439: Reward = 155.000000, Mean reward (over 100 episodes) = 164.930000
Episode 5440: Reward = 200.000000, Mean reward (over 100 episodes) = 164.930000
Episode 5441: Reward = 109.000000, Mean reward (over 100 episodes) = 164.020000
Episode 5442: Reward = 125.000000, Mean reward (over 100 episodes) = 163.270000
Episode 5443: Reward = 200.000000, Mean reward (over 100 episodes) = 163.270000
Episode 5444: Reward = 115.000000, Mean reward (over 100 episodes) = 163.190000
Episode 5445: Reward = 200.000000, Mean reward (over 100 episodes) = 163.630000
Episode 5446: Reward = 178.000000, Mean 

Episode 5537: Reward = 200.000000, Mean reward (over 100 episodes) = 149.430000
Episode 5538: Reward = 130.000000, Mean reward (over 100 episodes) = 149.230000
Episode 5539: Reward = 175.000000, Mean reward (over 100 episodes) = 149.430000
Episode 5540: Reward = 200.000000, Mean reward (over 100 episodes) = 149.430000
Episode 5541: Reward = 126.000000, Mean reward (over 100 episodes) = 149.600000
Episode 5542: Reward = 149.000000, Mean reward (over 100 episodes) = 149.840000
Episode 5543: Reward = 149.000000, Mean reward (over 100 episodes) = 149.330000
Episode 5544: Reward = 161.000000, Mean reward (over 100 episodes) = 149.790000
Episode 5545: Reward = 126.000000, Mean reward (over 100 episodes) = 149.050000
Episode 5546: Reward = 200.000000, Mean reward (over 100 episodes) = 149.270000
Episode 5547: Reward = 123.000000, Mean reward (over 100 episodes) = 148.500000
Episode 5548: Reward = 131.000000, Mean reward (over 100 episodes) = 147.810000
Episode 5549: Reward = 120.000000, Mean 

Episode 5642: Reward = 131.000000, Mean reward (over 100 episodes) = 154.050000
Episode 5643: Reward = 114.000000, Mean reward (over 100 episodes) = 153.700000
Episode 5644: Reward = 152.000000, Mean reward (over 100 episodes) = 153.610000
Episode 5645: Reward = 153.000000, Mean reward (over 100 episodes) = 153.880000
Episode 5646: Reward = 115.000000, Mean reward (over 100 episodes) = 153.030000
Episode 5647: Reward = 155.000000, Mean reward (over 100 episodes) = 153.350000
Episode 5648: Reward = 174.000000, Mean reward (over 100 episodes) = 153.780000
Episode 5649: Reward = 106.000000, Mean reward (over 100 episodes) = 153.640000
Episode 5650: Reward = 150.000000, Mean reward (over 100 episodes) = 153.860000
Episode 5651: Reward = 146.000000, Mean reward (over 100 episodes) = 154.020000
Episode 5652: Reward = 126.000000, Mean reward (over 100 episodes) = 153.500000
Episode 5653: Reward = 160.000000, Mean reward (over 100 episodes) = 153.680000
Episode 5654: Reward = 172.000000, Mean 

Episode 5746: Reward = 175.000000, Mean reward (over 100 episodes) = 147.880000
Episode 5747: Reward = 105.000000, Mean reward (over 100 episodes) = 147.380000
Episode 5748: Reward = 127.000000, Mean reward (over 100 episodes) = 146.910000
Episode 5749: Reward = 130.000000, Mean reward (over 100 episodes) = 147.150000
Episode 5750: Reward = 124.000000, Mean reward (over 100 episodes) = 146.890000
Episode 5751: Reward = 200.000000, Mean reward (over 100 episodes) = 147.430000
Episode 5752: Reward = 115.000000, Mean reward (over 100 episodes) = 147.320000
Episode 5753: Reward = 105.000000, Mean reward (over 100 episodes) = 146.770000
Episode 5754: Reward = 140.000000, Mean reward (over 100 episodes) = 146.450000
Episode 5755: Reward = 134.000000, Mean reward (over 100 episodes) = 146.250000
Episode 5756: Reward = 143.000000, Mean reward (over 100 episodes) = 146.380000
Episode 5757: Reward = 116.000000, Mean reward (over 100 episodes) = 146.280000
Episode 5758: Reward = 137.000000, Mean 

Episode 5849: Reward = 200.000000, Mean reward (over 100 episodes) = 143.700000
Episode 5850: Reward = 125.000000, Mean reward (over 100 episodes) = 143.710000
Episode 5851: Reward = 151.000000, Mean reward (over 100 episodes) = 143.220000
Episode 5852: Reward = 200.000000, Mean reward (over 100 episodes) = 144.070000
Episode 5853: Reward = 143.000000, Mean reward (over 100 episodes) = 144.450000
Episode 5854: Reward = 107.000000, Mean reward (over 100 episodes) = 144.120000
Episode 5855: Reward = 200.000000, Mean reward (over 100 episodes) = 144.780000
Episode 5856: Reward = 116.000000, Mean reward (over 100 episodes) = 144.510000
Episode 5857: Reward = 121.000000, Mean reward (over 100 episodes) = 144.560000
Episode 5858: Reward = 119.000000, Mean reward (over 100 episodes) = 144.380000
Episode 5859: Reward = 141.000000, Mean reward (over 100 episodes) = 144.350000
Episode 5860: Reward = 136.000000, Mean reward (over 100 episodes) = 144.270000
Episode 5861: Reward = 125.000000, Mean 

Episode 5953: Reward = 164.000000, Mean reward (over 100 episodes) = 145.410000
Episode 5954: Reward = 167.000000, Mean reward (over 100 episodes) = 146.010000
Episode 5955: Reward = 100.000000, Mean reward (over 100 episodes) = 145.010000
Episode 5956: Reward = 138.000000, Mean reward (over 100 episodes) = 145.230000
Episode 5957: Reward = 156.000000, Mean reward (over 100 episodes) = 145.580000
Episode 5958: Reward = 141.000000, Mean reward (over 100 episodes) = 145.800000
Episode 5959: Reward = 142.000000, Mean reward (over 100 episodes) = 145.810000
Episode 5960: Reward = 131.000000, Mean reward (over 100 episodes) = 145.760000
Episode 5961: Reward = 200.000000, Mean reward (over 100 episodes) = 146.510000
Episode 5962: Reward = 122.000000, Mean reward (over 100 episodes) = 146.380000
Episode 5963: Reward = 126.000000, Mean reward (over 100 episodes) = 146.340000
Episode 5964: Reward = 121.000000, Mean reward (over 100 episodes) = 146.390000
Episode 5965: Reward = 124.000000, Mean 

Episode 6057: Reward = 118.000000, Mean reward (over 100 episodes) = 135.420000
Episode 6058: Reward = 106.000000, Mean reward (over 100 episodes) = 135.070000
Episode 6059: Reward = 124.000000, Mean reward (over 100 episodes) = 134.890000
Episode 6060: Reward = 131.000000, Mean reward (over 100 episodes) = 134.890000
Episode 6061: Reward = 200.000000, Mean reward (over 100 episodes) = 134.890000
Episode 6062: Reward = 121.000000, Mean reward (over 100 episodes) = 134.880000
Episode 6063: Reward = 153.000000, Mean reward (over 100 episodes) = 135.150000
Episode 6064: Reward = 129.000000, Mean reward (over 100 episodes) = 135.230000
Episode 6065: Reward = 150.000000, Mean reward (over 100 episodes) = 135.490000
Episode 6066: Reward = 142.000000, Mean reward (over 100 episodes) = 135.730000
Episode 6067: Reward = 133.000000, Mean reward (over 100 episodes) = 135.060000
Episode 6068: Reward = 116.000000, Mean reward (over 100 episodes) = 134.220000
Episode 6069: Reward = 154.000000, Mean 

Episode 6161: Reward = 122.000000, Mean reward (over 100 episodes) = 133.660000
Episode 6162: Reward = 108.000000, Mean reward (over 100 episodes) = 133.530000
Episode 6163: Reward = 155.000000, Mean reward (over 100 episodes) = 133.550000
Episode 6164: Reward = 138.000000, Mean reward (over 100 episodes) = 133.640000
Episode 6165: Reward = 151.000000, Mean reward (over 100 episodes) = 133.650000
Episode 6166: Reward = 128.000000, Mean reward (over 100 episodes) = 133.510000
Episode 6167: Reward = 131.000000, Mean reward (over 100 episodes) = 133.490000
Episode 6168: Reward = 112.000000, Mean reward (over 100 episodes) = 133.450000
Episode 6169: Reward = 130.000000, Mean reward (over 100 episodes) = 133.210000
Episode 6170: Reward = 102.000000, Mean reward (over 100 episodes) = 132.920000
Episode 6171: Reward = 200.000000, Mean reward (over 100 episodes) = 133.920000
Episode 6172: Reward = 108.000000, Mean reward (over 100 episodes) = 133.910000
Episode 6173: Reward = 144.000000, Mean 

Episode 6267: Reward = 157.000000, Mean reward (over 100 episodes) = 137.340000
Episode 6268: Reward = 119.000000, Mean reward (over 100 episodes) = 137.410000
Episode 6269: Reward = 137.000000, Mean reward (over 100 episodes) = 137.480000
Episode 6270: Reward = 157.000000, Mean reward (over 100 episodes) = 138.030000
Episode 6271: Reward = 164.000000, Mean reward (over 100 episodes) = 137.670000
Episode 6272: Reward = 139.000000, Mean reward (over 100 episodes) = 137.980000
Episode 6273: Reward = 137.000000, Mean reward (over 100 episodes) = 137.910000
Episode 6274: Reward = 200.000000, Mean reward (over 100 episodes) = 138.400000
Episode 6275: Reward = 144.000000, Mean reward (over 100 episodes) = 138.770000
Episode 6276: Reward = 131.000000, Mean reward (over 100 episodes) = 138.760000
Episode 6277: Reward = 148.000000, Mean reward (over 100 episodes) = 138.750000
Episode 6278: Reward = 200.000000, Mean reward (over 100 episodes) = 139.540000
Episode 6279: Reward = 116.000000, Mean 

Episode 6372: Reward = 109.000000, Mean reward (over 100 episodes) = 140.580000
Episode 6373: Reward = 133.000000, Mean reward (over 100 episodes) = 140.540000
Episode 6374: Reward = 149.000000, Mean reward (over 100 episodes) = 140.030000
Episode 6375: Reward = 110.000000, Mean reward (over 100 episodes) = 139.690000
Episode 6376: Reward = 143.000000, Mean reward (over 100 episodes) = 139.810000
Episode 6377: Reward = 125.000000, Mean reward (over 100 episodes) = 139.580000
Episode 6378: Reward = 127.000000, Mean reward (over 100 episodes) = 138.850000
Episode 6379: Reward = 142.000000, Mean reward (over 100 episodes) = 139.110000
Episode 6380: Reward = 119.000000, Mean reward (over 100 episodes) = 138.990000
Episode 6381: Reward = 200.000000, Mean reward (over 100 episodes) = 139.650000
Episode 6382: Reward = 200.000000, Mean reward (over 100 episodes) = 140.290000
Episode 6383: Reward = 130.000000, Mean reward (over 100 episodes) = 140.530000
Episode 6384: Reward = 122.000000, Mean 

Episode 6476: Reward = 123.000000, Mean reward (over 100 episodes) = 132.920000
Episode 6477: Reward = 102.000000, Mean reward (over 100 episodes) = 132.690000
Episode 6478: Reward = 161.000000, Mean reward (over 100 episodes) = 133.030000
Episode 6479: Reward = 115.000000, Mean reward (over 100 episodes) = 132.760000
Episode 6480: Reward = 200.000000, Mean reward (over 100 episodes) = 133.570000
Episode 6481: Reward = 118.000000, Mean reward (over 100 episodes) = 132.750000
Episode 6482: Reward = 139.000000, Mean reward (over 100 episodes) = 132.140000
Episode 6483: Reward = 200.000000, Mean reward (over 100 episodes) = 132.840000
Episode 6484: Reward = 171.000000, Mean reward (over 100 episodes) = 133.330000
Episode 6485: Reward = 144.000000, Mean reward (over 100 episodes) = 133.510000
Episode 6486: Reward = 109.000000, Mean reward (over 100 episodes) = 132.950000
Episode 6487: Reward = 135.000000, Mean reward (over 100 episodes) = 133.220000
Episode 6488: Reward = 153.000000, Mean 

Episode 6580: Reward = 94.000000, Mean reward (over 100 episodes) = 129.810000
Episode 6581: Reward = 127.000000, Mean reward (over 100 episodes) = 129.900000
Episode 6582: Reward = 153.000000, Mean reward (over 100 episodes) = 130.040000
Episode 6583: Reward = 135.000000, Mean reward (over 100 episodes) = 129.390000
Episode 6584: Reward = 155.000000, Mean reward (over 100 episodes) = 129.230000
Episode 6585: Reward = 143.000000, Mean reward (over 100 episodes) = 129.220000
Episode 6586: Reward = 113.000000, Mean reward (over 100 episodes) = 129.260000
Episode 6587: Reward = 121.000000, Mean reward (over 100 episodes) = 129.120000
Episode 6588: Reward = 100.000000, Mean reward (over 100 episodes) = 128.590000
Episode 6589: Reward = 110.000000, Mean reward (over 100 episodes) = 128.320000
Episode 6590: Reward = 129.000000, Mean reward (over 100 episodes) = 128.320000
Episode 6591: Reward = 110.000000, Mean reward (over 100 episodes) = 128.160000
Episode 6592: Reward = 109.000000, Mean r

Episode 6684: Reward = 115.000000, Mean reward (over 100 episodes) = 121.440000
Episode 6685: Reward = 127.000000, Mean reward (over 100 episodes) = 121.280000
Episode 6686: Reward = 102.000000, Mean reward (over 100 episodes) = 121.170000
Episode 6687: Reward = 124.000000, Mean reward (over 100 episodes) = 121.200000
Episode 6688: Reward = 115.000000, Mean reward (over 100 episodes) = 121.350000
Episode 6689: Reward = 146.000000, Mean reward (over 100 episodes) = 121.710000
Episode 6690: Reward = 200.000000, Mean reward (over 100 episodes) = 122.420000
Episode 6691: Reward = 132.000000, Mean reward (over 100 episodes) = 122.640000
Episode 6692: Reward = 106.000000, Mean reward (over 100 episodes) = 122.610000
Episode 6693: Reward = 113.000000, Mean reward (over 100 episodes) = 122.510000
Episode 6694: Reward = 121.000000, Mean reward (over 100 episodes) = 122.230000
Episode 6695: Reward = 118.000000, Mean reward (over 100 episodes) = 122.220000
Episode 6696: Reward = 117.000000, Mean 

Episode 6787: Reward = 121.000000, Mean reward (over 100 episodes) = 121.380000
Episode 6788: Reward = 114.000000, Mean reward (over 100 episodes) = 121.370000
Episode 6789: Reward = 107.000000, Mean reward (over 100 episodes) = 120.980000
Episode 6790: Reward = 95.000000, Mean reward (over 100 episodes) = 119.930000
Episode 6791: Reward = 118.000000, Mean reward (over 100 episodes) = 119.790000
Episode 6792: Reward = 146.000000, Mean reward (over 100 episodes) = 120.190000
Episode 6793: Reward = 128.000000, Mean reward (over 100 episodes) = 120.340000
Episode 6794: Reward = 132.000000, Mean reward (over 100 episodes) = 120.450000
Episode 6795: Reward = 116.000000, Mean reward (over 100 episodes) = 120.430000
Episode 6796: Reward = 126.000000, Mean reward (over 100 episodes) = 120.520000
Episode 6797: Reward = 123.000000, Mean reward (over 100 episodes) = 120.780000
Episode 6798: Reward = 102.000000, Mean reward (over 100 episodes) = 120.340000
Episode 6799: Reward = 126.000000, Mean r

Episode 6893: Reward = 91.000000, Mean reward (over 100 episodes) = 115.370000
Episode 6894: Reward = 97.000000, Mean reward (over 100 episodes) = 115.020000
Episode 6895: Reward = 118.000000, Mean reward (over 100 episodes) = 115.040000
Episode 6896: Reward = 103.000000, Mean reward (over 100 episodes) = 114.810000
Episode 6897: Reward = 112.000000, Mean reward (over 100 episodes) = 114.700000
Episode 6898: Reward = 137.000000, Mean reward (over 100 episodes) = 115.050000
Episode 6899: Reward = 109.000000, Mean reward (over 100 episodes) = 114.880000
Episode 6900: Reward = 118.000000, Mean reward (over 100 episodes) = 114.790000
Episode 6901: Reward = 94.000000, Mean reward (over 100 episodes) = 114.340000
Episode 6902: Reward = 122.000000, Mean reward (over 100 episodes) = 114.630000
Episode 6903: Reward = 108.000000, Mean reward (over 100 episodes) = 114.420000
Episode 6904: Reward = 112.000000, Mean reward (over 100 episodes) = 114.480000
Episode 6905: Reward = 107.000000, Mean rew

Episode 6996: Reward = 108.000000, Mean reward (over 100 episodes) = 117.080000
Episode 6997: Reward = 96.000000, Mean reward (over 100 episodes) = 116.920000
Episode 6998: Reward = 110.000000, Mean reward (over 100 episodes) = 116.650000
Episode 6999: Reward = 103.000000, Mean reward (over 100 episodes) = 116.590000
Episode 7000: Reward = 90.000000, Mean reward (over 100 episodes) = 116.310000
Episode 7001: Reward = 108.000000, Mean reward (over 100 episodes) = 116.450000
Episode 7002: Reward = 102.000000, Mean reward (over 100 episodes) = 116.250000
Episode 7003: Reward = 129.000000, Mean reward (over 100 episodes) = 116.460000
Episode 7004: Reward = 104.000000, Mean reward (over 100 episodes) = 116.380000
Episode 7005: Reward = 105.000000, Mean reward (over 100 episodes) = 116.360000
Episode 7006: Reward = 99.000000, Mean reward (over 100 episodes) = 116.290000
Episode 7007: Reward = 100.000000, Mean reward (over 100 episodes) = 116.040000
Episode 7008: Reward = 112.000000, Mean rew

Episode 7102: Reward = 121.000000, Mean reward (over 100 episodes) = 117.260000
Episode 7103: Reward = 117.000000, Mean reward (over 100 episodes) = 117.140000
Episode 7104: Reward = 97.000000, Mean reward (over 100 episodes) = 117.070000
Episode 7105: Reward = 110.000000, Mean reward (over 100 episodes) = 117.120000
Episode 7106: Reward = 112.000000, Mean reward (over 100 episodes) = 117.250000
Episode 7107: Reward = 102.000000, Mean reward (over 100 episodes) = 117.270000
Episode 7108: Reward = 113.000000, Mean reward (over 100 episodes) = 117.280000
Episode 7109: Reward = 97.000000, Mean reward (over 100 episodes) = 117.040000
Episode 7110: Reward = 103.000000, Mean reward (over 100 episodes) = 116.720000
Episode 7111: Reward = 112.000000, Mean reward (over 100 episodes) = 116.860000
Episode 7112: Reward = 95.000000, Mean reward (over 100 episodes) = 116.570000
Episode 7113: Reward = 139.000000, Mean reward (over 100 episodes) = 116.350000
Episode 7114: Reward = 113.000000, Mean rew

Episode 7206: Reward = 137.000000, Mean reward (over 100 episodes) = 117.810000
Episode 7207: Reward = 92.000000, Mean reward (over 100 episodes) = 117.710000
Episode 7208: Reward = 112.000000, Mean reward (over 100 episodes) = 117.700000
Episode 7209: Reward = 96.000000, Mean reward (over 100 episodes) = 117.690000
Episode 7210: Reward = 100.000000, Mean reward (over 100 episodes) = 117.660000
Episode 7211: Reward = 101.000000, Mean reward (over 100 episodes) = 117.550000
Episode 7212: Reward = 108.000000, Mean reward (over 100 episodes) = 117.680000
Episode 7213: Reward = 127.000000, Mean reward (over 100 episodes) = 117.560000
Episode 7214: Reward = 129.000000, Mean reward (over 100 episodes) = 117.720000
Episode 7215: Reward = 114.000000, Mean reward (over 100 episodes) = 117.630000
Episode 7216: Reward = 104.000000, Mean reward (over 100 episodes) = 117.480000
Episode 7217: Reward = 118.000000, Mean reward (over 100 episodes) = 117.170000
Episode 7218: Reward = 143.000000, Mean re

Episode 7309: Reward = 113.000000, Mean reward (over 100 episodes) = 110.680000
Episode 7310: Reward = 101.000000, Mean reward (over 100 episodes) = 110.690000
Episode 7311: Reward = 132.000000, Mean reward (over 100 episodes) = 111.000000
Episode 7312: Reward = 100.000000, Mean reward (over 100 episodes) = 110.920000
Episode 7313: Reward = 99.000000, Mean reward (over 100 episodes) = 110.640000
Episode 7314: Reward = 121.000000, Mean reward (over 100 episodes) = 110.560000
Episode 7315: Reward = 103.000000, Mean reward (over 100 episodes) = 110.450000
Episode 7316: Reward = 94.000000, Mean reward (over 100 episodes) = 110.350000
Episode 7317: Reward = 96.000000, Mean reward (over 100 episodes) = 110.130000
Episode 7318: Reward = 134.000000, Mean reward (over 100 episodes) = 110.040000
Episode 7319: Reward = 90.000000, Mean reward (over 100 episodes) = 109.230000
Episode 7320: Reward = 86.000000, Mean reward (over 100 episodes) = 108.630000
Episode 7321: Reward = 103.000000, Mean rewar

Episode 7415: Reward = 93.000000, Mean reward (over 100 episodes) = 107.550000
Episode 7416: Reward = 128.000000, Mean reward (over 100 episodes) = 107.890000
Episode 7417: Reward = 113.000000, Mean reward (over 100 episodes) = 108.060000
Episode 7418: Reward = 96.000000, Mean reward (over 100 episodes) = 107.680000
Episode 7419: Reward = 91.000000, Mean reward (over 100 episodes) = 107.690000
Episode 7420: Reward = 121.000000, Mean reward (over 100 episodes) = 108.040000
Episode 7421: Reward = 112.000000, Mean reward (over 100 episodes) = 108.130000
Episode 7422: Reward = 88.000000, Mean reward (over 100 episodes) = 107.380000
Episode 7423: Reward = 124.000000, Mean reward (over 100 episodes) = 107.660000
Episode 7424: Reward = 85.000000, Mean reward (over 100 episodes) = 107.240000
Episode 7425: Reward = 107.000000, Mean reward (over 100 episodes) = 107.300000
Episode 7426: Reward = 105.000000, Mean reward (over 100 episodes) = 107.410000
Episode 7427: Reward = 98.000000, Mean reward

Episode 7519: Reward = 107.000000, Mean reward (over 100 episodes) = 102.630000
Episode 7520: Reward = 93.000000, Mean reward (over 100 episodes) = 102.350000
Episode 7521: Reward = 105.000000, Mean reward (over 100 episodes) = 102.280000
Episode 7522: Reward = 112.000000, Mean reward (over 100 episodes) = 102.520000
Episode 7523: Reward = 97.000000, Mean reward (over 100 episodes) = 102.250000
Episode 7524: Reward = 87.000000, Mean reward (over 100 episodes) = 102.270000
Episode 7525: Reward = 109.000000, Mean reward (over 100 episodes) = 102.290000
Episode 7526: Reward = 117.000000, Mean reward (over 100 episodes) = 102.410000
Episode 7527: Reward = 93.000000, Mean reward (over 100 episodes) = 102.360000
Episode 7528: Reward = 112.000000, Mean reward (over 100 episodes) = 102.530000
Episode 7529: Reward = 106.000000, Mean reward (over 100 episodes) = 102.680000
Episode 7530: Reward = 96.000000, Mean reward (over 100 episodes) = 102.710000
Episode 7531: Reward = 102.000000, Mean rewar

Episode 7622: Reward = 94.000000, Mean reward (over 100 episodes) = 105.430000
Episode 7623: Reward = 87.000000, Mean reward (over 100 episodes) = 105.330000
Episode 7624: Reward = 122.000000, Mean reward (over 100 episodes) = 105.680000
Episode 7625: Reward = 87.000000, Mean reward (over 100 episodes) = 105.460000
Episode 7626: Reward = 155.000000, Mean reward (over 100 episodes) = 105.840000
Episode 7627: Reward = 146.000000, Mean reward (over 100 episodes) = 106.370000
Episode 7628: Reward = 99.000000, Mean reward (over 100 episodes) = 106.240000
Episode 7629: Reward = 134.000000, Mean reward (over 100 episodes) = 106.520000
Episode 7630: Reward = 120.000000, Mean reward (over 100 episodes) = 106.760000
Episode 7631: Reward = 150.000000, Mean reward (over 100 episodes) = 107.240000
Episode 7632: Reward = 103.000000, Mean reward (over 100 episodes) = 107.270000
Episode 7633: Reward = 121.000000, Mean reward (over 100 episodes) = 107.220000
Episode 7634: Reward = 116.000000, Mean rewa

Episode 7725: Reward = 112.000000, Mean reward (over 100 episodes) = 107.780000
Episode 7726: Reward = 102.000000, Mean reward (over 100 episodes) = 107.250000
Episode 7727: Reward = 133.000000, Mean reward (over 100 episodes) = 107.120000
Episode 7728: Reward = 140.000000, Mean reward (over 100 episodes) = 107.530000
Episode 7729: Reward = 112.000000, Mean reward (over 100 episodes) = 107.310000
Episode 7730: Reward = 109.000000, Mean reward (over 100 episodes) = 107.200000
Episode 7731: Reward = 89.000000, Mean reward (over 100 episodes) = 106.590000
Episode 7732: Reward = 108.000000, Mean reward (over 100 episodes) = 106.640000
Episode 7733: Reward = 84.000000, Mean reward (over 100 episodes) = 106.270000
Episode 7734: Reward = 97.000000, Mean reward (over 100 episodes) = 106.080000
Episode 7735: Reward = 95.000000, Mean reward (over 100 episodes) = 105.900000
Episode 7736: Reward = 106.000000, Mean reward (over 100 episodes) = 105.930000
Episode 7737: Reward = 99.000000, Mean rewar

Episode 7831: Reward = 113.000000, Mean reward (over 100 episodes) = 108.070000
Episode 7832: Reward = 97.000000, Mean reward (over 100 episodes) = 107.960000
Episode 7833: Reward = 96.000000, Mean reward (over 100 episodes) = 108.080000
Episode 7834: Reward = 85.000000, Mean reward (over 100 episodes) = 107.960000
Episode 7835: Reward = 129.000000, Mean reward (over 100 episodes) = 108.300000
Episode 7836: Reward = 102.000000, Mean reward (over 100 episodes) = 108.260000
Episode 7837: Reward = 103.000000, Mean reward (over 100 episodes) = 108.300000
Episode 7838: Reward = 91.000000, Mean reward (over 100 episodes) = 108.060000
Episode 7839: Reward = 100.000000, Mean reward (over 100 episodes) = 107.840000
Episode 7840: Reward = 109.000000, Mean reward (over 100 episodes) = 107.630000
Episode 7841: Reward = 111.000000, Mean reward (over 100 episodes) = 107.840000
Episode 7842: Reward = 106.000000, Mean reward (over 100 episodes) = 107.790000
Episode 7843: Reward = 108.000000, Mean rewa

Episode 7934: Reward = 99.000000, Mean reward (over 100 episodes) = 113.390000
Episode 7935: Reward = 116.000000, Mean reward (over 100 episodes) = 113.260000
Episode 7936: Reward = 89.000000, Mean reward (over 100 episodes) = 113.130000
Episode 7937: Reward = 96.000000, Mean reward (over 100 episodes) = 113.060000
Episode 7938: Reward = 105.000000, Mean reward (over 100 episodes) = 113.200000
Episode 7939: Reward = 118.000000, Mean reward (over 100 episodes) = 113.380000
Episode 7940: Reward = 99.000000, Mean reward (over 100 episodes) = 113.280000
Episode 7941: Reward = 106.000000, Mean reward (over 100 episodes) = 113.230000
Episode 7942: Reward = 88.000000, Mean reward (over 100 episodes) = 113.050000
Episode 7943: Reward = 136.000000, Mean reward (over 100 episodes) = 113.330000
Episode 7944: Reward = 114.000000, Mean reward (over 100 episodes) = 113.420000
Episode 7945: Reward = 91.000000, Mean reward (over 100 episodes) = 113.000000
Episode 7946: Reward = 112.000000, Mean reward

Episode 8038: Reward = 117.000000, Mean reward (over 100 episodes) = 104.680000
Episode 8039: Reward = 92.000000, Mean reward (over 100 episodes) = 104.420000
Episode 8040: Reward = 94.000000, Mean reward (over 100 episodes) = 104.370000
Episode 8041: Reward = 148.000000, Mean reward (over 100 episodes) = 104.790000
Episode 8042: Reward = 99.000000, Mean reward (over 100 episodes) = 104.900000
Episode 8043: Reward = 96.000000, Mean reward (over 100 episodes) = 104.500000
Episode 8044: Reward = 121.000000, Mean reward (over 100 episodes) = 104.570000
Episode 8045: Reward = 97.000000, Mean reward (over 100 episodes) = 104.630000
Episode 8046: Reward = 107.000000, Mean reward (over 100 episodes) = 104.580000
Episode 8047: Reward = 91.000000, Mean reward (over 100 episodes) = 104.540000
Episode 8048: Reward = 105.000000, Mean reward (over 100 episodes) = 103.590000
Episode 8049: Reward = 104.000000, Mean reward (over 100 episodes) = 103.680000
Episode 8050: Reward = 112.000000, Mean reward

Episode 8142: Reward = 127.000000, Mean reward (over 100 episodes) = 102.870000
Episode 8143: Reward = 102.000000, Mean reward (over 100 episodes) = 102.930000
Episode 8144: Reward = 71.000000, Mean reward (over 100 episodes) = 102.430000
Episode 8145: Reward = 160.000000, Mean reward (over 100 episodes) = 103.060000
Episode 8146: Reward = 108.000000, Mean reward (over 100 episodes) = 103.070000
Episode 8147: Reward = 100.000000, Mean reward (over 100 episodes) = 103.160000
Episode 8148: Reward = 96.000000, Mean reward (over 100 episodes) = 103.070000
Episode 8149: Reward = 163.000000, Mean reward (over 100 episodes) = 103.660000
Episode 8150: Reward = 110.000000, Mean reward (over 100 episodes) = 103.640000
Episode 8151: Reward = 121.000000, Mean reward (over 100 episodes) = 104.050000
Episode 8152: Reward = 89.000000, Mean reward (over 100 episodes) = 103.210000
Episode 8153: Reward = 110.000000, Mean reward (over 100 episodes) = 103.190000
Episode 8154: Reward = 87.000000, Mean rewa

Episode 8247: Reward = 111.000000, Mean reward (over 100 episodes) = 105.700000
Episode 8248: Reward = 113.000000, Mean reward (over 100 episodes) = 105.870000
Episode 8249: Reward = 91.000000, Mean reward (over 100 episodes) = 105.150000
Episode 8250: Reward = 88.000000, Mean reward (over 100 episodes) = 104.930000
Episode 8251: Reward = 105.000000, Mean reward (over 100 episodes) = 104.770000
Episode 8252: Reward = 112.000000, Mean reward (over 100 episodes) = 105.000000
Episode 8253: Reward = 98.000000, Mean reward (over 100 episodes) = 104.880000
Episode 8254: Reward = 89.000000, Mean reward (over 100 episodes) = 104.900000
Episode 8255: Reward = 85.000000, Mean reward (over 100 episodes) = 104.110000
Episode 8256: Reward = 88.000000, Mean reward (over 100 episodes) = 103.950000
Episode 8257: Reward = 98.000000, Mean reward (over 100 episodes) = 104.040000
Episode 8258: Reward = 108.000000, Mean reward (over 100 episodes) = 103.710000
Episode 8259: Reward = 107.000000, Mean reward 

Episode 8354: Reward = 113.000000, Mean reward (over 100 episodes) = 108.690000
Episode 8355: Reward = 91.000000, Mean reward (over 100 episodes) = 108.750000
Episode 8356: Reward = 103.000000, Mean reward (over 100 episodes) = 108.900000
Episode 8357: Reward = 133.000000, Mean reward (over 100 episodes) = 109.250000
Episode 8358: Reward = 105.000000, Mean reward (over 100 episodes) = 109.220000
Episode 8359: Reward = 107.000000, Mean reward (over 100 episodes) = 109.220000
Episode 8360: Reward = 100.000000, Mean reward (over 100 episodes) = 108.220000
Episode 8361: Reward = 103.000000, Mean reward (over 100 episodes) = 107.940000
Episode 8362: Reward = 94.000000, Mean reward (over 100 episodes) = 107.950000
Episode 8363: Reward = 137.000000, Mean reward (over 100 episodes) = 108.270000
Episode 8364: Reward = 93.000000, Mean reward (over 100 episodes) = 108.090000
Episode 8365: Reward = 114.000000, Mean reward (over 100 episodes) = 107.890000
Episode 8366: Reward = 117.000000, Mean rew

Episode 8457: Reward = 112.000000, Mean reward (over 100 episodes) = 110.000000
Episode 8458: Reward = 103.000000, Mean reward (over 100 episodes) = 109.980000
Episode 8459: Reward = 124.000000, Mean reward (over 100 episodes) = 110.150000
Episode 8460: Reward = 101.000000, Mean reward (over 100 episodes) = 110.160000
Episode 8461: Reward = 141.000000, Mean reward (over 100 episodes) = 110.540000
Episode 8462: Reward = 104.000000, Mean reward (over 100 episodes) = 110.640000
Episode 8463: Reward = 110.000000, Mean reward (over 100 episodes) = 110.370000
Episode 8464: Reward = 98.000000, Mean reward (over 100 episodes) = 110.420000
Episode 8465: Reward = 113.000000, Mean reward (over 100 episodes) = 110.410000
Episode 8466: Reward = 105.000000, Mean reward (over 100 episodes) = 110.290000
Episode 8467: Reward = 114.000000, Mean reward (over 100 episodes) = 110.580000
Episode 8468: Reward = 113.000000, Mean reward (over 100 episodes) = 110.710000
Episode 8469: Reward = 98.000000, Mean re

Episode 8561: Reward = 117.000000, Mean reward (over 100 episodes) = 111.000000
Episode 8562: Reward = 101.000000, Mean reward (over 100 episodes) = 110.970000
Episode 8563: Reward = 89.000000, Mean reward (over 100 episodes) = 110.760000
Episode 8564: Reward = 102.000000, Mean reward (over 100 episodes) = 110.800000
Episode 8565: Reward = 91.000000, Mean reward (over 100 episodes) = 110.580000
Episode 8566: Reward = 137.000000, Mean reward (over 100 episodes) = 110.900000
Episode 8567: Reward = 144.000000, Mean reward (over 100 episodes) = 111.200000
Episode 8568: Reward = 105.000000, Mean reward (over 100 episodes) = 111.120000
Episode 8569: Reward = 132.000000, Mean reward (over 100 episodes) = 111.460000
Episode 8570: Reward = 86.000000, Mean reward (over 100 episodes) = 111.220000
Episode 8571: Reward = 100.000000, Mean reward (over 100 episodes) = 110.750000
Episode 8572: Reward = 125.000000, Mean reward (over 100 episodes) = 110.680000
Episode 8573: Reward = 115.000000, Mean rew

Episode 8667: Reward = 120.000000, Mean reward (over 100 episodes) = 113.940000
Episode 8668: Reward = 136.000000, Mean reward (over 100 episodes) = 114.250000
Episode 8669: Reward = 100.000000, Mean reward (over 100 episodes) = 113.930000
Episode 8670: Reward = 91.000000, Mean reward (over 100 episodes) = 113.980000
Episode 8671: Reward = 119.000000, Mean reward (over 100 episodes) = 114.170000
Episode 8672: Reward = 177.000000, Mean reward (over 100 episodes) = 114.690000
Episode 8673: Reward = 101.000000, Mean reward (over 100 episodes) = 114.550000
Episode 8674: Reward = 120.000000, Mean reward (over 100 episodes) = 114.520000
Episode 8675: Reward = 99.000000, Mean reward (over 100 episodes) = 114.280000
Episode 8676: Reward = 124.000000, Mean reward (over 100 episodes) = 114.400000
Episode 8677: Reward = 112.000000, Mean reward (over 100 episodes) = 114.140000
Episode 8678: Reward = 142.000000, Mean reward (over 100 episodes) = 114.410000
Episode 8679: Reward = 112.000000, Mean re

Episode 8770: Reward = 117.000000, Mean reward (over 100 episodes) = 119.390000
Episode 8771: Reward = 109.000000, Mean reward (over 100 episodes) = 119.290000
Episode 8772: Reward = 119.000000, Mean reward (over 100 episodes) = 118.710000
Episode 8773: Reward = 117.000000, Mean reward (over 100 episodes) = 118.870000
Episode 8774: Reward = 109.000000, Mean reward (over 100 episodes) = 118.760000
Episode 8775: Reward = 100.000000, Mean reward (over 100 episodes) = 118.770000
Episode 8776: Reward = 120.000000, Mean reward (over 100 episodes) = 118.730000
Episode 8777: Reward = 102.000000, Mean reward (over 100 episodes) = 118.630000
Episode 8778: Reward = 100.000000, Mean reward (over 100 episodes) = 118.210000
Episode 8779: Reward = 109.000000, Mean reward (over 100 episodes) = 118.180000
Episode 8780: Reward = 100.000000, Mean reward (over 100 episodes) = 117.770000
Episode 8781: Reward = 129.000000, Mean reward (over 100 episodes) = 117.720000
Episode 8782: Reward = 116.000000, Mean 

Episode 8874: Reward = 134.000000, Mean reward (over 100 episodes) = 120.490000
Episode 8875: Reward = 103.000000, Mean reward (over 100 episodes) = 120.520000
Episode 8876: Reward = 109.000000, Mean reward (over 100 episodes) = 120.410000
Episode 8877: Reward = 113.000000, Mean reward (over 100 episodes) = 120.520000
Episode 8878: Reward = 107.000000, Mean reward (over 100 episodes) = 120.590000
Episode 8879: Reward = 103.000000, Mean reward (over 100 episodes) = 120.530000
Episode 8880: Reward = 118.000000, Mean reward (over 100 episodes) = 120.710000
Episode 8881: Reward = 101.000000, Mean reward (over 100 episodes) = 120.430000
Episode 8882: Reward = 118.000000, Mean reward (over 100 episodes) = 120.450000
Episode 8883: Reward = 124.000000, Mean reward (over 100 episodes) = 120.330000
Episode 8884: Reward = 108.000000, Mean reward (over 100 episodes) = 120.340000
Episode 8885: Reward = 105.000000, Mean reward (over 100 episodes) = 120.040000
Episode 8886: Reward = 101.000000, Mean 

Episode 8978: Reward = 132.000000, Mean reward (over 100 episodes) = 125.280000
Episode 8979: Reward = 108.000000, Mean reward (over 100 episodes) = 125.330000
Episode 8980: Reward = 127.000000, Mean reward (over 100 episodes) = 125.420000
Episode 8981: Reward = 155.000000, Mean reward (over 100 episodes) = 125.960000
Episode 8982: Reward = 136.000000, Mean reward (over 100 episodes) = 126.140000
Episode 8983: Reward = 103.000000, Mean reward (over 100 episodes) = 125.930000
Episode 8984: Reward = 126.000000, Mean reward (over 100 episodes) = 126.110000
Episode 8985: Reward = 137.000000, Mean reward (over 100 episodes) = 126.430000
Episode 8986: Reward = 132.000000, Mean reward (over 100 episodes) = 126.740000
Episode 8987: Reward = 147.000000, Mean reward (over 100 episodes) = 126.760000
Episode 8988: Reward = 114.000000, Mean reward (over 100 episodes) = 126.550000
Episode 8989: Reward = 142.000000, Mean reward (over 100 episodes) = 126.850000
Episode 8990: Reward = 145.000000, Mean 

Episode 9082: Reward = 157.000000, Mean reward (over 100 episodes) = 123.270000
Episode 9083: Reward = 136.000000, Mean reward (over 100 episodes) = 123.600000
Episode 9084: Reward = 116.000000, Mean reward (over 100 episodes) = 123.500000
Episode 9085: Reward = 100.000000, Mean reward (over 100 episodes) = 123.130000
Episode 9086: Reward = 110.000000, Mean reward (over 100 episodes) = 122.910000
Episode 9087: Reward = 147.000000, Mean reward (over 100 episodes) = 122.910000
Episode 9088: Reward = 111.000000, Mean reward (over 100 episodes) = 122.880000
Episode 9089: Reward = 133.000000, Mean reward (over 100 episodes) = 122.790000
Episode 9090: Reward = 122.000000, Mean reward (over 100 episodes) = 122.560000
Episode 9091: Reward = 136.000000, Mean reward (over 100 episodes) = 122.760000
Episode 9092: Reward = 129.000000, Mean reward (over 100 episodes) = 123.110000
Episode 9093: Reward = 106.000000, Mean reward (over 100 episodes) = 122.830000
Episode 9094: Reward = 120.000000, Mean 

Episode 9185: Reward = 145.000000, Mean reward (over 100 episodes) = 124.660000
Episode 9186: Reward = 109.000000, Mean reward (over 100 episodes) = 124.650000
Episode 9187: Reward = 154.000000, Mean reward (over 100 episodes) = 124.720000
Episode 9188: Reward = 106.000000, Mean reward (over 100 episodes) = 124.670000
Episode 9189: Reward = 132.000000, Mean reward (over 100 episodes) = 124.660000
Episode 9190: Reward = 114.000000, Mean reward (over 100 episodes) = 124.580000
Episode 9191: Reward = 108.000000, Mean reward (over 100 episodes) = 124.300000
Episode 9192: Reward = 138.000000, Mean reward (over 100 episodes) = 124.390000
Episode 9193: Reward = 99.000000, Mean reward (over 100 episodes) = 124.320000
Episode 9194: Reward = 167.000000, Mean reward (over 100 episodes) = 124.790000
Episode 9195: Reward = 108.000000, Mean reward (over 100 episodes) = 124.490000
Episode 9196: Reward = 128.000000, Mean reward (over 100 episodes) = 124.660000
Episode 9197: Reward = 148.000000, Mean r

Episode 9289: Reward = 129.000000, Mean reward (over 100 episodes) = 127.040000
Episode 9290: Reward = 104.000000, Mean reward (over 100 episodes) = 126.940000
Episode 9291: Reward = 124.000000, Mean reward (over 100 episodes) = 127.100000
Episode 9292: Reward = 109.000000, Mean reward (over 100 episodes) = 126.810000
Episode 9293: Reward = 168.000000, Mean reward (over 100 episodes) = 127.500000
Episode 9294: Reward = 116.000000, Mean reward (over 100 episodes) = 126.990000
Episode 9295: Reward = 146.000000, Mean reward (over 100 episodes) = 127.370000
Episode 9296: Reward = 120.000000, Mean reward (over 100 episodes) = 127.290000
Episode 9297: Reward = 122.000000, Mean reward (over 100 episodes) = 127.030000
Episode 9298: Reward = 125.000000, Mean reward (over 100 episodes) = 127.010000
Episode 9299: Reward = 104.000000, Mean reward (over 100 episodes) = 126.550000
Episode 9300: Reward = 125.000000, Mean reward (over 100 episodes) = 126.450000
Episode 9301: Reward = 162.000000, Mean 

Episode 9393: Reward = 120.000000, Mean reward (over 100 episodes) = 129.970000
Episode 9394: Reward = 122.000000, Mean reward (over 100 episodes) = 130.030000
Episode 9395: Reward = 112.000000, Mean reward (over 100 episodes) = 129.690000
Episode 9396: Reward = 167.000000, Mean reward (over 100 episodes) = 130.160000
Episode 9397: Reward = 118.000000, Mean reward (over 100 episodes) = 130.120000
Episode 9398: Reward = 115.000000, Mean reward (over 100 episodes) = 130.020000
Episode 9399: Reward = 149.000000, Mean reward (over 100 episodes) = 130.470000
Episode 9400: Reward = 151.000000, Mean reward (over 100 episodes) = 130.730000
Episode 9401: Reward = 141.000000, Mean reward (over 100 episodes) = 130.520000
Episode 9402: Reward = 122.000000, Mean reward (over 100 episodes) = 130.600000
Episode 9403: Reward = 171.000000, Mean reward (over 100 episodes) = 130.790000
Episode 9404: Reward = 106.000000, Mean reward (over 100 episodes) = 130.200000
Episode 9405: Reward = 129.000000, Mean 

Episode 9496: Reward = 140.000000, Mean reward (over 100 episodes) = 128.790000
Episode 9497: Reward = 139.000000, Mean reward (over 100 episodes) = 129.000000
Episode 9498: Reward = 130.000000, Mean reward (over 100 episodes) = 129.150000
Episode 9499: Reward = 100.000000, Mean reward (over 100 episodes) = 128.660000
Episode 9500: Reward = 140.000000, Mean reward (over 100 episodes) = 128.550000
Episode 9501: Reward = 130.000000, Mean reward (over 100 episodes) = 128.440000
Episode 9502: Reward = 200.000000, Mean reward (over 100 episodes) = 129.220000
Episode 9503: Reward = 144.000000, Mean reward (over 100 episodes) = 128.950000
Episode 9504: Reward = 134.000000, Mean reward (over 100 episodes) = 129.230000
Episode 9505: Reward = 163.000000, Mean reward (over 100 episodes) = 129.570000
Episode 9506: Reward = 143.000000, Mean reward (over 100 episodes) = 129.560000
Episode 9507: Reward = 110.000000, Mean reward (over 100 episodes) = 129.420000
Episode 9508: Reward = 132.000000, Mean 

Episode 9599: Reward = 110.000000, Mean reward (over 100 episodes) = 135.030000
Episode 9600: Reward = 145.000000, Mean reward (over 100 episodes) = 135.080000
Episode 9601: Reward = 118.000000, Mean reward (over 100 episodes) = 134.960000
Episode 9602: Reward = 109.000000, Mean reward (over 100 episodes) = 134.050000
Episode 9603: Reward = 115.000000, Mean reward (over 100 episodes) = 133.760000
Episode 9604: Reward = 132.000000, Mean reward (over 100 episodes) = 133.740000
Episode 9605: Reward = 109.000000, Mean reward (over 100 episodes) = 133.200000
Episode 9606: Reward = 162.000000, Mean reward (over 100 episodes) = 133.390000
Episode 9607: Reward = 155.000000, Mean reward (over 100 episodes) = 133.840000
Episode 9608: Reward = 147.000000, Mean reward (over 100 episodes) = 133.990000
Episode 9609: Reward = 133.000000, Mean reward (over 100 episodes) = 133.920000
Episode 9610: Reward = 144.000000, Mean reward (over 100 episodes) = 134.010000
Episode 9611: Reward = 102.000000, Mean 

Episode 9703: Reward = 141.000000, Mean reward (over 100 episodes) = 129.070000
Episode 9704: Reward = 103.000000, Mean reward (over 100 episodes) = 128.780000
Episode 9705: Reward = 102.000000, Mean reward (over 100 episodes) = 128.710000
Episode 9706: Reward = 130.000000, Mean reward (over 100 episodes) = 128.390000
Episode 9707: Reward = 139.000000, Mean reward (over 100 episodes) = 128.230000
Episode 9708: Reward = 145.000000, Mean reward (over 100 episodes) = 128.210000
Episode 9709: Reward = 143.000000, Mean reward (over 100 episodes) = 128.310000
Episode 9710: Reward = 132.000000, Mean reward (over 100 episodes) = 128.190000
Episode 9711: Reward = 101.000000, Mean reward (over 100 episodes) = 128.180000
Episode 9712: Reward = 130.000000, Mean reward (over 100 episodes) = 128.350000
Episode 9713: Reward = 173.000000, Mean reward (over 100 episodes) = 128.590000
Episode 9714: Reward = 147.000000, Mean reward (over 100 episodes) = 128.850000
Episode 9715: Reward = 158.000000, Mean 

Episode 9807: Reward = 122.000000, Mean reward (over 100 episodes) = 135.020000
Episode 9808: Reward = 128.000000, Mean reward (over 100 episodes) = 134.850000
Episode 9809: Reward = 124.000000, Mean reward (over 100 episodes) = 134.660000
Episode 9810: Reward = 111.000000, Mean reward (over 100 episodes) = 134.450000
Episode 9811: Reward = 135.000000, Mean reward (over 100 episodes) = 134.790000
Episode 9812: Reward = 143.000000, Mean reward (over 100 episodes) = 134.920000
Episode 9813: Reward = 137.000000, Mean reward (over 100 episodes) = 134.560000
Episode 9814: Reward = 128.000000, Mean reward (over 100 episodes) = 134.370000
Episode 9815: Reward = 143.000000, Mean reward (over 100 episodes) = 134.220000
Episode 9816: Reward = 142.000000, Mean reward (over 100 episodes) = 134.040000
Episode 9817: Reward = 107.000000, Mean reward (over 100 episodes) = 133.690000
Episode 9818: Reward = 131.000000, Mean reward (over 100 episodes) = 133.710000
Episode 9819: Reward = 113.000000, Mean 

Episode 9910: Reward = 114.000000, Mean reward (over 100 episodes) = 135.580000
Episode 9911: Reward = 136.000000, Mean reward (over 100 episodes) = 135.590000
Episode 9912: Reward = 117.000000, Mean reward (over 100 episodes) = 135.330000
Episode 9913: Reward = 113.000000, Mean reward (over 100 episodes) = 135.090000
Episode 9914: Reward = 133.000000, Mean reward (over 100 episodes) = 135.140000
Episode 9915: Reward = 108.000000, Mean reward (over 100 episodes) = 134.790000
Episode 9916: Reward = 154.000000, Mean reward (over 100 episodes) = 134.910000
Episode 9917: Reward = 163.000000, Mean reward (over 100 episodes) = 135.470000
Episode 9918: Reward = 152.000000, Mean reward (over 100 episodes) = 135.680000
Episode 9919: Reward = 117.000000, Mean reward (over 100 episodes) = 135.720000
Episode 9920: Reward = 141.000000, Mean reward (over 100 episodes) = 136.010000
Episode 9921: Reward = 123.000000, Mean reward (over 100 episodes) = 135.870000
Episode 9922: Reward = 125.000000, Mean 

Episode 10016: Reward = 148.000000, Mean reward (over 100 episodes) = 134.490000
Episode 10017: Reward = 170.000000, Mean reward (over 100 episodes) = 134.560000
Episode 10018: Reward = 124.000000, Mean reward (over 100 episodes) = 134.280000
Episode 10019: Reward = 134.000000, Mean reward (over 100 episodes) = 134.450000
Episode 10020: Reward = 133.000000, Mean reward (over 100 episodes) = 134.370000
Episode 10021: Reward = 142.000000, Mean reward (over 100 episodes) = 134.560000
Episode 10022: Reward = 130.000000, Mean reward (over 100 episodes) = 134.610000
Episode 10023: Reward = 153.000000, Mean reward (over 100 episodes) = 134.550000
Episode 10024: Reward = 171.000000, Mean reward (over 100 episodes) = 135.160000
Episode 10025: Reward = 163.000000, Mean reward (over 100 episodes) = 135.530000
Episode 10026: Reward = 104.000000, Mean reward (over 100 episodes) = 135.110000
Episode 10027: Reward = 129.000000, Mean reward (over 100 episodes) = 135.040000
Episode 10028: Reward = 137.

Episode 10121: Reward = 118.000000, Mean reward (over 100 episodes) = 135.440000
Episode 10122: Reward = 132.000000, Mean reward (over 100 episodes) = 135.460000
Episode 10123: Reward = 107.000000, Mean reward (over 100 episodes) = 135.000000
Episode 10124: Reward = 151.000000, Mean reward (over 100 episodes) = 134.800000
Episode 10125: Reward = 159.000000, Mean reward (over 100 episodes) = 134.760000
Episode 10126: Reward = 143.000000, Mean reward (over 100 episodes) = 135.150000
Episode 10127: Reward = 135.000000, Mean reward (over 100 episodes) = 135.210000
Episode 10128: Reward = 115.000000, Mean reward (over 100 episodes) = 134.990000
Episode 10129: Reward = 130.000000, Mean reward (over 100 episodes) = 134.810000
Episode 10130: Reward = 133.000000, Mean reward (over 100 episodes) = 134.810000
Episode 10131: Reward = 172.000000, Mean reward (over 100 episodes) = 135.240000
Episode 10132: Reward = 127.000000, Mean reward (over 100 episodes) = 135.220000
Episode 10133: Reward = 151.

Episode 10225: Reward = 133.000000, Mean reward (over 100 episodes) = 128.580000
Episode 10226: Reward = 136.000000, Mean reward (over 100 episodes) = 128.510000
Episode 10227: Reward = 128.000000, Mean reward (over 100 episodes) = 128.440000
Episode 10228: Reward = 118.000000, Mean reward (over 100 episodes) = 128.470000
Episode 10229: Reward = 97.000000, Mean reward (over 100 episodes) = 128.140000
Episode 10230: Reward = 142.000000, Mean reward (over 100 episodes) = 128.230000
Episode 10231: Reward = 107.000000, Mean reward (over 100 episodes) = 127.580000
Episode 10232: Reward = 138.000000, Mean reward (over 100 episodes) = 127.690000
Episode 10233: Reward = 129.000000, Mean reward (over 100 episodes) = 127.470000
Episode 10234: Reward = 115.000000, Mean reward (over 100 episodes) = 127.080000
Episode 10235: Reward = 114.000000, Mean reward (over 100 episodes) = 126.910000
Episode 10236: Reward = 139.000000, Mean reward (over 100 episodes) = 127.160000
Episode 10237: Reward = 127.0

Episode 10327: Reward = 130.000000, Mean reward (over 100 episodes) = 125.780000
Episode 10328: Reward = 137.000000, Mean reward (over 100 episodes) = 125.970000
Episode 10329: Reward = 130.000000, Mean reward (over 100 episodes) = 126.300000
Episode 10330: Reward = 124.000000, Mean reward (over 100 episodes) = 126.120000
Episode 10331: Reward = 103.000000, Mean reward (over 100 episodes) = 126.080000
Episode 10332: Reward = 158.000000, Mean reward (over 100 episodes) = 126.280000
Episode 10333: Reward = 133.000000, Mean reward (over 100 episodes) = 126.320000
Episode 10334: Reward = 121.000000, Mean reward (over 100 episodes) = 126.380000
Episode 10335: Reward = 144.000000, Mean reward (over 100 episodes) = 126.680000
Episode 10336: Reward = 113.000000, Mean reward (over 100 episodes) = 126.420000
Episode 10337: Reward = 142.000000, Mean reward (over 100 episodes) = 126.570000
Episode 10338: Reward = 134.000000, Mean reward (over 100 episodes) = 126.860000
Episode 10339: Reward = 122.

Episode 10432: Reward = 111.000000, Mean reward (over 100 episodes) = 131.870000
Episode 10433: Reward = 118.000000, Mean reward (over 100 episodes) = 131.720000
Episode 10434: Reward = 147.000000, Mean reward (over 100 episodes) = 131.980000
Episode 10435: Reward = 138.000000, Mean reward (over 100 episodes) = 131.920000
Episode 10436: Reward = 112.000000, Mean reward (over 100 episodes) = 131.910000
Episode 10437: Reward = 113.000000, Mean reward (over 100 episodes) = 131.620000
Episode 10438: Reward = 118.000000, Mean reward (over 100 episodes) = 131.460000
Episode 10439: Reward = 132.000000, Mean reward (over 100 episodes) = 131.560000
Episode 10440: Reward = 134.000000, Mean reward (over 100 episodes) = 131.610000
Episode 10441: Reward = 154.000000, Mean reward (over 100 episodes) = 131.950000
Episode 10442: Reward = 163.000000, Mean reward (over 100 episodes) = 132.330000
Episode 10443: Reward = 121.000000, Mean reward (over 100 episodes) = 132.460000
Episode 10444: Reward = 132.

Episode 10536: Reward = 129.000000, Mean reward (over 100 episodes) = 134.730000
Episode 10537: Reward = 128.000000, Mean reward (over 100 episodes) = 134.880000
Episode 10538: Reward = 140.000000, Mean reward (over 100 episodes) = 135.100000
Episode 10539: Reward = 118.000000, Mean reward (over 100 episodes) = 134.960000
Episode 10540: Reward = 110.000000, Mean reward (over 100 episodes) = 134.720000
Episode 10541: Reward = 155.000000, Mean reward (over 100 episodes) = 134.730000
Episode 10542: Reward = 135.000000, Mean reward (over 100 episodes) = 134.450000
Episode 10543: Reward = 153.000000, Mean reward (over 100 episodes) = 134.770000
Episode 10544: Reward = 117.000000, Mean reward (over 100 episodes) = 134.620000
Episode 10545: Reward = 133.000000, Mean reward (over 100 episodes) = 134.770000
Episode 10546: Reward = 119.000000, Mean reward (over 100 episodes) = 133.960000
Episode 10547: Reward = 108.000000, Mean reward (over 100 episodes) = 133.660000
Episode 10548: Reward = 139.

Episode 10640: Reward = 139.000000, Mean reward (over 100 episodes) = 132.070000
Episode 10641: Reward = 128.000000, Mean reward (over 100 episodes) = 131.800000
Episode 10642: Reward = 120.000000, Mean reward (over 100 episodes) = 131.650000
Episode 10643: Reward = 176.000000, Mean reward (over 100 episodes) = 131.880000
Episode 10644: Reward = 157.000000, Mean reward (over 100 episodes) = 132.280000
Episode 10645: Reward = 137.000000, Mean reward (over 100 episodes) = 132.320000
Episode 10646: Reward = 143.000000, Mean reward (over 100 episodes) = 132.560000
Episode 10647: Reward = 101.000000, Mean reward (over 100 episodes) = 132.490000
Episode 10648: Reward = 122.000000, Mean reward (over 100 episodes) = 132.320000
Episode 10649: Reward = 114.000000, Mean reward (over 100 episodes) = 132.140000
Episode 10650: Reward = 142.000000, Mean reward (over 100 episodes) = 132.350000
Episode 10651: Reward = 164.000000, Mean reward (over 100 episodes) = 132.790000
Episode 10652: Reward = 134.

Episode 10743: Reward = 118.000000, Mean reward (over 100 episodes) = 133.660000
Episode 10744: Reward = 126.000000, Mean reward (over 100 episodes) = 133.350000
Episode 10745: Reward = 139.000000, Mean reward (over 100 episodes) = 133.370000
Episode 10746: Reward = 125.000000, Mean reward (over 100 episodes) = 133.190000
Episode 10747: Reward = 141.000000, Mean reward (over 100 episodes) = 133.590000
Episode 10748: Reward = 123.000000, Mean reward (over 100 episodes) = 133.600000
Episode 10749: Reward = 134.000000, Mean reward (over 100 episodes) = 133.800000
Episode 10750: Reward = 142.000000, Mean reward (over 100 episodes) = 133.800000
Episode 10751: Reward = 137.000000, Mean reward (over 100 episodes) = 133.530000
Episode 10752: Reward = 124.000000, Mean reward (over 100 episodes) = 133.430000
Episode 10753: Reward = 113.000000, Mean reward (over 100 episodes) = 133.310000
Episode 10754: Reward = 143.000000, Mean reward (over 100 episodes) = 133.350000
Episode 10755: Reward = 135.

Episode 10847: Reward = 130.000000, Mean reward (over 100 episodes) = 127.170000
Episode 10848: Reward = 114.000000, Mean reward (over 100 episodes) = 127.080000
Episode 10849: Reward = 145.000000, Mean reward (over 100 episodes) = 127.190000
Episode 10850: Reward = 140.000000, Mean reward (over 100 episodes) = 127.170000
Episode 10851: Reward = 131.000000, Mean reward (over 100 episodes) = 127.110000
Episode 10852: Reward = 127.000000, Mean reward (over 100 episodes) = 127.140000
Episode 10853: Reward = 124.000000, Mean reward (over 100 episodes) = 127.250000
Episode 10854: Reward = 109.000000, Mean reward (over 100 episodes) = 126.910000
Episode 10855: Reward = 133.000000, Mean reward (over 100 episodes) = 126.890000
Episode 10856: Reward = 109.000000, Mean reward (over 100 episodes) = 126.710000
Episode 10857: Reward = 132.000000, Mean reward (over 100 episodes) = 126.030000
Episode 10858: Reward = 125.000000, Mean reward (over 100 episodes) = 126.000000
Episode 10859: Reward = 145.

Episode 10949: Reward = 127.000000, Mean reward (over 100 episodes) = 124.840000
Episode 10950: Reward = 119.000000, Mean reward (over 100 episodes) = 124.630000
Episode 10951: Reward = 124.000000, Mean reward (over 100 episodes) = 124.560000
Episode 10952: Reward = 134.000000, Mean reward (over 100 episodes) = 124.630000
Episode 10953: Reward = 142.000000, Mean reward (over 100 episodes) = 124.810000
Episode 10954: Reward = 109.000000, Mean reward (over 100 episodes) = 124.810000
Episode 10955: Reward = 115.000000, Mean reward (over 100 episodes) = 124.630000
Episode 10956: Reward = 118.000000, Mean reward (over 100 episodes) = 124.720000
Episode 10957: Reward = 98.000000, Mean reward (over 100 episodes) = 124.380000
Episode 10958: Reward = 137.000000, Mean reward (over 100 episodes) = 124.500000
Episode 10959: Reward = 101.000000, Mean reward (over 100 episodes) = 124.060000
Episode 10960: Reward = 133.000000, Mean reward (over 100 episodes) = 124.290000
Episode 10961: Reward = 143.0

Episode 11053: Reward = 131.000000, Mean reward (over 100 episodes) = 123.360000
Episode 11054: Reward = 120.000000, Mean reward (over 100 episodes) = 123.470000
Episode 11055: Reward = 105.000000, Mean reward (over 100 episodes) = 123.370000
Episode 11056: Reward = 151.000000, Mean reward (over 100 episodes) = 123.700000
Episode 11057: Reward = 146.000000, Mean reward (over 100 episodes) = 124.180000
Episode 11058: Reward = 121.000000, Mean reward (over 100 episodes) = 124.020000
Episode 11059: Reward = 119.000000, Mean reward (over 100 episodes) = 124.200000
Episode 11060: Reward = 115.000000, Mean reward (over 100 episodes) = 124.020000
Episode 11061: Reward = 144.000000, Mean reward (over 100 episodes) = 124.030000
Episode 11062: Reward = 125.000000, Mean reward (over 100 episodes) = 123.940000
Episode 11063: Reward = 123.000000, Mean reward (over 100 episodes) = 123.980000
Episode 11064: Reward = 117.000000, Mean reward (over 100 episodes) = 124.000000
Episode 11065: Reward = 109.

Episode 11157: Reward = 121.000000, Mean reward (over 100 episodes) = 121.830000
Episode 11158: Reward = 117.000000, Mean reward (over 100 episodes) = 121.790000
Episode 11159: Reward = 118.000000, Mean reward (over 100 episodes) = 121.780000
Episode 11160: Reward = 127.000000, Mean reward (over 100 episodes) = 121.900000
Episode 11161: Reward = 108.000000, Mean reward (over 100 episodes) = 121.540000
Episode 11162: Reward = 118.000000, Mean reward (over 100 episodes) = 121.470000
Episode 11163: Reward = 109.000000, Mean reward (over 100 episodes) = 121.330000
Episode 11164: Reward = 146.000000, Mean reward (over 100 episodes) = 121.620000
Episode 11165: Reward = 114.000000, Mean reward (over 100 episodes) = 121.670000
Episode 11166: Reward = 135.000000, Mean reward (over 100 episodes) = 121.930000
Episode 11167: Reward = 124.000000, Mean reward (over 100 episodes) = 121.850000
Episode 11168: Reward = 116.000000, Mean reward (over 100 episodes) = 121.960000
Episode 11169: Reward = 100.

Episode 11261: Reward = 107.000000, Mean reward (over 100 episodes) = 122.060000
Episode 11262: Reward = 110.000000, Mean reward (over 100 episodes) = 121.980000
Episode 11263: Reward = 116.000000, Mean reward (over 100 episodes) = 122.050000
Episode 11264: Reward = 115.000000, Mean reward (over 100 episodes) = 121.740000
Episode 11265: Reward = 127.000000, Mean reward (over 100 episodes) = 121.870000
Episode 11266: Reward = 118.000000, Mean reward (over 100 episodes) = 121.700000
Episode 11267: Reward = 148.000000, Mean reward (over 100 episodes) = 121.940000
Episode 11268: Reward = 115.000000, Mean reward (over 100 episodes) = 121.930000
Episode 11269: Reward = 125.000000, Mean reward (over 100 episodes) = 122.180000
Episode 11270: Reward = 97.000000, Mean reward (over 100 episodes) = 122.010000
Episode 11271: Reward = 99.000000, Mean reward (over 100 episodes) = 121.720000
Episode 11272: Reward = 116.000000, Mean reward (over 100 episodes) = 120.880000
Episode 11273: Reward = 144.00

Episode 11363: Reward = 96.000000, Mean reward (over 100 episodes) = 115.920000
Episode 11364: Reward = 100.000000, Mean reward (over 100 episodes) = 115.770000
Episode 11365: Reward = 108.000000, Mean reward (over 100 episodes) = 115.580000
Episode 11366: Reward = 105.000000, Mean reward (over 100 episodes) = 115.450000
Episode 11367: Reward = 106.000000, Mean reward (over 100 episodes) = 115.030000
Episode 11368: Reward = 110.000000, Mean reward (over 100 episodes) = 114.980000
Episode 11369: Reward = 113.000000, Mean reward (over 100 episodes) = 114.860000
Episode 11370: Reward = 156.000000, Mean reward (over 100 episodes) = 115.450000
Episode 11371: Reward = 113.000000, Mean reward (over 100 episodes) = 115.590000
Episode 11372: Reward = 119.000000, Mean reward (over 100 episodes) = 115.620000
Episode 11373: Reward = 130.000000, Mean reward (over 100 episodes) = 115.480000
Episode 11374: Reward = 110.000000, Mean reward (over 100 episodes) = 115.600000
Episode 11375: Reward = 122.0

Episode 11468: Reward = 113.000000, Mean reward (over 100 episodes) = 111.940000
Episode 11469: Reward = 108.000000, Mean reward (over 100 episodes) = 111.890000
Episode 11470: Reward = 131.000000, Mean reward (over 100 episodes) = 111.640000
Episode 11471: Reward = 104.000000, Mean reward (over 100 episodes) = 111.550000
Episode 11472: Reward = 133.000000, Mean reward (over 100 episodes) = 111.690000
Episode 11473: Reward = 101.000000, Mean reward (over 100 episodes) = 111.400000
Episode 11474: Reward = 132.000000, Mean reward (over 100 episodes) = 111.620000
Episode 11475: Reward = 103.000000, Mean reward (over 100 episodes) = 111.430000
Episode 11476: Reward = 109.000000, Mean reward (over 100 episodes) = 111.530000
Episode 11477: Reward = 117.000000, Mean reward (over 100 episodes) = 111.700000
Episode 11478: Reward = 110.000000, Mean reward (over 100 episodes) = 111.720000
Episode 11479: Reward = 144.000000, Mean reward (over 100 episodes) = 112.060000
Episode 11480: Reward = 100.

Episode 11570: Reward = 109.000000, Mean reward (over 100 episodes) = 112.720000
Episode 11571: Reward = 112.000000, Mean reward (over 100 episodes) = 112.800000
Episode 11572: Reward = 88.000000, Mean reward (over 100 episodes) = 112.350000
Episode 11573: Reward = 97.000000, Mean reward (over 100 episodes) = 112.310000
Episode 11574: Reward = 130.000000, Mean reward (over 100 episodes) = 112.290000
Episode 11575: Reward = 122.000000, Mean reward (over 100 episodes) = 112.480000
Episode 11576: Reward = 113.000000, Mean reward (over 100 episodes) = 112.520000
Episode 11577: Reward = 108.000000, Mean reward (over 100 episodes) = 112.430000
Episode 11578: Reward = 109.000000, Mean reward (over 100 episodes) = 112.420000
Episode 11579: Reward = 95.000000, Mean reward (over 100 episodes) = 111.930000
Episode 11580: Reward = 98.000000, Mean reward (over 100 episodes) = 111.910000
Episode 11581: Reward = 114.000000, Mean reward (over 100 episodes) = 111.880000
Episode 11582: Reward = 107.0000

Episode 11675: Reward = 112.000000, Mean reward (over 100 episodes) = 112.960000
Episode 11676: Reward = 110.000000, Mean reward (over 100 episodes) = 112.930000
Episode 11677: Reward = 108.000000, Mean reward (over 100 episodes) = 112.930000
Episode 11678: Reward = 96.000000, Mean reward (over 100 episodes) = 112.800000
Episode 11679: Reward = 102.000000, Mean reward (over 100 episodes) = 112.870000
Episode 11680: Reward = 95.000000, Mean reward (over 100 episodes) = 112.840000
Episode 11681: Reward = 106.000000, Mean reward (over 100 episodes) = 112.760000
Episode 11682: Reward = 99.000000, Mean reward (over 100 episodes) = 112.680000
Episode 11683: Reward = 117.000000, Mean reward (over 100 episodes) = 112.500000
Episode 11684: Reward = 114.000000, Mean reward (over 100 episodes) = 112.600000
Episode 11685: Reward = 115.000000, Mean reward (over 100 episodes) = 112.650000
Episode 11686: Reward = 131.000000, Mean reward (over 100 episodes) = 112.620000
Episode 11687: Reward = 108.000

Episode 11778: Reward = 111.000000, Mean reward (over 100 episodes) = 115.440000
Episode 11779: Reward = 104.000000, Mean reward (over 100 episodes) = 115.460000
Episode 11780: Reward = 136.000000, Mean reward (over 100 episodes) = 115.870000
Episode 11781: Reward = 95.000000, Mean reward (over 100 episodes) = 115.760000
Episode 11782: Reward = 166.000000, Mean reward (over 100 episodes) = 116.430000
Episode 11783: Reward = 155.000000, Mean reward (over 100 episodes) = 116.810000
Episode 11784: Reward = 104.000000, Mean reward (over 100 episodes) = 116.710000
Episode 11785: Reward = 112.000000, Mean reward (over 100 episodes) = 116.680000
Episode 11786: Reward = 123.000000, Mean reward (over 100 episodes) = 116.600000
Episode 11787: Reward = 98.000000, Mean reward (over 100 episodes) = 116.500000
Episode 11788: Reward = 119.000000, Mean reward (over 100 episodes) = 116.560000
Episode 11789: Reward = 114.000000, Mean reward (over 100 episodes) = 116.520000
Episode 11790: Reward = 126.00

Episode 11880: Reward = 136.000000, Mean reward (over 100 episodes) = 119.790000
Episode 11881: Reward = 117.000000, Mean reward (over 100 episodes) = 120.010000
Episode 11882: Reward = 97.000000, Mean reward (over 100 episodes) = 119.320000
Episode 11883: Reward = 159.000000, Mean reward (over 100 episodes) = 119.360000
Episode 11884: Reward = 99.000000, Mean reward (over 100 episodes) = 119.310000
Episode 11885: Reward = 140.000000, Mean reward (over 100 episodes) = 119.590000
Episode 11886: Reward = 106.000000, Mean reward (over 100 episodes) = 119.420000
Episode 11887: Reward = 100.000000, Mean reward (over 100 episodes) = 119.440000
Episode 11888: Reward = 135.000000, Mean reward (over 100 episodes) = 119.600000
Episode 11889: Reward = 90.000000, Mean reward (over 100 episodes) = 119.360000
Episode 11890: Reward = 110.000000, Mean reward (over 100 episodes) = 119.200000
Episode 11891: Reward = 106.000000, Mean reward (over 100 episodes) = 118.890000
Episode 11892: Reward = 113.000

Episode 11983: Reward = 132.000000, Mean reward (over 100 episodes) = 114.540000
Episode 11984: Reward = 95.000000, Mean reward (over 100 episodes) = 114.500000
Episode 11985: Reward = 96.000000, Mean reward (over 100 episodes) = 114.060000
Episode 11986: Reward = 103.000000, Mean reward (over 100 episodes) = 114.030000
Episode 11987: Reward = 91.000000, Mean reward (over 100 episodes) = 113.940000
Episode 11988: Reward = 104.000000, Mean reward (over 100 episodes) = 113.630000
Episode 11989: Reward = 116.000000, Mean reward (over 100 episodes) = 113.890000
Episode 11990: Reward = 108.000000, Mean reward (over 100 episodes) = 113.870000
Episode 11991: Reward = 95.000000, Mean reward (over 100 episodes) = 113.760000
Episode 11992: Reward = 113.000000, Mean reward (over 100 episodes) = 113.760000
Episode 11993: Reward = 100.000000, Mean reward (over 100 episodes) = 113.720000
Episode 11994: Reward = 120.000000, Mean reward (over 100 episodes) = 113.780000
Episode 11995: Reward = 88.00000

Episode 12086: Reward = 107.000000, Mean reward (over 100 episodes) = 110.340000
Episode 12087: Reward = 122.000000, Mean reward (over 100 episodes) = 110.650000
Episode 12088: Reward = 104.000000, Mean reward (over 100 episodes) = 110.650000
Episode 12089: Reward = 134.000000, Mean reward (over 100 episodes) = 110.830000
Episode 12090: Reward = 111.000000, Mean reward (over 100 episodes) = 110.860000
Episode 12091: Reward = 106.000000, Mean reward (over 100 episodes) = 110.970000
Episode 12092: Reward = 110.000000, Mean reward (over 100 episodes) = 110.940000
Episode 12093: Reward = 98.000000, Mean reward (over 100 episodes) = 110.920000
Episode 12094: Reward = 98.000000, Mean reward (over 100 episodes) = 110.700000
Episode 12095: Reward = 96.000000, Mean reward (over 100 episodes) = 110.780000
Episode 12096: Reward = 105.000000, Mean reward (over 100 episodes) = 110.570000
Episode 12097: Reward = 105.000000, Mean reward (over 100 episodes) = 110.590000
Episode 12098: Reward = 133.000

Episode 12192: Reward = 96.000000, Mean reward (over 100 episodes) = 110.010000
Episode 12193: Reward = 120.000000, Mean reward (over 100 episodes) = 110.230000
Episode 12194: Reward = 106.000000, Mean reward (over 100 episodes) = 110.310000
Episode 12195: Reward = 118.000000, Mean reward (over 100 episodes) = 110.530000
Episode 12196: Reward = 92.000000, Mean reward (over 100 episodes) = 110.400000
Episode 12197: Reward = 108.000000, Mean reward (over 100 episodes) = 110.430000
Episode 12198: Reward = 103.000000, Mean reward (over 100 episodes) = 110.130000
Episode 12199: Reward = 112.000000, Mean reward (over 100 episodes) = 110.210000
Episode 12200: Reward = 126.000000, Mean reward (over 100 episodes) = 110.250000
Episode 12201: Reward = 103.000000, Mean reward (over 100 episodes) = 110.360000
Episode 12202: Reward = 96.000000, Mean reward (over 100 episodes) = 110.060000
Episode 12203: Reward = 141.000000, Mean reward (over 100 episodes) = 110.580000
Episode 12204: Reward = 119.000

Episode 12297: Reward = 107.000000, Mean reward (over 100 episodes) = 110.340000
Episode 12298: Reward = 114.000000, Mean reward (over 100 episodes) = 110.450000
Episode 12299: Reward = 133.000000, Mean reward (over 100 episodes) = 110.660000
Episode 12300: Reward = 104.000000, Mean reward (over 100 episodes) = 110.440000
Episode 12301: Reward = 95.000000, Mean reward (over 100 episodes) = 110.360000
Episode 12302: Reward = 98.000000, Mean reward (over 100 episodes) = 110.380000
Episode 12303: Reward = 134.000000, Mean reward (over 100 episodes) = 110.310000
Episode 12304: Reward = 115.000000, Mean reward (over 100 episodes) = 110.270000
Episode 12305: Reward = 113.000000, Mean reward (over 100 episodes) = 110.310000
Episode 12306: Reward = 111.000000, Mean reward (over 100 episodes) = 110.410000
Episode 12307: Reward = 99.000000, Mean reward (over 100 episodes) = 110.420000
Episode 12308: Reward = 125.000000, Mean reward (over 100 episodes) = 110.590000
Episode 12309: Reward = 99.0000

Episode 12399: Reward = 84.000000, Mean reward (over 100 episodes) = 107.370000
Episode 12400: Reward = 100.000000, Mean reward (over 100 episodes) = 107.330000
Episode 12401: Reward = 102.000000, Mean reward (over 100 episodes) = 107.400000
Episode 12402: Reward = 102.000000, Mean reward (over 100 episodes) = 107.440000
Episode 12403: Reward = 131.000000, Mean reward (over 100 episodes) = 107.410000
Episode 12404: Reward = 91.000000, Mean reward (over 100 episodes) = 107.170000
Episode 12405: Reward = 84.000000, Mean reward (over 100 episodes) = 106.880000
Episode 12406: Reward = 96.000000, Mean reward (over 100 episodes) = 106.730000
Episode 12407: Reward = 93.000000, Mean reward (over 100 episodes) = 106.670000
Episode 12408: Reward = 104.000000, Mean reward (over 100 episodes) = 106.460000
Episode 12409: Reward = 107.000000, Mean reward (over 100 episodes) = 106.540000
Episode 12410: Reward = 116.000000, Mean reward (over 100 episodes) = 106.820000
Episode 12411: Reward = 96.000000

Episode 12503: Reward = 94.000000, Mean reward (over 100 episodes) = 104.820000
Episode 12504: Reward = 102.000000, Mean reward (over 100 episodes) = 104.930000
Episode 12505: Reward = 102.000000, Mean reward (over 100 episodes) = 105.110000
Episode 12506: Reward = 86.000000, Mean reward (over 100 episodes) = 105.010000
Episode 12507: Reward = 131.000000, Mean reward (over 100 episodes) = 105.390000
Episode 12508: Reward = 106.000000, Mean reward (over 100 episodes) = 105.410000
Episode 12509: Reward = 87.000000, Mean reward (over 100 episodes) = 105.210000
Episode 12510: Reward = 89.000000, Mean reward (over 100 episodes) = 104.940000
Episode 12511: Reward = 95.000000, Mean reward (over 100 episodes) = 104.930000
Episode 12512: Reward = 146.000000, Mean reward (over 100 episodes) = 105.220000
Episode 12513: Reward = 128.000000, Mean reward (over 100 episodes) = 105.340000
Episode 12514: Reward = 100.000000, Mean reward (over 100 episodes) = 105.130000
Episode 12515: Reward = 95.000000

Episode 12607: Reward = 96.000000, Mean reward (over 100 episodes) = 107.600000
Episode 12608: Reward = 106.000000, Mean reward (over 100 episodes) = 107.600000
Episode 12609: Reward = 134.000000, Mean reward (over 100 episodes) = 108.070000
Episode 12610: Reward = 97.000000, Mean reward (over 100 episodes) = 108.150000
Episode 12611: Reward = 119.000000, Mean reward (over 100 episodes) = 108.390000
Episode 12612: Reward = 119.000000, Mean reward (over 100 episodes) = 108.120000
Episode 12613: Reward = 91.000000, Mean reward (over 100 episodes) = 107.750000
Episode 12614: Reward = 100.000000, Mean reward (over 100 episodes) = 107.750000
Episode 12615: Reward = 130.000000, Mean reward (over 100 episodes) = 108.100000
Episode 12616: Reward = 87.000000, Mean reward (over 100 episodes) = 107.710000
Episode 12617: Reward = 124.000000, Mean reward (over 100 episodes) = 107.940000
Episode 12618: Reward = 141.000000, Mean reward (over 100 episodes) = 108.330000
Episode 12619: Reward = 128.0000

Episode 12711: Reward = 111.000000, Mean reward (over 100 episodes) = 111.800000
Episode 12712: Reward = 115.000000, Mean reward (over 100 episodes) = 111.760000
Episode 12713: Reward = 101.000000, Mean reward (over 100 episodes) = 111.860000
Episode 12714: Reward = 89.000000, Mean reward (over 100 episodes) = 111.750000
Episode 12715: Reward = 104.000000, Mean reward (over 100 episodes) = 111.490000
Episode 12716: Reward = 99.000000, Mean reward (over 100 episodes) = 111.610000
Episode 12717: Reward = 106.000000, Mean reward (over 100 episodes) = 111.430000
Episode 12718: Reward = 101.000000, Mean reward (over 100 episodes) = 111.030000
Episode 12719: Reward = 97.000000, Mean reward (over 100 episodes) = 110.720000
Episode 12720: Reward = 99.000000, Mean reward (over 100 episodes) = 110.690000
Episode 12721: Reward = 108.000000, Mean reward (over 100 episodes) = 110.850000
Episode 12722: Reward = 108.000000, Mean reward (over 100 episodes) = 110.710000
Episode 12723: Reward = 110.0000

Episode 12813: Reward = 89.000000, Mean reward (over 100 episodes) = 109.620000
Episode 12814: Reward = 132.000000, Mean reward (over 100 episodes) = 110.050000
Episode 12815: Reward = 108.000000, Mean reward (over 100 episodes) = 110.090000
Episode 12816: Reward = 100.000000, Mean reward (over 100 episodes) = 110.100000
Episode 12817: Reward = 200.000000, Mean reward (over 100 episodes) = 111.040000
Episode 12818: Reward = 122.000000, Mean reward (over 100 episodes) = 111.250000
Episode 12819: Reward = 106.000000, Mean reward (over 100 episodes) = 111.340000
Episode 12820: Reward = 136.000000, Mean reward (over 100 episodes) = 111.710000
Episode 12821: Reward = 110.000000, Mean reward (over 100 episodes) = 111.730000
Episode 12822: Reward = 86.000000, Mean reward (over 100 episodes) = 111.510000
Episode 12823: Reward = 103.000000, Mean reward (over 100 episodes) = 111.440000
Episode 12824: Reward = 112.000000, Mean reward (over 100 episodes) = 111.480000
Episode 12825: Reward = 94.000

Episode 12915: Reward = 105.000000, Mean reward (over 100 episodes) = 113.710000
Episode 12916: Reward = 103.000000, Mean reward (over 100 episodes) = 113.740000
Episode 12917: Reward = 99.000000, Mean reward (over 100 episodes) = 112.730000
Episode 12918: Reward = 105.000000, Mean reward (over 100 episodes) = 112.560000
Episode 12919: Reward = 159.000000, Mean reward (over 100 episodes) = 113.090000
Episode 12920: Reward = 98.000000, Mean reward (over 100 episodes) = 112.710000
Episode 12921: Reward = 104.000000, Mean reward (over 100 episodes) = 112.650000
Episode 12922: Reward = 98.000000, Mean reward (over 100 episodes) = 112.770000
Episode 12923: Reward = 101.000000, Mean reward (over 100 episodes) = 112.750000
Episode 12924: Reward = 119.000000, Mean reward (over 100 episodes) = 112.820000
Episode 12925: Reward = 136.000000, Mean reward (over 100 episodes) = 113.240000
Episode 12926: Reward = 106.000000, Mean reward (over 100 episodes) = 113.180000
Episode 12927: Reward = 101.000

Episode 13018: Reward = 118.000000, Mean reward (over 100 episodes) = 110.120000
Episode 13019: Reward = 89.000000, Mean reward (over 100 episodes) = 109.420000
Episode 13020: Reward = 144.000000, Mean reward (over 100 episodes) = 109.880000
Episode 13021: Reward = 117.000000, Mean reward (over 100 episodes) = 110.010000
Episode 13022: Reward = 99.000000, Mean reward (over 100 episodes) = 110.020000
Episode 13023: Reward = 97.000000, Mean reward (over 100 episodes) = 109.980000
Episode 13024: Reward = 128.000000, Mean reward (over 100 episodes) = 110.070000
Episode 13025: Reward = 139.000000, Mean reward (over 100 episodes) = 110.100000
Episode 13026: Reward = 93.000000, Mean reward (over 100 episodes) = 109.970000
Episode 13027: Reward = 124.000000, Mean reward (over 100 episodes) = 110.200000
Episode 13028: Reward = 121.000000, Mean reward (over 100 episodes) = 110.120000
Episode 13029: Reward = 127.000000, Mean reward (over 100 episodes) = 109.960000
Episode 13030: Reward = 133.0000

Episode 13122: Reward = 108.000000, Mean reward (over 100 episodes) = 112.780000
Episode 13123: Reward = 112.000000, Mean reward (over 100 episodes) = 112.930000
Episode 13124: Reward = 147.000000, Mean reward (over 100 episodes) = 113.120000
Episode 13125: Reward = 107.000000, Mean reward (over 100 episodes) = 112.800000
Episode 13126: Reward = 125.000000, Mean reward (over 100 episodes) = 113.120000
Episode 13127: Reward = 120.000000, Mean reward (over 100 episodes) = 113.080000
Episode 13128: Reward = 91.000000, Mean reward (over 100 episodes) = 112.780000
Episode 13129: Reward = 110.000000, Mean reward (over 100 episodes) = 112.610000
Episode 13130: Reward = 96.000000, Mean reward (over 100 episodes) = 112.240000
Episode 13131: Reward = 94.000000, Mean reward (over 100 episodes) = 112.200000
Episode 13132: Reward = 106.000000, Mean reward (over 100 episodes) = 112.170000
Episode 13133: Reward = 88.000000, Mean reward (over 100 episodes) = 111.800000
Episode 13134: Reward = 120.0000

Episode 13224: Reward = 104.000000, Mean reward (over 100 episodes) = 109.600000
Episode 13225: Reward = 98.000000, Mean reward (over 100 episodes) = 109.510000
Episode 13226: Reward = 124.000000, Mean reward (over 100 episodes) = 109.500000
Episode 13227: Reward = 103.000000, Mean reward (over 100 episodes) = 109.330000
Episode 13228: Reward = 109.000000, Mean reward (over 100 episodes) = 109.510000
Episode 13229: Reward = 105.000000, Mean reward (over 100 episodes) = 109.460000
Episode 13230: Reward = 92.000000, Mean reward (over 100 episodes) = 109.420000
Episode 13231: Reward = 129.000000, Mean reward (over 100 episodes) = 109.770000
Episode 13232: Reward = 105.000000, Mean reward (over 100 episodes) = 109.760000
Episode 13233: Reward = 109.000000, Mean reward (over 100 episodes) = 109.970000
Episode 13234: Reward = 119.000000, Mean reward (over 100 episodes) = 109.960000
Episode 13235: Reward = 104.000000, Mean reward (over 100 episodes) = 110.000000
Episode 13236: Reward = 85.000

Episode 13328: Reward = 131.000000, Mean reward (over 100 episodes) = 111.240000
Episode 13329: Reward = 114.000000, Mean reward (over 100 episodes) = 111.330000
Episode 13330: Reward = 108.000000, Mean reward (over 100 episodes) = 111.490000
Episode 13331: Reward = 122.000000, Mean reward (over 100 episodes) = 111.420000
Episode 13332: Reward = 132.000000, Mean reward (over 100 episodes) = 111.690000
Episode 13333: Reward = 92.000000, Mean reward (over 100 episodes) = 111.520000
Episode 13334: Reward = 117.000000, Mean reward (over 100 episodes) = 111.500000
Episode 13335: Reward = 119.000000, Mean reward (over 100 episodes) = 111.650000
Episode 13336: Reward = 91.000000, Mean reward (over 100 episodes) = 111.710000
Episode 13337: Reward = 112.000000, Mean reward (over 100 episodes) = 111.740000
Episode 13338: Reward = 108.000000, Mean reward (over 100 episodes) = 111.770000
Episode 13339: Reward = 102.000000, Mean reward (over 100 episodes) = 111.450000
Episode 13340: Reward = 96.000

Episode 13432: Reward = 95.000000, Mean reward (over 100 episodes) = 112.470000
Episode 13433: Reward = 105.000000, Mean reward (over 100 episodes) = 112.600000
Episode 13434: Reward = 128.000000, Mean reward (over 100 episodes) = 112.710000
Episode 13435: Reward = 114.000000, Mean reward (over 100 episodes) = 112.660000
Episode 13436: Reward = 107.000000, Mean reward (over 100 episodes) = 112.820000
Episode 13437: Reward = 113.000000, Mean reward (over 100 episodes) = 112.830000
Episode 13438: Reward = 106.000000, Mean reward (over 100 episodes) = 112.810000
Episode 13439: Reward = 100.000000, Mean reward (over 100 episodes) = 112.790000
Episode 13440: Reward = 129.000000, Mean reward (over 100 episodes) = 113.120000
Episode 13441: Reward = 107.000000, Mean reward (over 100 episodes) = 113.130000
Episode 13442: Reward = 116.000000, Mean reward (over 100 episodes) = 113.030000
Episode 13443: Reward = 115.000000, Mean reward (over 100 episodes) = 113.250000
Episode 13444: Reward = 116.0

Episode 13538: Reward = 116.000000, Mean reward (over 100 episodes) = 118.920000
Episode 13539: Reward = 148.000000, Mean reward (over 100 episodes) = 119.400000
Episode 13540: Reward = 126.000000, Mean reward (over 100 episodes) = 119.370000
Episode 13541: Reward = 119.000000, Mean reward (over 100 episodes) = 119.490000
Episode 13542: Reward = 121.000000, Mean reward (over 100 episodes) = 119.540000
Episode 13543: Reward = 135.000000, Mean reward (over 100 episodes) = 119.740000
Episode 13544: Reward = 106.000000, Mean reward (over 100 episodes) = 119.640000
Episode 13545: Reward = 161.000000, Mean reward (over 100 episodes) = 119.790000
Episode 13546: Reward = 110.000000, Mean reward (over 100 episodes) = 119.570000
Episode 13547: Reward = 140.000000, Mean reward (over 100 episodes) = 120.070000
Episode 13548: Reward = 107.000000, Mean reward (over 100 episodes) = 120.140000
Episode 13549: Reward = 102.000000, Mean reward (over 100 episodes) = 120.220000
Episode 13550: Reward = 106.

Episode 13642: Reward = 143.000000, Mean reward (over 100 episodes) = 124.310000
Episode 13643: Reward = 105.000000, Mean reward (over 100 episodes) = 124.010000
Episode 13644: Reward = 118.000000, Mean reward (over 100 episodes) = 124.130000
Episode 13645: Reward = 139.000000, Mean reward (over 100 episodes) = 123.910000
Episode 13646: Reward = 106.000000, Mean reward (over 100 episodes) = 123.870000
Episode 13647: Reward = 122.000000, Mean reward (over 100 episodes) = 123.690000
Episode 13648: Reward = 127.000000, Mean reward (over 100 episodes) = 123.890000
Episode 13649: Reward = 115.000000, Mean reward (over 100 episodes) = 124.020000
Episode 13650: Reward = 138.000000, Mean reward (over 100 episodes) = 124.340000
Episode 13651: Reward = 121.000000, Mean reward (over 100 episodes) = 124.100000
Episode 13652: Reward = 158.000000, Mean reward (over 100 episodes) = 124.630000
Episode 13653: Reward = 127.000000, Mean reward (over 100 episodes) = 124.750000
Episode 13654: Reward = 130.

Episode 13747: Reward = 149.000000, Mean reward (over 100 episodes) = 126.960000
Episode 13748: Reward = 116.000000, Mean reward (over 100 episodes) = 126.850000
Episode 13749: Reward = 137.000000, Mean reward (over 100 episodes) = 127.070000
Episode 13750: Reward = 109.000000, Mean reward (over 100 episodes) = 126.780000
Episode 13751: Reward = 114.000000, Mean reward (over 100 episodes) = 126.710000
Episode 13752: Reward = 116.000000, Mean reward (over 100 episodes) = 126.290000
Episode 13753: Reward = 102.000000, Mean reward (over 100 episodes) = 126.040000
Episode 13754: Reward = 103.000000, Mean reward (over 100 episodes) = 125.770000
Episode 13755: Reward = 121.000000, Mean reward (over 100 episodes) = 125.920000
Episode 13756: Reward = 117.000000, Mean reward (over 100 episodes) = 126.090000
Episode 13757: Reward = 126.000000, Mean reward (over 100 episodes) = 125.980000
Episode 13758: Reward = 111.000000, Mean reward (over 100 episodes) = 126.050000
Episode 13759: Reward = 108.

Episode 13851: Reward = 163.000000, Mean reward (over 100 episodes) = 126.000000
Episode 13852: Reward = 126.000000, Mean reward (over 100 episodes) = 126.100000
Episode 13853: Reward = 142.000000, Mean reward (over 100 episodes) = 126.500000
Episode 13854: Reward = 109.000000, Mean reward (over 100 episodes) = 126.560000
Episode 13855: Reward = 112.000000, Mean reward (over 100 episodes) = 126.470000
Episode 13856: Reward = 140.000000, Mean reward (over 100 episodes) = 126.700000
Episode 13857: Reward = 101.000000, Mean reward (over 100 episodes) = 126.450000
Episode 13858: Reward = 114.000000, Mean reward (over 100 episodes) = 126.480000
Episode 13859: Reward = 117.000000, Mean reward (over 100 episodes) = 126.570000
Episode 13860: Reward = 137.000000, Mean reward (over 100 episodes) = 126.170000
Episode 13861: Reward = 112.000000, Mean reward (over 100 episodes) = 126.140000
Episode 13862: Reward = 113.000000, Mean reward (over 100 episodes) = 126.050000
Episode 13863: Reward = 200.

Episode 13955: Reward = 112.000000, Mean reward (over 100 episodes) = 126.470000
Episode 13956: Reward = 146.000000, Mean reward (over 100 episodes) = 126.530000
Episode 13957: Reward = 140.000000, Mean reward (over 100 episodes) = 126.920000
Episode 13958: Reward = 116.000000, Mean reward (over 100 episodes) = 126.940000
Episode 13959: Reward = 121.000000, Mean reward (over 100 episodes) = 126.980000
Episode 13960: Reward = 105.000000, Mean reward (over 100 episodes) = 126.660000
Episode 13961: Reward = 146.000000, Mean reward (over 100 episodes) = 127.000000
Episode 13962: Reward = 120.000000, Mean reward (over 100 episodes) = 127.070000
Episode 13963: Reward = 100.000000, Mean reward (over 100 episodes) = 126.070000
Episode 13964: Reward = 156.000000, Mean reward (over 100 episodes) = 126.300000
Episode 13965: Reward = 131.000000, Mean reward (over 100 episodes) = 126.360000
Episode 13966: Reward = 138.000000, Mean reward (over 100 episodes) = 126.440000
Episode 13967: Reward = 147.

Episode 14059: Reward = 125.000000, Mean reward (over 100 episodes) = 124.340000
Episode 14060: Reward = 143.000000, Mean reward (over 100 episodes) = 124.720000
Episode 14061: Reward = 158.000000, Mean reward (over 100 episodes) = 124.840000
Episode 14062: Reward = 99.000000, Mean reward (over 100 episodes) = 124.630000
Episode 14063: Reward = 131.000000, Mean reward (over 100 episodes) = 124.940000
Episode 14064: Reward = 113.000000, Mean reward (over 100 episodes) = 124.510000
Episode 14065: Reward = 117.000000, Mean reward (over 100 episodes) = 124.370000
Episode 14066: Reward = 112.000000, Mean reward (over 100 episodes) = 124.110000
Episode 14067: Reward = 123.000000, Mean reward (over 100 episodes) = 123.870000
Episode 14068: Reward = 121.000000, Mean reward (over 100 episodes) = 123.740000
Episode 14069: Reward = 136.000000, Mean reward (over 100 episodes) = 124.050000
Episode 14070: Reward = 134.000000, Mean reward (over 100 episodes) = 124.320000
Episode 14071: Reward = 166.0

Episode 14163: Reward = 170.000000, Mean reward (over 100 episodes) = 130.530000
Episode 14164: Reward = 124.000000, Mean reward (over 100 episodes) = 130.640000
Episode 14165: Reward = 139.000000, Mean reward (over 100 episodes) = 130.860000
Episode 14166: Reward = 144.000000, Mean reward (over 100 episodes) = 131.180000
Episode 14167: Reward = 167.000000, Mean reward (over 100 episodes) = 131.620000
Episode 14168: Reward = 112.000000, Mean reward (over 100 episodes) = 131.530000
Episode 14169: Reward = 118.000000, Mean reward (over 100 episodes) = 131.350000
Episode 14170: Reward = 111.000000, Mean reward (over 100 episodes) = 131.120000
Episode 14171: Reward = 118.000000, Mean reward (over 100 episodes) = 130.640000
Episode 14172: Reward = 108.000000, Mean reward (over 100 episodes) = 130.380000
Episode 14173: Reward = 110.000000, Mean reward (over 100 episodes) = 130.220000
Episode 14174: Reward = 148.000000, Mean reward (over 100 episodes) = 130.590000
Episode 14175: Reward = 110.

Episode 14266: Reward = 139.000000, Mean reward (over 100 episodes) = 130.330000
Episode 14267: Reward = 135.000000, Mean reward (over 100 episodes) = 130.010000
Episode 14268: Reward = 129.000000, Mean reward (over 100 episodes) = 130.180000
Episode 14269: Reward = 125.000000, Mean reward (over 100 episodes) = 130.250000
Episode 14270: Reward = 129.000000, Mean reward (over 100 episodes) = 130.430000
Episode 14271: Reward = 116.000000, Mean reward (over 100 episodes) = 130.410000
Episode 14272: Reward = 136.000000, Mean reward (over 100 episodes) = 130.690000
Episode 14273: Reward = 135.000000, Mean reward (over 100 episodes) = 130.940000
Episode 14274: Reward = 152.000000, Mean reward (over 100 episodes) = 130.980000
Episode 14275: Reward = 127.000000, Mean reward (over 100 episodes) = 131.150000
Episode 14276: Reward = 135.000000, Mean reward (over 100 episodes) = 131.160000
Episode 14277: Reward = 121.000000, Mean reward (over 100 episodes) = 130.810000
Episode 14278: Reward = 119.

Episode 14369: Reward = 131.000000, Mean reward (over 100 episodes) = 127.740000
Episode 14370: Reward = 156.000000, Mean reward (over 100 episodes) = 128.010000
Episode 14371: Reward = 104.000000, Mean reward (over 100 episodes) = 127.890000
Episode 14372: Reward = 142.000000, Mean reward (over 100 episodes) = 127.950000
Episode 14373: Reward = 104.000000, Mean reward (over 100 episodes) = 127.640000
Episode 14374: Reward = 142.000000, Mean reward (over 100 episodes) = 127.540000
Episode 14375: Reward = 147.000000, Mean reward (over 100 episodes) = 127.740000
Episode 14376: Reward = 108.000000, Mean reward (over 100 episodes) = 127.470000
Episode 14377: Reward = 131.000000, Mean reward (over 100 episodes) = 127.570000
Episode 14378: Reward = 149.000000, Mean reward (over 100 episodes) = 127.870000
Episode 14379: Reward = 148.000000, Mean reward (over 100 episodes) = 128.110000
Episode 14380: Reward = 119.000000, Mean reward (over 100 episodes) = 127.880000
Episode 14381: Reward = 146.

Episode 14473: Reward = 116.000000, Mean reward (over 100 episodes) = 123.650000
Episode 14474: Reward = 155.000000, Mean reward (over 100 episodes) = 123.780000
Episode 14475: Reward = 119.000000, Mean reward (over 100 episodes) = 123.500000
Episode 14476: Reward = 122.000000, Mean reward (over 100 episodes) = 123.640000
Episode 14477: Reward = 150.000000, Mean reward (over 100 episodes) = 123.830000
Episode 14478: Reward = 134.000000, Mean reward (over 100 episodes) = 123.680000
Episode 14479: Reward = 100.000000, Mean reward (over 100 episodes) = 123.200000
Episode 14480: Reward = 115.000000, Mean reward (over 100 episodes) = 123.160000
Episode 14481: Reward = 108.000000, Mean reward (over 100 episodes) = 122.780000
Episode 14482: Reward = 104.000000, Mean reward (over 100 episodes) = 122.640000
Episode 14483: Reward = 96.000000, Mean reward (over 100 episodes) = 122.570000
Episode 14484: Reward = 114.000000, Mean reward (over 100 episodes) = 122.340000
Episode 14485: Reward = 108.0

Episode 14576: Reward = 146.000000, Mean reward (over 100 episodes) = 121.020000
Episode 14577: Reward = 128.000000, Mean reward (over 100 episodes) = 120.800000
Episode 14578: Reward = 135.000000, Mean reward (over 100 episodes) = 120.810000
Episode 14579: Reward = 112.000000, Mean reward (over 100 episodes) = 120.930000
Episode 14580: Reward = 136.000000, Mean reward (over 100 episodes) = 121.140000
Episode 14581: Reward = 139.000000, Mean reward (over 100 episodes) = 121.450000
Episode 14582: Reward = 141.000000, Mean reward (over 100 episodes) = 121.820000
Episode 14583: Reward = 148.000000, Mean reward (over 100 episodes) = 122.340000
Episode 14584: Reward = 97.000000, Mean reward (over 100 episodes) = 122.170000
Episode 14585: Reward = 168.000000, Mean reward (over 100 episodes) = 122.770000
Episode 14586: Reward = 112.000000, Mean reward (over 100 episodes) = 122.520000
Episode 14587: Reward = 141.000000, Mean reward (over 100 episodes) = 122.760000
Episode 14588: Reward = 115.0

Episode 14681: Reward = 104.000000, Mean reward (over 100 episodes) = 121.300000
Episode 14682: Reward = 109.000000, Mean reward (over 100 episodes) = 120.980000
Episode 14683: Reward = 138.000000, Mean reward (over 100 episodes) = 120.880000
Episode 14684: Reward = 101.000000, Mean reward (over 100 episodes) = 120.920000
Episode 14685: Reward = 120.000000, Mean reward (over 100 episodes) = 120.440000
Episode 14686: Reward = 108.000000, Mean reward (over 100 episodes) = 120.400000
Episode 14687: Reward = 106.000000, Mean reward (over 100 episodes) = 120.050000
Episode 14688: Reward = 126.000000, Mean reward (over 100 episodes) = 120.160000
Episode 14689: Reward = 96.000000, Mean reward (over 100 episodes) = 119.780000
Episode 14690: Reward = 114.000000, Mean reward (over 100 episodes) = 119.880000
Episode 14691: Reward = 121.000000, Mean reward (over 100 episodes) = 119.980000
Episode 14692: Reward = 105.000000, Mean reward (over 100 episodes) = 120.000000
Episode 14693: Reward = 118.0

Episode 14783: Reward = 105.000000, Mean reward (over 100 episodes) = 115.210000
Episode 14784: Reward = 123.000000, Mean reward (over 100 episodes) = 115.430000
Episode 14785: Reward = 102.000000, Mean reward (over 100 episodes) = 115.250000
Episode 14786: Reward = 112.000000, Mean reward (over 100 episodes) = 115.290000
Episode 14787: Reward = 134.000000, Mean reward (over 100 episodes) = 115.570000
Episode 14788: Reward = 115.000000, Mean reward (over 100 episodes) = 115.460000
Episode 14789: Reward = 125.000000, Mean reward (over 100 episodes) = 115.750000
Episode 14790: Reward = 117.000000, Mean reward (over 100 episodes) = 115.780000
Episode 14791: Reward = 103.000000, Mean reward (over 100 episodes) = 115.600000
Episode 14792: Reward = 96.000000, Mean reward (over 100 episodes) = 115.510000
Episode 14793: Reward = 118.000000, Mean reward (over 100 episodes) = 115.510000
Episode 14794: Reward = 96.000000, Mean reward (over 100 episodes) = 115.300000
Episode 14795: Reward = 103.00

Episode 14888: Reward = 129.000000, Mean reward (over 100 episodes) = 121.270000
Episode 14889: Reward = 97.000000, Mean reward (over 100 episodes) = 120.990000
Episode 14890: Reward = 129.000000, Mean reward (over 100 episodes) = 121.110000
Episode 14891: Reward = 136.000000, Mean reward (over 100 episodes) = 121.440000
Episode 14892: Reward = 120.000000, Mean reward (over 100 episodes) = 121.680000
Episode 14893: Reward = 112.000000, Mean reward (over 100 episodes) = 121.620000
Episode 14894: Reward = 93.000000, Mean reward (over 100 episodes) = 121.590000
Episode 14895: Reward = 130.000000, Mean reward (over 100 episodes) = 121.860000
Episode 14896: Reward = 106.000000, Mean reward (over 100 episodes) = 121.930000
Episode 14897: Reward = 176.000000, Mean reward (over 100 episodes) = 122.540000
Episode 14898: Reward = 103.000000, Mean reward (over 100 episodes) = 122.420000
Episode 14899: Reward = 110.000000, Mean reward (over 100 episodes) = 122.380000
Episode 14900: Reward = 154.00

Episode 14990: Reward = 122.000000, Mean reward (over 100 episodes) = 118.760000
Episode 14991: Reward = 99.000000, Mean reward (over 100 episodes) = 118.390000
Episode 14992: Reward = 112.000000, Mean reward (over 100 episodes) = 118.310000
Episode 14993: Reward = 161.000000, Mean reward (over 100 episodes) = 118.800000
Episode 14994: Reward = 110.000000, Mean reward (over 100 episodes) = 118.970000
Episode 14995: Reward = 124.000000, Mean reward (over 100 episodes) = 118.910000
Episode 14996: Reward = 144.000000, Mean reward (over 100 episodes) = 119.290000
Episode 14997: Reward = 137.000000, Mean reward (over 100 episodes) = 118.900000
Episode 14998: Reward = 117.000000, Mean reward (over 100 episodes) = 119.040000
Episode 14999: Reward = 104.000000, Mean reward (over 100 episodes) = 118.980000
Episode 15000: Reward = 113.000000, Mean reward (over 100 episodes) = 118.570000
Episode 15001: Reward = 126.000000, Mean reward (over 100 episodes) = 118.660000
Episode 15002: Reward = 134.0

Episode 15092: Reward = 114.000000, Mean reward (over 100 episodes) = 120.390000
Episode 15093: Reward = 99.000000, Mean reward (over 100 episodes) = 119.770000
Episode 15094: Reward = 148.000000, Mean reward (over 100 episodes) = 120.150000
Episode 15095: Reward = 103.000000, Mean reward (over 100 episodes) = 119.940000
Episode 15096: Reward = 133.000000, Mean reward (over 100 episodes) = 119.830000
Episode 15097: Reward = 102.000000, Mean reward (over 100 episodes) = 119.480000
Episode 15098: Reward = 98.000000, Mean reward (over 100 episodes) = 119.290000
Episode 15099: Reward = 110.000000, Mean reward (over 100 episodes) = 119.350000
Episode 15100: Reward = 160.000000, Mean reward (over 100 episodes) = 119.820000
Episode 15101: Reward = 129.000000, Mean reward (over 100 episodes) = 119.850000
Episode 15102: Reward = 92.000000, Mean reward (over 100 episodes) = 119.430000
Episode 15103: Reward = 107.000000, Mean reward (over 100 episodes) = 119.450000
Episode 15104: Reward = 125.000

Episode 15195: Reward = 132.000000, Mean reward (over 100 episodes) = 116.350000
Episode 15196: Reward = 112.000000, Mean reward (over 100 episodes) = 116.140000
Episode 15197: Reward = 150.000000, Mean reward (over 100 episodes) = 116.620000
Episode 15198: Reward = 105.000000, Mean reward (over 100 episodes) = 116.690000
Episode 15199: Reward = 104.000000, Mean reward (over 100 episodes) = 116.630000
Episode 15200: Reward = 131.000000, Mean reward (over 100 episodes) = 116.340000
Episode 15201: Reward = 101.000000, Mean reward (over 100 episodes) = 116.060000
Episode 15202: Reward = 102.000000, Mean reward (over 100 episodes) = 116.160000
Episode 15203: Reward = 127.000000, Mean reward (over 100 episodes) = 116.360000
Episode 15204: Reward = 93.000000, Mean reward (over 100 episodes) = 116.040000
Episode 15205: Reward = 123.000000, Mean reward (over 100 episodes) = 116.300000
Episode 15206: Reward = 91.000000, Mean reward (over 100 episodes) = 116.100000
Episode 15207: Reward = 94.000

Episode 15297: Reward = 136.000000, Mean reward (over 100 episodes) = 114.330000
Episode 15298: Reward = 115.000000, Mean reward (over 100 episodes) = 114.430000
Episode 15299: Reward = 200.000000, Mean reward (over 100 episodes) = 115.390000
Episode 15300: Reward = 100.000000, Mean reward (over 100 episodes) = 115.080000
Episode 15301: Reward = 114.000000, Mean reward (over 100 episodes) = 115.210000
Episode 15302: Reward = 128.000000, Mean reward (over 100 episodes) = 115.470000
Episode 15303: Reward = 117.000000, Mean reward (over 100 episodes) = 115.370000
Episode 15304: Reward = 145.000000, Mean reward (over 100 episodes) = 115.890000
Episode 15305: Reward = 139.000000, Mean reward (over 100 episodes) = 116.050000
Episode 15306: Reward = 104.000000, Mean reward (over 100 episodes) = 116.180000
Episode 15307: Reward = 112.000000, Mean reward (over 100 episodes) = 116.360000
Episode 15308: Reward = 100.000000, Mean reward (over 100 episodes) = 116.110000
Episode 15309: Reward = 115.

Episode 15401: Reward = 157.000000, Mean reward (over 100 episodes) = 118.170000
Episode 15402: Reward = 138.000000, Mean reward (over 100 episodes) = 118.270000
Episode 15403: Reward = 125.000000, Mean reward (over 100 episodes) = 118.350000
Episode 15404: Reward = 126.000000, Mean reward (over 100 episodes) = 118.160000
Episode 15405: Reward = 120.000000, Mean reward (over 100 episodes) = 117.970000
Episode 15406: Reward = 100.000000, Mean reward (over 100 episodes) = 117.930000
Episode 15407: Reward = 112.000000, Mean reward (over 100 episodes) = 117.930000
Episode 15408: Reward = 118.000000, Mean reward (over 100 episodes) = 118.110000
Episode 15409: Reward = 103.000000, Mean reward (over 100 episodes) = 117.990000
Episode 15410: Reward = 108.000000, Mean reward (over 100 episodes) = 117.850000
Episode 15411: Reward = 96.000000, Mean reward (over 100 episodes) = 117.830000
Episode 15412: Reward = 139.000000, Mean reward (over 100 episodes) = 118.250000
Episode 15413: Reward = 104.0

Episode 15503: Reward = 118.000000, Mean reward (over 100 episodes) = 122.780000
Episode 15504: Reward = 127.000000, Mean reward (over 100 episodes) = 122.790000
Episode 15505: Reward = 111.000000, Mean reward (over 100 episodes) = 122.700000
Episode 15506: Reward = 106.000000, Mean reward (over 100 episodes) = 122.760000
Episode 15507: Reward = 109.000000, Mean reward (over 100 episodes) = 122.730000
Episode 15508: Reward = 98.000000, Mean reward (over 100 episodes) = 122.530000
Episode 15509: Reward = 106.000000, Mean reward (over 100 episodes) = 122.560000
Episode 15510: Reward = 122.000000, Mean reward (over 100 episodes) = 122.700000
Episode 15511: Reward = 98.000000, Mean reward (over 100 episodes) = 122.720000
Episode 15512: Reward = 108.000000, Mean reward (over 100 episodes) = 122.410000
Episode 15513: Reward = 112.000000, Mean reward (over 100 episodes) = 122.490000
Episode 15514: Reward = 135.000000, Mean reward (over 100 episodes) = 122.750000
Episode 15515: Reward = 108.00

Episode 15607: Reward = 108.000000, Mean reward (over 100 episodes) = 125.890000
Episode 15608: Reward = 105.000000, Mean reward (over 100 episodes) = 125.960000
Episode 15609: Reward = 122.000000, Mean reward (over 100 episodes) = 126.120000
Episode 15610: Reward = 125.000000, Mean reward (over 100 episodes) = 126.150000
Episode 15611: Reward = 109.000000, Mean reward (over 100 episodes) = 126.260000
Episode 15612: Reward = 100.000000, Mean reward (over 100 episodes) = 126.180000
Episode 15613: Reward = 171.000000, Mean reward (over 100 episodes) = 126.770000
Episode 15614: Reward = 140.000000, Mean reward (over 100 episodes) = 126.820000
Episode 15615: Reward = 110.000000, Mean reward (over 100 episodes) = 126.840000
Episode 15616: Reward = 141.000000, Mean reward (over 100 episodes) = 127.240000
Episode 15617: Reward = 95.000000, Mean reward (over 100 episodes) = 126.980000
Episode 15618: Reward = 133.000000, Mean reward (over 100 episodes) = 126.840000
Episode 15619: Reward = 114.0

Episode 15711: Reward = 159.000000, Mean reward (over 100 episodes) = 124.690000
Episode 15712: Reward = 122.000000, Mean reward (over 100 episodes) = 124.910000
Episode 15713: Reward = 119.000000, Mean reward (over 100 episodes) = 124.390000
Episode 15714: Reward = 102.000000, Mean reward (over 100 episodes) = 124.010000
Episode 15715: Reward = 113.000000, Mean reward (over 100 episodes) = 124.040000
Episode 15716: Reward = 116.000000, Mean reward (over 100 episodes) = 123.790000
Episode 15717: Reward = 108.000000, Mean reward (over 100 episodes) = 123.920000
Episode 15718: Reward = 108.000000, Mean reward (over 100 episodes) = 123.670000
Episode 15719: Reward = 103.000000, Mean reward (over 100 episodes) = 123.560000
Episode 15720: Reward = 111.000000, Mean reward (over 100 episodes) = 123.470000
Episode 15721: Reward = 109.000000, Mean reward (over 100 episodes) = 123.110000
Episode 15722: Reward = 127.000000, Mean reward (over 100 episodes) = 123.120000
Episode 15723: Reward = 129.

Episode 15815: Reward = 134.000000, Mean reward (over 100 episodes) = 126.580000
Episode 15816: Reward = 108.000000, Mean reward (over 100 episodes) = 126.500000
Episode 15817: Reward = 156.000000, Mean reward (over 100 episodes) = 126.980000
Episode 15818: Reward = 154.000000, Mean reward (over 100 episodes) = 127.440000
Episode 15819: Reward = 99.000000, Mean reward (over 100 episodes) = 127.400000
Episode 15820: Reward = 108.000000, Mean reward (over 100 episodes) = 127.370000
Episode 15821: Reward = 129.000000, Mean reward (over 100 episodes) = 127.570000
Episode 15822: Reward = 136.000000, Mean reward (over 100 episodes) = 127.660000
Episode 15823: Reward = 135.000000, Mean reward (over 100 episodes) = 127.720000
Episode 15824: Reward = 121.000000, Mean reward (over 100 episodes) = 127.960000
Episode 15825: Reward = 126.000000, Mean reward (over 100 episodes) = 127.700000
Episode 15826: Reward = 146.000000, Mean reward (over 100 episodes) = 127.950000
Episode 15827: Reward = 132.0

Episode 15919: Reward = 100.000000, Mean reward (over 100 episodes) = 127.900000
Episode 15920: Reward = 104.000000, Mean reward (over 100 episodes) = 127.860000
Episode 15921: Reward = 99.000000, Mean reward (over 100 episodes) = 127.560000
Episode 15922: Reward = 176.000000, Mean reward (over 100 episodes) = 127.960000
Episode 15923: Reward = 116.000000, Mean reward (over 100 episodes) = 127.770000
Episode 15924: Reward = 200.000000, Mean reward (over 100 episodes) = 128.560000
Episode 15925: Reward = 105.000000, Mean reward (over 100 episodes) = 128.350000
Episode 15926: Reward = 114.000000, Mean reward (over 100 episodes) = 128.030000
Episode 15927: Reward = 115.000000, Mean reward (over 100 episodes) = 127.860000
Episode 15928: Reward = 128.000000, Mean reward (over 100 episodes) = 128.040000
Episode 15929: Reward = 109.000000, Mean reward (over 100 episodes) = 127.770000
Episode 15930: Reward = 125.000000, Mean reward (over 100 episodes) = 127.500000
Episode 15931: Reward = 94.00

Episode 16023: Reward = 130.000000, Mean reward (over 100 episodes) = 123.640000
Episode 16024: Reward = 122.000000, Mean reward (over 100 episodes) = 122.860000
Episode 16025: Reward = 105.000000, Mean reward (over 100 episodes) = 122.860000
Episode 16026: Reward = 106.000000, Mean reward (over 100 episodes) = 122.780000
Episode 16027: Reward = 109.000000, Mean reward (over 100 episodes) = 122.720000
Episode 16028: Reward = 121.000000, Mean reward (over 100 episodes) = 122.650000
Episode 16029: Reward = 144.000000, Mean reward (over 100 episodes) = 123.000000
Episode 16030: Reward = 127.000000, Mean reward (over 100 episodes) = 123.020000
Episode 16031: Reward = 121.000000, Mean reward (over 100 episodes) = 123.290000
Episode 16032: Reward = 130.000000, Mean reward (over 100 episodes) = 123.480000
Episode 16033: Reward = 133.000000, Mean reward (over 100 episodes) = 123.780000
Episode 16034: Reward = 148.000000, Mean reward (over 100 episodes) = 124.100000
Episode 16035: Reward = 117.

Episode 16126: Reward = 125.000000, Mean reward (over 100 episodes) = 123.070000
Episode 16127: Reward = 99.000000, Mean reward (over 100 episodes) = 122.970000
Episode 16128: Reward = 109.000000, Mean reward (over 100 episodes) = 122.850000
Episode 16129: Reward = 132.000000, Mean reward (over 100 episodes) = 122.730000
Episode 16130: Reward = 128.000000, Mean reward (over 100 episodes) = 122.740000
Episode 16131: Reward = 109.000000, Mean reward (over 100 episodes) = 122.620000
Episode 16132: Reward = 117.000000, Mean reward (over 100 episodes) = 122.490000
Episode 16133: Reward = 115.000000, Mean reward (over 100 episodes) = 122.310000
Episode 16134: Reward = 120.000000, Mean reward (over 100 episodes) = 122.030000
Episode 16135: Reward = 101.000000, Mean reward (over 100 episodes) = 121.870000
Episode 16136: Reward = 107.000000, Mean reward (over 100 episodes) = 121.840000
Episode 16137: Reward = 141.000000, Mean reward (over 100 episodes) = 122.190000
Episode 16138: Reward = 176.0

Episode 16228: Reward = 200.000000, Mean reward (over 100 episodes) = 129.400000
Episode 16229: Reward = 125.000000, Mean reward (over 100 episodes) = 129.330000
Episode 16230: Reward = 149.000000, Mean reward (over 100 episodes) = 129.540000
Episode 16231: Reward = 136.000000, Mean reward (over 100 episodes) = 129.810000
Episode 16232: Reward = 141.000000, Mean reward (over 100 episodes) = 130.050000
Episode 16233: Reward = 125.000000, Mean reward (over 100 episodes) = 130.150000
Episode 16234: Reward = 148.000000, Mean reward (over 100 episodes) = 130.430000
Episode 16235: Reward = 118.000000, Mean reward (over 100 episodes) = 130.600000
Episode 16236: Reward = 137.000000, Mean reward (over 100 episodes) = 130.900000
Episode 16237: Reward = 118.000000, Mean reward (over 100 episodes) = 130.670000
Episode 16238: Reward = 135.000000, Mean reward (over 100 episodes) = 130.260000
Episode 16239: Reward = 122.000000, Mean reward (over 100 episodes) = 130.380000
Episode 16240: Reward = 130.

Episode 16332: Reward = 98.000000, Mean reward (over 100 episodes) = 128.490000
Episode 16333: Reward = 133.000000, Mean reward (over 100 episodes) = 128.570000
Episode 16334: Reward = 115.000000, Mean reward (over 100 episodes) = 128.240000
Episode 16335: Reward = 121.000000, Mean reward (over 100 episodes) = 128.270000
Episode 16336: Reward = 133.000000, Mean reward (over 100 episodes) = 128.230000
Episode 16337: Reward = 134.000000, Mean reward (over 100 episodes) = 128.390000
Episode 16338: Reward = 127.000000, Mean reward (over 100 episodes) = 128.310000
Episode 16339: Reward = 119.000000, Mean reward (over 100 episodes) = 128.280000
Episode 16340: Reward = 122.000000, Mean reward (over 100 episodes) = 128.200000
Episode 16341: Reward = 104.000000, Mean reward (over 100 episodes) = 127.720000
Episode 16342: Reward = 110.000000, Mean reward (over 100 episodes) = 127.480000
Episode 16343: Reward = 126.000000, Mean reward (over 100 episodes) = 127.360000
Episode 16344: Reward = 106.0

Episode 16436: Reward = 125.000000, Mean reward (over 100 episodes) = 120.750000
Episode 16437: Reward = 146.000000, Mean reward (over 100 episodes) = 120.870000
Episode 16438: Reward = 120.000000, Mean reward (over 100 episodes) = 120.800000
Episode 16439: Reward = 108.000000, Mean reward (over 100 episodes) = 120.690000
Episode 16440: Reward = 113.000000, Mean reward (over 100 episodes) = 120.600000
Episode 16441: Reward = 124.000000, Mean reward (over 100 episodes) = 120.800000
Episode 16442: Reward = 97.000000, Mean reward (over 100 episodes) = 120.670000
Episode 16443: Reward = 156.000000, Mean reward (over 100 episodes) = 120.970000
Episode 16444: Reward = 131.000000, Mean reward (over 100 episodes) = 121.220000
Episode 16445: Reward = 118.000000, Mean reward (over 100 episodes) = 121.210000
Episode 16446: Reward = 111.000000, Mean reward (over 100 episodes) = 120.940000
Episode 16447: Reward = 112.000000, Mean reward (over 100 episodes) = 120.960000
Episode 16448: Reward = 150.0

Episode 16541: Reward = 115.000000, Mean reward (over 100 episodes) = 119.260000
Episode 16542: Reward = 120.000000, Mean reward (over 100 episodes) = 119.490000
Episode 16543: Reward = 126.000000, Mean reward (over 100 episodes) = 119.190000
Episode 16544: Reward = 111.000000, Mean reward (over 100 episodes) = 118.990000
Episode 16545: Reward = 167.000000, Mean reward (over 100 episodes) = 119.480000
Episode 16546: Reward = 116.000000, Mean reward (over 100 episodes) = 119.530000
Episode 16547: Reward = 157.000000, Mean reward (over 100 episodes) = 119.980000
Episode 16548: Reward = 134.000000, Mean reward (over 100 episodes) = 119.820000
Episode 16549: Reward = 142.000000, Mean reward (over 100 episodes) = 120.130000
Episode 16550: Reward = 108.000000, Mean reward (over 100 episodes) = 120.140000
Episode 16551: Reward = 100.000000, Mean reward (over 100 episodes) = 120.110000
Episode 16552: Reward = 122.000000, Mean reward (over 100 episodes) = 120.230000
Episode 16553: Reward = 111.

Episode 16644: Reward = 101.000000, Mean reward (over 100 episodes) = 123.320000
Episode 16645: Reward = 157.000000, Mean reward (over 100 episodes) = 123.220000
Episode 16646: Reward = 117.000000, Mean reward (over 100 episodes) = 123.230000
Episode 16647: Reward = 144.000000, Mean reward (over 100 episodes) = 123.100000
Episode 16648: Reward = 98.000000, Mean reward (over 100 episodes) = 122.740000
Episode 16649: Reward = 136.000000, Mean reward (over 100 episodes) = 122.680000
Episode 16650: Reward = 101.000000, Mean reward (over 100 episodes) = 122.610000
Episode 16651: Reward = 132.000000, Mean reward (over 100 episodes) = 122.930000
Episode 16652: Reward = 106.000000, Mean reward (over 100 episodes) = 122.770000
Episode 16653: Reward = 124.000000, Mean reward (over 100 episodes) = 122.900000
Episode 16654: Reward = 98.000000, Mean reward (over 100 episodes) = 122.750000
Episode 16655: Reward = 153.000000, Mean reward (over 100 episodes) = 123.020000
Episode 16656: Reward = 130.00

Episode 16748: Reward = 127.000000, Mean reward (over 100 episodes) = 123.500000
Episode 16749: Reward = 146.000000, Mean reward (over 100 episodes) = 123.600000
Episode 16750: Reward = 114.000000, Mean reward (over 100 episodes) = 123.730000
Episode 16751: Reward = 108.000000, Mean reward (over 100 episodes) = 123.490000
Episode 16752: Reward = 135.000000, Mean reward (over 100 episodes) = 123.780000
Episode 16753: Reward = 159.000000, Mean reward (over 100 episodes) = 124.130000
Episode 16754: Reward = 119.000000, Mean reward (over 100 episodes) = 124.340000
Episode 16755: Reward = 125.000000, Mean reward (over 100 episodes) = 124.060000
Episode 16756: Reward = 104.000000, Mean reward (over 100 episodes) = 123.800000
Episode 16757: Reward = 98.000000, Mean reward (over 100 episodes) = 123.700000
Episode 16758: Reward = 124.000000, Mean reward (over 100 episodes) = 123.760000
Episode 16759: Reward = 147.000000, Mean reward (over 100 episodes) = 124.240000
Episode 16760: Reward = 98.00

Episode 16852: Reward = 119.000000, Mean reward (over 100 episodes) = 123.520000
Episode 16853: Reward = 123.000000, Mean reward (over 100 episodes) = 123.160000
Episode 16854: Reward = 101.000000, Mean reward (over 100 episodes) = 122.980000
Episode 16855: Reward = 137.000000, Mean reward (over 100 episodes) = 123.100000
Episode 16856: Reward = 119.000000, Mean reward (over 100 episodes) = 123.250000
Episode 16857: Reward = 108.000000, Mean reward (over 100 episodes) = 123.350000
Episode 16858: Reward = 126.000000, Mean reward (over 100 episodes) = 123.370000
Episode 16859: Reward = 132.000000, Mean reward (over 100 episodes) = 123.220000
Episode 16860: Reward = 102.000000, Mean reward (over 100 episodes) = 123.260000
Episode 16861: Reward = 100.000000, Mean reward (over 100 episodes) = 123.030000
Episode 16862: Reward = 103.000000, Mean reward (over 100 episodes) = 122.940000
Episode 16863: Reward = 129.000000, Mean reward (over 100 episodes) = 122.480000
Episode 16864: Reward = 126.

Episode 16955: Reward = 132.000000, Mean reward (over 100 episodes) = 123.120000
Episode 16956: Reward = 134.000000, Mean reward (over 100 episodes) = 123.270000
Episode 16957: Reward = 127.000000, Mean reward (over 100 episodes) = 123.460000
Episode 16958: Reward = 133.000000, Mean reward (over 100 episodes) = 123.530000
Episode 16959: Reward = 126.000000, Mean reward (over 100 episodes) = 123.470000
Episode 16960: Reward = 146.000000, Mean reward (over 100 episodes) = 123.910000
Episode 16961: Reward = 126.000000, Mean reward (over 100 episodes) = 124.170000
Episode 16962: Reward = 115.000000, Mean reward (over 100 episodes) = 124.290000
Episode 16963: Reward = 130.000000, Mean reward (over 100 episodes) = 124.300000
Episode 16964: Reward = 125.000000, Mean reward (over 100 episodes) = 124.290000
Episode 16965: Reward = 128.000000, Mean reward (over 100 episodes) = 124.310000
Episode 16966: Reward = 126.000000, Mean reward (over 100 episodes) = 124.240000
Episode 16967: Reward = 166.

Episode 17059: Reward = 109.000000, Mean reward (over 100 episodes) = 123.460000
Episode 17060: Reward = 147.000000, Mean reward (over 100 episodes) = 123.470000
Episode 17061: Reward = 141.000000, Mean reward (over 100 episodes) = 123.620000
Episode 17062: Reward = 113.000000, Mean reward (over 100 episodes) = 123.600000
Episode 17063: Reward = 126.000000, Mean reward (over 100 episodes) = 123.560000
Episode 17064: Reward = 135.000000, Mean reward (over 100 episodes) = 123.660000
Episode 17065: Reward = 141.000000, Mean reward (over 100 episodes) = 123.790000
Episode 17066: Reward = 128.000000, Mean reward (over 100 episodes) = 123.810000
Episode 17067: Reward = 116.000000, Mean reward (over 100 episodes) = 123.310000
Episode 17068: Reward = 103.000000, Mean reward (over 100 episodes) = 123.190000
Episode 17069: Reward = 114.000000, Mean reward (over 100 episodes) = 122.860000
Episode 17070: Reward = 115.000000, Mean reward (over 100 episodes) = 123.010000
Episode 17071: Reward = 107.

Episode 17163: Reward = 134.000000, Mean reward (over 100 episodes) = 123.940000
Episode 17164: Reward = 122.000000, Mean reward (over 100 episodes) = 123.810000
Episode 17165: Reward = 110.000000, Mean reward (over 100 episodes) = 123.500000
Episode 17166: Reward = 113.000000, Mean reward (over 100 episodes) = 123.350000
Episode 17167: Reward = 116.000000, Mean reward (over 100 episodes) = 123.350000
Episode 17168: Reward = 99.000000, Mean reward (over 100 episodes) = 123.310000
Episode 17169: Reward = 128.000000, Mean reward (over 100 episodes) = 123.450000
Episode 17170: Reward = 112.000000, Mean reward (over 100 episodes) = 123.420000
Episode 17171: Reward = 127.000000, Mean reward (over 100 episodes) = 123.620000
Episode 17172: Reward = 124.000000, Mean reward (over 100 episodes) = 123.640000
Episode 17173: Reward = 105.000000, Mean reward (over 100 episodes) = 123.620000
Episode 17174: Reward = 116.000000, Mean reward (over 100 episodes) = 123.320000
Episode 17175: Reward = 114.0

Episode 17265: Reward = 200.000000, Mean reward (over 100 episodes) = 117.420000
Episode 17266: Reward = 148.000000, Mean reward (over 100 episodes) = 117.770000
Episode 17267: Reward = 92.000000, Mean reward (over 100 episodes) = 117.530000
Episode 17268: Reward = 145.000000, Mean reward (over 100 episodes) = 117.990000
Episode 17269: Reward = 139.000000, Mean reward (over 100 episodes) = 118.100000
Episode 17270: Reward = 91.000000, Mean reward (over 100 episodes) = 117.890000
Episode 17271: Reward = 143.000000, Mean reward (over 100 episodes) = 118.050000
Episode 17272: Reward = 109.000000, Mean reward (over 100 episodes) = 117.900000
Episode 17273: Reward = 104.000000, Mean reward (over 100 episodes) = 117.890000
Episode 17274: Reward = 164.000000, Mean reward (over 100 episodes) = 118.370000
Episode 17275: Reward = 134.000000, Mean reward (over 100 episodes) = 118.570000
Episode 17276: Reward = 114.000000, Mean reward (over 100 episodes) = 118.670000
Episode 17277: Reward = 97.000

Episode 17368: Reward = 121.000000, Mean reward (over 100 episodes) = 114.830000
Episode 17369: Reward = 124.000000, Mean reward (over 100 episodes) = 114.680000
Episode 17370: Reward = 129.000000, Mean reward (over 100 episodes) = 115.060000
Episode 17371: Reward = 115.000000, Mean reward (over 100 episodes) = 114.780000
Episode 17372: Reward = 103.000000, Mean reward (over 100 episodes) = 114.720000
Episode 17373: Reward = 96.000000, Mean reward (over 100 episodes) = 114.640000
Episode 17374: Reward = 113.000000, Mean reward (over 100 episodes) = 114.130000
Episode 17375: Reward = 140.000000, Mean reward (over 100 episodes) = 114.190000
Episode 17376: Reward = 139.000000, Mean reward (over 100 episodes) = 114.440000
Episode 17377: Reward = 104.000000, Mean reward (over 100 episodes) = 114.510000
Episode 17378: Reward = 108.000000, Mean reward (over 100 episodes) = 114.450000
Episode 17379: Reward = 120.000000, Mean reward (over 100 episodes) = 114.670000
Episode 17380: Reward = 107.0

Episode 17471: Reward = 125.000000, Mean reward (over 100 episodes) = 120.630000
Episode 17472: Reward = 112.000000, Mean reward (over 100 episodes) = 120.720000
Episode 17473: Reward = 162.000000, Mean reward (over 100 episodes) = 121.380000
Episode 17474: Reward = 137.000000, Mean reward (over 100 episodes) = 121.620000
Episode 17475: Reward = 144.000000, Mean reward (over 100 episodes) = 121.660000
Episode 17476: Reward = 100.000000, Mean reward (over 100 episodes) = 121.270000
Episode 17477: Reward = 96.000000, Mean reward (over 100 episodes) = 121.190000
Episode 17478: Reward = 148.000000, Mean reward (over 100 episodes) = 121.590000
Episode 17479: Reward = 111.000000, Mean reward (over 100 episodes) = 121.500000
Episode 17480: Reward = 107.000000, Mean reward (over 100 episodes) = 121.500000
Episode 17481: Reward = 114.000000, Mean reward (over 100 episodes) = 121.410000
Episode 17482: Reward = 149.000000, Mean reward (over 100 episodes) = 121.920000
Episode 17483: Reward = 123.0

Episode 17576: Reward = 103.000000, Mean reward (over 100 episodes) = 121.820000
Episode 17577: Reward = 104.000000, Mean reward (over 100 episodes) = 121.900000
Episode 17578: Reward = 141.000000, Mean reward (over 100 episodes) = 121.830000
Episode 17579: Reward = 127.000000, Mean reward (over 100 episodes) = 121.990000
Episode 17580: Reward = 127.000000, Mean reward (over 100 episodes) = 122.190000
Episode 17581: Reward = 115.000000, Mean reward (over 100 episodes) = 122.200000
Episode 17582: Reward = 105.000000, Mean reward (over 100 episodes) = 121.760000
Episode 17583: Reward = 106.000000, Mean reward (over 100 episodes) = 121.590000
Episode 17584: Reward = 115.000000, Mean reward (over 100 episodes) = 121.560000
Episode 17585: Reward = 116.000000, Mean reward (over 100 episodes) = 121.180000
Episode 17586: Reward = 139.000000, Mean reward (over 100 episodes) = 121.080000
Episode 17587: Reward = 114.000000, Mean reward (over 100 episodes) = 121.030000
Episode 17588: Reward = 135.

Episode 17679: Reward = 133.000000, Mean reward (over 100 episodes) = 123.090000
Episode 17680: Reward = 106.000000, Mean reward (over 100 episodes) = 122.880000
Episode 17681: Reward = 98.000000, Mean reward (over 100 episodes) = 122.710000
Episode 17682: Reward = 119.000000, Mean reward (over 100 episodes) = 122.850000
Episode 17683: Reward = 136.000000, Mean reward (over 100 episodes) = 123.150000
Episode 17684: Reward = 125.000000, Mean reward (over 100 episodes) = 123.250000
Episode 17685: Reward = 112.000000, Mean reward (over 100 episodes) = 123.210000
Episode 17686: Reward = 134.000000, Mean reward (over 100 episodes) = 123.160000
Episode 17687: Reward = 134.000000, Mean reward (over 100 episodes) = 123.360000
Episode 17688: Reward = 119.000000, Mean reward (over 100 episodes) = 123.200000
Episode 17689: Reward = 159.000000, Mean reward (over 100 episodes) = 123.690000
Episode 17690: Reward = 113.000000, Mean reward (over 100 episodes) = 123.850000
Episode 17691: Reward = 122.0

Episode 17783: Reward = 120.000000, Mean reward (over 100 episodes) = 127.170000
Episode 17784: Reward = 134.000000, Mean reward (over 100 episodes) = 127.260000
Episode 17785: Reward = 102.000000, Mean reward (over 100 episodes) = 127.160000
Episode 17786: Reward = 100.000000, Mean reward (over 100 episodes) = 126.820000
Episode 17787: Reward = 118.000000, Mean reward (over 100 episodes) = 126.660000
Episode 17788: Reward = 107.000000, Mean reward (over 100 episodes) = 126.540000
Episode 17789: Reward = 122.000000, Mean reward (over 100 episodes) = 126.170000
Episode 17790: Reward = 151.000000, Mean reward (over 100 episodes) = 126.550000
Episode 17791: Reward = 104.000000, Mean reward (over 100 episodes) = 126.370000
Episode 17792: Reward = 107.000000, Mean reward (over 100 episodes) = 126.330000
Episode 17793: Reward = 136.000000, Mean reward (over 100 episodes) = 126.470000
Episode 17794: Reward = 128.000000, Mean reward (over 100 episodes) = 126.440000
Episode 17795: Reward = 137.

Episode 17886: Reward = 129.000000, Mean reward (over 100 episodes) = 128.580000
Episode 17887: Reward = 102.000000, Mean reward (over 100 episodes) = 128.420000
Episode 17888: Reward = 114.000000, Mean reward (over 100 episodes) = 128.490000
Episode 17889: Reward = 139.000000, Mean reward (over 100 episodes) = 128.660000
Episode 17890: Reward = 112.000000, Mean reward (over 100 episodes) = 128.270000
Episode 17891: Reward = 96.000000, Mean reward (over 100 episodes) = 128.190000
Episode 17892: Reward = 116.000000, Mean reward (over 100 episodes) = 128.280000
Episode 17893: Reward = 115.000000, Mean reward (over 100 episodes) = 128.070000
Episode 17894: Reward = 129.000000, Mean reward (over 100 episodes) = 128.080000
Episode 17895: Reward = 160.000000, Mean reward (over 100 episodes) = 128.310000
Episode 17896: Reward = 115.000000, Mean reward (over 100 episodes) = 127.930000
Episode 17897: Reward = 126.000000, Mean reward (over 100 episodes) = 127.910000
Episode 17898: Reward = 108.0

Episode 17990: Reward = 118.000000, Mean reward (over 100 episodes) = 125.780000
Episode 17991: Reward = 111.000000, Mean reward (over 100 episodes) = 125.930000
Episode 17992: Reward = 116.000000, Mean reward (over 100 episodes) = 125.930000
Episode 17993: Reward = 102.000000, Mean reward (over 100 episodes) = 125.800000
Episode 17994: Reward = 107.000000, Mean reward (over 100 episodes) = 125.580000
Episode 17995: Reward = 112.000000, Mean reward (over 100 episodes) = 125.100000
Episode 17996: Reward = 152.000000, Mean reward (over 100 episodes) = 125.470000
Episode 17997: Reward = 129.000000, Mean reward (over 100 episodes) = 125.500000
Episode 17998: Reward = 102.000000, Mean reward (over 100 episodes) = 125.440000
Episode 17999: Reward = 129.000000, Mean reward (over 100 episodes) = 125.540000
Episode 18000: Reward = 117.000000, Mean reward (over 100 episodes) = 125.660000
Episode 18001: Reward = 126.000000, Mean reward (over 100 episodes) = 125.680000
Episode 18002: Reward = 130.

Episode 18094: Reward = 111.000000, Mean reward (over 100 episodes) = 123.790000
Episode 18095: Reward = 133.000000, Mean reward (over 100 episodes) = 124.000000
Episode 18096: Reward = 151.000000, Mean reward (over 100 episodes) = 123.990000
Episode 18097: Reward = 120.000000, Mean reward (over 100 episodes) = 123.900000
Episode 18098: Reward = 126.000000, Mean reward (over 100 episodes) = 124.140000
Episode 18099: Reward = 102.000000, Mean reward (over 100 episodes) = 123.870000
Episode 18100: Reward = 145.000000, Mean reward (over 100 episodes) = 124.150000
Episode 18101: Reward = 127.000000, Mean reward (over 100 episodes) = 124.160000
Episode 18102: Reward = 112.000000, Mean reward (over 100 episodes) = 123.980000
Episode 18103: Reward = 136.000000, Mean reward (over 100 episodes) = 124.090000
Episode 18104: Reward = 118.000000, Mean reward (over 100 episodes) = 124.080000
Episode 18105: Reward = 121.000000, Mean reward (over 100 episodes) = 123.960000
Episode 18106: Reward = 131.

Episode 18197: Reward = 146.000000, Mean reward (over 100 episodes) = 127.830000
Episode 18198: Reward = 136.000000, Mean reward (over 100 episodes) = 127.930000
Episode 18199: Reward = 143.000000, Mean reward (over 100 episodes) = 128.340000
Episode 18200: Reward = 146.000000, Mean reward (over 100 episodes) = 128.350000
Episode 18201: Reward = 109.000000, Mean reward (over 100 episodes) = 128.170000
Episode 18202: Reward = 114.000000, Mean reward (over 100 episodes) = 128.190000
Episode 18203: Reward = 126.000000, Mean reward (over 100 episodes) = 128.090000
Episode 18204: Reward = 100.000000, Mean reward (over 100 episodes) = 127.910000
Episode 18205: Reward = 130.000000, Mean reward (over 100 episodes) = 128.000000
Episode 18206: Reward = 119.000000, Mean reward (over 100 episodes) = 127.880000
Episode 18207: Reward = 139.000000, Mean reward (over 100 episodes) = 127.830000
Episode 18208: Reward = 130.000000, Mean reward (over 100 episodes) = 128.070000
Episode 18209: Reward = 100.

Episode 18300: Reward = 141.000000, Mean reward (over 100 episodes) = 127.600000
Episode 18301: Reward = 153.000000, Mean reward (over 100 episodes) = 128.040000
Episode 18302: Reward = 146.000000, Mean reward (over 100 episodes) = 128.360000
Episode 18303: Reward = 125.000000, Mean reward (over 100 episodes) = 128.350000
Episode 18304: Reward = 133.000000, Mean reward (over 100 episodes) = 128.680000
Episode 18305: Reward = 123.000000, Mean reward (over 100 episodes) = 128.610000
Episode 18306: Reward = 150.000000, Mean reward (over 100 episodes) = 128.920000
Episode 18307: Reward = 136.000000, Mean reward (over 100 episodes) = 128.890000
Episode 18308: Reward = 110.000000, Mean reward (over 100 episodes) = 128.690000
Episode 18309: Reward = 119.000000, Mean reward (over 100 episodes) = 128.880000
Episode 18310: Reward = 138.000000, Mean reward (over 100 episodes) = 128.990000
Episode 18311: Reward = 136.000000, Mean reward (over 100 episodes) = 129.150000
Episode 18312: Reward = 113.

Episode 18404: Reward = 144.000000, Mean reward (over 100 episodes) = 135.260000
Episode 18405: Reward = 119.000000, Mean reward (over 100 episodes) = 135.220000
Episode 18406: Reward = 141.000000, Mean reward (over 100 episodes) = 135.130000
Episode 18407: Reward = 119.000000, Mean reward (over 100 episodes) = 134.960000
Episode 18408: Reward = 117.000000, Mean reward (over 100 episodes) = 135.030000
Episode 18409: Reward = 200.000000, Mean reward (over 100 episodes) = 135.840000
Episode 18410: Reward = 132.000000, Mean reward (over 100 episodes) = 135.780000
Episode 18411: Reward = 157.000000, Mean reward (over 100 episodes) = 135.990000
Episode 18412: Reward = 147.000000, Mean reward (over 100 episodes) = 136.330000
Episode 18413: Reward = 145.000000, Mean reward (over 100 episodes) = 136.590000
Episode 18414: Reward = 119.000000, Mean reward (over 100 episodes) = 136.340000
Episode 18415: Reward = 123.000000, Mean reward (over 100 episodes) = 136.070000
Episode 18416: Reward = 143.

Episode 18507: Reward = 128.000000, Mean reward (over 100 episodes) = 139.010000
Episode 18508: Reward = 146.000000, Mean reward (over 100 episodes) = 139.300000
Episode 18509: Reward = 125.000000, Mean reward (over 100 episodes) = 138.550000
Episode 18510: Reward = 148.000000, Mean reward (over 100 episodes) = 138.710000
Episode 18511: Reward = 117.000000, Mean reward (over 100 episodes) = 138.310000
Episode 18512: Reward = 138.000000, Mean reward (over 100 episodes) = 138.220000
Episode 18513: Reward = 146.000000, Mean reward (over 100 episodes) = 138.230000
Episode 18514: Reward = 136.000000, Mean reward (over 100 episodes) = 138.400000
Episode 18515: Reward = 152.000000, Mean reward (over 100 episodes) = 138.690000
Episode 18516: Reward = 150.000000, Mean reward (over 100 episodes) = 138.760000
Episode 18517: Reward = 140.000000, Mean reward (over 100 episodes) = 138.960000
Episode 18518: Reward = 128.000000, Mean reward (over 100 episodes) = 138.820000
Episode 18519: Reward = 130.

Episode 18610: Reward = 123.000000, Mean reward (over 100 episodes) = 138.740000
Episode 18611: Reward = 128.000000, Mean reward (over 100 episodes) = 138.850000
Episode 18612: Reward = 115.000000, Mean reward (over 100 episodes) = 138.620000
Episode 18613: Reward = 131.000000, Mean reward (over 100 episodes) = 138.470000
Episode 18614: Reward = 123.000000, Mean reward (over 100 episodes) = 138.340000
Episode 18615: Reward = 108.000000, Mean reward (over 100 episodes) = 137.900000
Episode 18616: Reward = 153.000000, Mean reward (over 100 episodes) = 137.930000
Episode 18617: Reward = 121.000000, Mean reward (over 100 episodes) = 137.740000
Episode 18618: Reward = 120.000000, Mean reward (over 100 episodes) = 137.660000
Episode 18619: Reward = 127.000000, Mean reward (over 100 episodes) = 137.630000
Episode 18620: Reward = 142.000000, Mean reward (over 100 episodes) = 137.750000
Episode 18621: Reward = 130.000000, Mean reward (over 100 episodes) = 137.620000
Episode 18622: Reward = 112.

Episode 18714: Reward = 143.000000, Mean reward (over 100 episodes) = 137.370000
Episode 18715: Reward = 136.000000, Mean reward (over 100 episodes) = 137.650000
Episode 18716: Reward = 164.000000, Mean reward (over 100 episodes) = 137.760000
Episode 18717: Reward = 139.000000, Mean reward (over 100 episodes) = 137.940000
Episode 18718: Reward = 129.000000, Mean reward (over 100 episodes) = 138.030000
Episode 18719: Reward = 152.000000, Mean reward (over 100 episodes) = 138.280000
Episode 18720: Reward = 149.000000, Mean reward (over 100 episodes) = 138.350000
Episode 18721: Reward = 141.000000, Mean reward (over 100 episodes) = 138.460000
Episode 18722: Reward = 146.000000, Mean reward (over 100 episodes) = 138.800000
Episode 18723: Reward = 169.000000, Mean reward (over 100 episodes) = 138.920000
Episode 18724: Reward = 133.000000, Mean reward (over 100 episodes) = 138.970000
Episode 18725: Reward = 116.000000, Mean reward (over 100 episodes) = 138.650000
Episode 18726: Reward = 168.

Episode 18816: Reward = 137.000000, Mean reward (over 100 episodes) = 141.040000
Episode 18817: Reward = 130.000000, Mean reward (over 100 episodes) = 140.950000
Episode 18818: Reward = 119.000000, Mean reward (over 100 episodes) = 140.850000
Episode 18819: Reward = 136.000000, Mean reward (over 100 episodes) = 140.690000
Episode 18820: Reward = 135.000000, Mean reward (over 100 episodes) = 140.550000
Episode 18821: Reward = 150.000000, Mean reward (over 100 episodes) = 140.640000
Episode 18822: Reward = 126.000000, Mean reward (over 100 episodes) = 140.440000
Episode 18823: Reward = 137.000000, Mean reward (over 100 episodes) = 140.120000
Episode 18824: Reward = 132.000000, Mean reward (over 100 episodes) = 140.110000
Episode 18825: Reward = 134.000000, Mean reward (over 100 episodes) = 140.290000
Episode 18826: Reward = 102.000000, Mean reward (over 100 episodes) = 139.630000
Episode 18827: Reward = 111.000000, Mean reward (over 100 episodes) = 139.400000
Episode 18828: Reward = 133.

Episode 18920: Reward = 116.000000, Mean reward (over 100 episodes) = 138.610000
Episode 18921: Reward = 145.000000, Mean reward (over 100 episodes) = 138.560000
Episode 18922: Reward = 133.000000, Mean reward (over 100 episodes) = 138.630000
Episode 18923: Reward = 160.000000, Mean reward (over 100 episodes) = 138.860000
Episode 18924: Reward = 139.000000, Mean reward (over 100 episodes) = 138.930000
Episode 18925: Reward = 177.000000, Mean reward (over 100 episodes) = 139.360000
Episode 18926: Reward = 137.000000, Mean reward (over 100 episodes) = 139.710000
Episode 18927: Reward = 153.000000, Mean reward (over 100 episodes) = 140.130000
Episode 18928: Reward = 136.000000, Mean reward (over 100 episodes) = 140.160000
Episode 18929: Reward = 148.000000, Mean reward (over 100 episodes) = 140.360000
Episode 18930: Reward = 122.000000, Mean reward (over 100 episodes) = 140.440000
Episode 18931: Reward = 130.000000, Mean reward (over 100 episodes) = 140.410000
Episode 18932: Reward = 145.

Episode 19023: Reward = 137.000000, Mean reward (over 100 episodes) = 140.740000
Episode 19024: Reward = 117.000000, Mean reward (over 100 episodes) = 140.520000
Episode 19025: Reward = 142.000000, Mean reward (over 100 episodes) = 140.170000
Episode 19026: Reward = 200.000000, Mean reward (over 100 episodes) = 140.800000
Episode 19027: Reward = 166.000000, Mean reward (over 100 episodes) = 140.930000
Episode 19028: Reward = 139.000000, Mean reward (over 100 episodes) = 140.960000
Episode 19029: Reward = 150.000000, Mean reward (over 100 episodes) = 140.980000
Episode 19030: Reward = 119.000000, Mean reward (over 100 episodes) = 140.950000
Episode 19031: Reward = 134.000000, Mean reward (over 100 episodes) = 140.990000
Episode 19032: Reward = 122.000000, Mean reward (over 100 episodes) = 140.760000
Episode 19033: Reward = 139.000000, Mean reward (over 100 episodes) = 140.380000
Episode 19034: Reward = 120.000000, Mean reward (over 100 episodes) = 140.160000
Episode 19035: Reward = 149.

Episode 19128: Reward = 165.000000, Mean reward (over 100 episodes) = 138.840000
Episode 19129: Reward = 142.000000, Mean reward (over 100 episodes) = 138.760000
Episode 19130: Reward = 149.000000, Mean reward (over 100 episodes) = 139.060000
Episode 19131: Reward = 161.000000, Mean reward (over 100 episodes) = 139.330000
Episode 19132: Reward = 111.000000, Mean reward (over 100 episodes) = 139.220000
Episode 19133: Reward = 120.000000, Mean reward (over 100 episodes) = 139.030000
Episode 19134: Reward = 158.000000, Mean reward (over 100 episodes) = 139.410000
Episode 19135: Reward = 125.000000, Mean reward (over 100 episodes) = 139.170000
Episode 19136: Reward = 142.000000, Mean reward (over 100 episodes) = 139.240000
Episode 19137: Reward = 200.000000, Mean reward (over 100 episodes) = 139.880000
Episode 19138: Reward = 155.000000, Mean reward (over 100 episodes) = 140.150000
Episode 19139: Reward = 167.000000, Mean reward (over 100 episodes) = 140.420000
Episode 19140: Reward = 142.

Episode 19232: Reward = 132.000000, Mean reward (over 100 episodes) = 143.550000
Episode 19233: Reward = 166.000000, Mean reward (over 100 episodes) = 144.010000
Episode 19234: Reward = 123.000000, Mean reward (over 100 episodes) = 143.660000
Episode 19235: Reward = 126.000000, Mean reward (over 100 episodes) = 143.670000
Episode 19236: Reward = 112.000000, Mean reward (over 100 episodes) = 143.370000
Episode 19237: Reward = 135.000000, Mean reward (over 100 episodes) = 142.720000
Episode 19238: Reward = 130.000000, Mean reward (over 100 episodes) = 142.470000
Episode 19239: Reward = 124.000000, Mean reward (over 100 episodes) = 142.040000
Episode 19240: Reward = 152.000000, Mean reward (over 100 episodes) = 142.140000
Episode 19241: Reward = 151.000000, Mean reward (over 100 episodes) = 142.280000
Episode 19242: Reward = 164.000000, Mean reward (over 100 episodes) = 142.760000
Episode 19243: Reward = 144.000000, Mean reward (over 100 episodes) = 142.740000
Episode 19244: Reward = 144.

Episode 19335: Reward = 118.000000, Mean reward (over 100 episodes) = 134.670000
Episode 19336: Reward = 154.000000, Mean reward (over 100 episodes) = 135.090000
Episode 19337: Reward = 131.000000, Mean reward (over 100 episodes) = 135.050000
Episode 19338: Reward = 129.000000, Mean reward (over 100 episodes) = 135.040000
Episode 19339: Reward = 145.000000, Mean reward (over 100 episodes) = 135.250000
Episode 19340: Reward = 121.000000, Mean reward (over 100 episodes) = 134.940000
Episode 19341: Reward = 143.000000, Mean reward (over 100 episodes) = 134.860000
Episode 19342: Reward = 129.000000, Mean reward (over 100 episodes) = 134.510000
Episode 19343: Reward = 135.000000, Mean reward (over 100 episodes) = 134.420000
Episode 19344: Reward = 200.000000, Mean reward (over 100 episodes) = 134.980000
Episode 19345: Reward = 115.000000, Mean reward (over 100 episodes) = 134.760000
Episode 19346: Reward = 200.000000, Mean reward (over 100 episodes) = 135.430000
Episode 19347: Reward = 121.

Episode 19440: Reward = 145.000000, Mean reward (over 100 episodes) = 139.110000
Episode 19441: Reward = 136.000000, Mean reward (over 100 episodes) = 139.040000
Episode 19442: Reward = 112.000000, Mean reward (over 100 episodes) = 138.870000
Episode 19443: Reward = 128.000000, Mean reward (over 100 episodes) = 138.800000
Episode 19444: Reward = 112.000000, Mean reward (over 100 episodes) = 137.920000
Episode 19445: Reward = 131.000000, Mean reward (over 100 episodes) = 138.080000
Episode 19446: Reward = 151.000000, Mean reward (over 100 episodes) = 137.590000
Episode 19447: Reward = 121.000000, Mean reward (over 100 episodes) = 137.590000
Episode 19448: Reward = 121.000000, Mean reward (over 100 episodes) = 137.510000
Episode 19449: Reward = 132.000000, Mean reward (over 100 episodes) = 137.380000
Episode 19450: Reward = 155.000000, Mean reward (over 100 episodes) = 137.650000
Episode 19451: Reward = 156.000000, Mean reward (over 100 episodes) = 138.140000
Episode 19452: Reward = 158.

Episode 19545: Reward = 135.000000, Mean reward (over 100 episodes) = 138.220000
Episode 19546: Reward = 129.000000, Mean reward (over 100 episodes) = 138.000000
Episode 19547: Reward = 121.000000, Mean reward (over 100 episodes) = 138.000000
Episode 19548: Reward = 141.000000, Mean reward (over 100 episodes) = 138.200000
Episode 19549: Reward = 116.000000, Mean reward (over 100 episodes) = 138.040000
Episode 19550: Reward = 132.000000, Mean reward (over 100 episodes) = 137.810000
Episode 19551: Reward = 135.000000, Mean reward (over 100 episodes) = 137.600000
Episode 19552: Reward = 105.000000, Mean reward (over 100 episodes) = 137.070000
Episode 19553: Reward = 120.000000, Mean reward (over 100 episodes) = 136.780000
Episode 19554: Reward = 103.000000, Mean reward (over 100 episodes) = 136.650000
Episode 19555: Reward = 129.000000, Mean reward (over 100 episodes) = 136.560000
Episode 19556: Reward = 115.000000, Mean reward (over 100 episodes) = 136.280000
Episode 19557: Reward = 111.

Episode 19648: Reward = 200.000000, Mean reward (over 100 episodes) = 130.430000
Episode 19649: Reward = 112.000000, Mean reward (over 100 episodes) = 130.390000
Episode 19650: Reward = 108.000000, Mean reward (over 100 episodes) = 130.150000
Episode 19651: Reward = 135.000000, Mean reward (over 100 episodes) = 130.150000
Episode 19652: Reward = 141.000000, Mean reward (over 100 episodes) = 130.510000
Episode 19653: Reward = 146.000000, Mean reward (over 100 episodes) = 130.770000
Episode 19654: Reward = 114.000000, Mean reward (over 100 episodes) = 130.880000
Episode 19655: Reward = 119.000000, Mean reward (over 100 episodes) = 130.780000
Episode 19656: Reward = 138.000000, Mean reward (over 100 episodes) = 131.010000
Episode 19657: Reward = 141.000000, Mean reward (over 100 episodes) = 131.310000
Episode 19658: Reward = 160.000000, Mean reward (over 100 episodes) = 131.610000
Episode 19659: Reward = 130.000000, Mean reward (over 100 episodes) = 131.630000
Episode 19660: Reward = 152.

Episode 19751: Reward = 132.000000, Mean reward (over 100 episodes) = 130.040000
Episode 19752: Reward = 126.000000, Mean reward (over 100 episodes) = 129.890000
Episode 19753: Reward = 148.000000, Mean reward (over 100 episodes) = 129.910000
Episode 19754: Reward = 173.000000, Mean reward (over 100 episodes) = 130.500000
Episode 19755: Reward = 131.000000, Mean reward (over 100 episodes) = 130.620000
Episode 19756: Reward = 132.000000, Mean reward (over 100 episodes) = 130.560000
Episode 19757: Reward = 115.000000, Mean reward (over 100 episodes) = 130.300000
Episode 19758: Reward = 145.000000, Mean reward (over 100 episodes) = 130.150000
Episode 19759: Reward = 120.000000, Mean reward (over 100 episodes) = 130.050000
Episode 19760: Reward = 115.000000, Mean reward (over 100 episodes) = 129.680000
Episode 19761: Reward = 139.000000, Mean reward (over 100 episodes) = 129.780000
Episode 19762: Reward = 149.000000, Mean reward (over 100 episodes) = 129.890000
Episode 19763: Reward = 109.

Episode 19854: Reward = 109.000000, Mean reward (over 100 episodes) = 127.610000
Episode 19855: Reward = 146.000000, Mean reward (over 100 episodes) = 127.760000
Episode 19856: Reward = 115.000000, Mean reward (over 100 episodes) = 127.590000
Episode 19857: Reward = 121.000000, Mean reward (over 100 episodes) = 127.650000
Episode 19858: Reward = 137.000000, Mean reward (over 100 episodes) = 127.570000
Episode 19859: Reward = 112.000000, Mean reward (over 100 episodes) = 127.490000
Episode 19860: Reward = 155.000000, Mean reward (over 100 episodes) = 127.890000
Episode 19861: Reward = 146.000000, Mean reward (over 100 episodes) = 127.960000
Episode 19862: Reward = 120.000000, Mean reward (over 100 episodes) = 127.670000
Episode 19863: Reward = 123.000000, Mean reward (over 100 episodes) = 127.810000
Episode 19864: Reward = 127.000000, Mean reward (over 100 episodes) = 127.690000
Episode 19865: Reward = 107.000000, Mean reward (over 100 episodes) = 127.360000
Episode 19866: Reward = 107.

Episode 19958: Reward = 176.000000, Mean reward (over 100 episodes) = 126.940000
Episode 19959: Reward = 103.000000, Mean reward (over 100 episodes) = 126.850000
Episode 19960: Reward = 115.000000, Mean reward (over 100 episodes) = 126.450000
Episode 19961: Reward = 159.000000, Mean reward (over 100 episodes) = 126.580000
Episode 19962: Reward = 130.000000, Mean reward (over 100 episodes) = 126.680000
Episode 19963: Reward = 128.000000, Mean reward (over 100 episodes) = 126.730000
Episode 19964: Reward = 117.000000, Mean reward (over 100 episodes) = 126.630000
Episode 19965: Reward = 101.000000, Mean reward (over 100 episodes) = 126.570000
Episode 19966: Reward = 106.000000, Mean reward (over 100 episodes) = 126.560000
Episode 19967: Reward = 131.000000, Mean reward (over 100 episodes) = 126.410000
Episode 19968: Reward = 108.000000, Mean reward (over 100 episodes) = 126.340000
Episode 19969: Reward = 140.000000, Mean reward (over 100 episodes) = 126.520000
Episode 19970: Reward = 127.

Episode 20062: Reward = 159.000000, Mean reward (over 100 episodes) = 127.170000
Episode 20063: Reward = 162.000000, Mean reward (over 100 episodes) = 127.510000
Episode 20064: Reward = 125.000000, Mean reward (over 100 episodes) = 127.590000
Episode 20065: Reward = 139.000000, Mean reward (over 100 episodes) = 127.970000
Episode 20066: Reward = 129.000000, Mean reward (over 100 episodes) = 128.200000
Episode 20067: Reward = 119.000000, Mean reward (over 100 episodes) = 128.080000
Episode 20068: Reward = 114.000000, Mean reward (over 100 episodes) = 128.140000
Episode 20069: Reward = 148.000000, Mean reward (over 100 episodes) = 128.220000
Episode 20070: Reward = 133.000000, Mean reward (over 100 episodes) = 128.280000
Episode 20071: Reward = 123.000000, Mean reward (over 100 episodes) = 128.260000
Episode 20072: Reward = 130.000000, Mean reward (over 100 episodes) = 128.250000
Episode 20073: Reward = 143.000000, Mean reward (over 100 episodes) = 128.660000
Episode 20074: Reward = 141.

Episode 20166: Reward = 122.000000, Mean reward (over 100 episodes) = 130.290000
Episode 20167: Reward = 151.000000, Mean reward (over 100 episodes) = 130.610000
Episode 20168: Reward = 138.000000, Mean reward (over 100 episodes) = 130.850000
Episode 20169: Reward = 129.000000, Mean reward (over 100 episodes) = 130.660000
Episode 20170: Reward = 144.000000, Mean reward (over 100 episodes) = 130.770000
Episode 20171: Reward = 124.000000, Mean reward (over 100 episodes) = 130.780000
Episode 20172: Reward = 133.000000, Mean reward (over 100 episodes) = 130.810000
Episode 20173: Reward = 147.000000, Mean reward (over 100 episodes) = 130.850000
Episode 20174: Reward = 146.000000, Mean reward (over 100 episodes) = 130.900000
Episode 20175: Reward = 140.000000, Mean reward (over 100 episodes) = 131.030000
Episode 20176: Reward = 116.000000, Mean reward (over 100 episodes) = 130.190000
Episode 20177: Reward = 135.000000, Mean reward (over 100 episodes) = 130.220000
Episode 20178: Reward = 200.

Episode 20271: Reward = 131.000000, Mean reward (over 100 episodes) = 134.360000
Episode 20272: Reward = 137.000000, Mean reward (over 100 episodes) = 134.400000
Episode 20273: Reward = 135.000000, Mean reward (over 100 episodes) = 134.280000
Episode 20274: Reward = 103.000000, Mean reward (over 100 episodes) = 133.850000
Episode 20275: Reward = 135.000000, Mean reward (over 100 episodes) = 133.800000
Episode 20276: Reward = 115.000000, Mean reward (over 100 episodes) = 133.790000
Episode 20277: Reward = 131.000000, Mean reward (over 100 episodes) = 133.750000
Episode 20278: Reward = 112.000000, Mean reward (over 100 episodes) = 132.870000
Episode 20279: Reward = 116.000000, Mean reward (over 100 episodes) = 132.850000
Episode 20280: Reward = 117.000000, Mean reward (over 100 episodes) = 132.750000
Episode 20281: Reward = 120.000000, Mean reward (over 100 episodes) = 132.630000
Episode 20282: Reward = 111.000000, Mean reward (over 100 episodes) = 132.350000
Episode 20283: Reward = 127.

Episode 20373: Reward = 112.000000, Mean reward (over 100 episodes) = 128.370000
Episode 20374: Reward = 121.000000, Mean reward (over 100 episodes) = 128.550000
Episode 20375: Reward = 116.000000, Mean reward (over 100 episodes) = 128.360000
Episode 20376: Reward = 108.000000, Mean reward (over 100 episodes) = 128.290000
Episode 20377: Reward = 138.000000, Mean reward (over 100 episodes) = 128.360000
Episode 20378: Reward = 105.000000, Mean reward (over 100 episodes) = 128.290000
Episode 20379: Reward = 124.000000, Mean reward (over 100 episodes) = 128.370000
Episode 20380: Reward = 132.000000, Mean reward (over 100 episodes) = 128.520000
Episode 20381: Reward = 130.000000, Mean reward (over 100 episodes) = 128.620000
Episode 20382: Reward = 131.000000, Mean reward (over 100 episodes) = 128.820000
Episode 20383: Reward = 136.000000, Mean reward (over 100 episodes) = 128.910000
Episode 20384: Reward = 129.000000, Mean reward (over 100 episodes) = 128.930000
Episode 20385: Reward = 167.

Episode 20476: Reward = 161.000000, Mean reward (over 100 episodes) = 129.020000
Episode 20477: Reward = 151.000000, Mean reward (over 100 episodes) = 129.150000
Episode 20478: Reward = 133.000000, Mean reward (over 100 episodes) = 129.430000
Episode 20479: Reward = 135.000000, Mean reward (over 100 episodes) = 129.540000
Episode 20480: Reward = 156.000000, Mean reward (over 100 episodes) = 129.780000
Episode 20481: Reward = 125.000000, Mean reward (over 100 episodes) = 129.730000
Episode 20482: Reward = 159.000000, Mean reward (over 100 episodes) = 130.010000
Episode 20483: Reward = 134.000000, Mean reward (over 100 episodes) = 129.990000
Episode 20484: Reward = 123.000000, Mean reward (over 100 episodes) = 129.930000
Episode 20485: Reward = 147.000000, Mean reward (over 100 episodes) = 129.730000
Episode 20486: Reward = 107.000000, Mean reward (over 100 episodes) = 129.610000
Episode 20487: Reward = 116.000000, Mean reward (over 100 episodes) = 129.530000
Episode 20488: Reward = 141.

Episode 20579: Reward = 132.000000, Mean reward (over 100 episodes) = 132.000000
Episode 20580: Reward = 134.000000, Mean reward (over 100 episodes) = 131.780000
Episode 20581: Reward = 113.000000, Mean reward (over 100 episodes) = 131.660000
Episode 20582: Reward = 145.000000, Mean reward (over 100 episodes) = 131.520000
Episode 20583: Reward = 131.000000, Mean reward (over 100 episodes) = 131.490000
Episode 20584: Reward = 128.000000, Mean reward (over 100 episodes) = 131.540000
Episode 20585: Reward = 128.000000, Mean reward (over 100 episodes) = 131.350000
Episode 20586: Reward = 143.000000, Mean reward (over 100 episodes) = 131.710000
Episode 20587: Reward = 140.000000, Mean reward (over 100 episodes) = 131.950000
Episode 20588: Reward = 115.000000, Mean reward (over 100 episodes) = 131.690000
Episode 20589: Reward = 119.000000, Mean reward (over 100 episodes) = 131.500000
Episode 20590: Reward = 130.000000, Mean reward (over 100 episodes) = 131.530000
Episode 20591: Reward = 130.

Episode 20683: Reward = 128.000000, Mean reward (over 100 episodes) = 125.210000
Episode 20684: Reward = 128.000000, Mean reward (over 100 episodes) = 125.210000
Episode 20685: Reward = 141.000000, Mean reward (over 100 episodes) = 125.340000
Episode 20686: Reward = 133.000000, Mean reward (over 100 episodes) = 125.240000
Episode 20687: Reward = 129.000000, Mean reward (over 100 episodes) = 125.130000
Episode 20688: Reward = 118.000000, Mean reward (over 100 episodes) = 125.160000
Episode 20689: Reward = 122.000000, Mean reward (over 100 episodes) = 125.190000
Episode 20690: Reward = 122.000000, Mean reward (over 100 episodes) = 125.110000
Episode 20691: Reward = 126.000000, Mean reward (over 100 episodes) = 125.070000
Episode 20692: Reward = 112.000000, Mean reward (over 100 episodes) = 124.820000
Episode 20693: Reward = 110.000000, Mean reward (over 100 episodes) = 124.700000
Episode 20694: Reward = 129.000000, Mean reward (over 100 episodes) = 124.890000
Episode 20695: Reward = 122.

Episode 20785: Reward = 146.000000, Mean reward (over 100 episodes) = 133.780000
Episode 20786: Reward = 114.000000, Mean reward (over 100 episodes) = 133.590000
Episode 20787: Reward = 105.000000, Mean reward (over 100 episodes) = 133.350000
Episode 20788: Reward = 200.000000, Mean reward (over 100 episodes) = 134.170000
Episode 20789: Reward = 142.000000, Mean reward (over 100 episodes) = 134.370000
Episode 20790: Reward = 131.000000, Mean reward (over 100 episodes) = 134.460000
Episode 20791: Reward = 128.000000, Mean reward (over 100 episodes) = 134.480000
Episode 20792: Reward = 125.000000, Mean reward (over 100 episodes) = 134.610000
Episode 20793: Reward = 105.000000, Mean reward (over 100 episodes) = 134.560000
Episode 20794: Reward = 117.000000, Mean reward (over 100 episodes) = 134.440000
Episode 20795: Reward = 101.000000, Mean reward (over 100 episodes) = 134.230000
Episode 20796: Reward = 144.000000, Mean reward (over 100 episodes) = 134.400000
Episode 20797: Reward = 128.

Episode 20889: Reward = 130.000000, Mean reward (over 100 episodes) = 122.890000
Episode 20890: Reward = 114.000000, Mean reward (over 100 episodes) = 122.720000
Episode 20891: Reward = 144.000000, Mean reward (over 100 episodes) = 122.880000
Episode 20892: Reward = 139.000000, Mean reward (over 100 episodes) = 123.020000
Episode 20893: Reward = 123.000000, Mean reward (over 100 episodes) = 123.200000
Episode 20894: Reward = 133.000000, Mean reward (over 100 episodes) = 123.360000
Episode 20895: Reward = 124.000000, Mean reward (over 100 episodes) = 123.590000
Episode 20896: Reward = 132.000000, Mean reward (over 100 episodes) = 123.470000
Episode 20897: Reward = 118.000000, Mean reward (over 100 episodes) = 123.370000
Episode 20898: Reward = 150.000000, Mean reward (over 100 episodes) = 123.800000
Episode 20899: Reward = 109.000000, Mean reward (over 100 episodes) = 123.540000
Episode 20900: Reward = 142.000000, Mean reward (over 100 episodes) = 123.570000
Episode 20901: Reward = 125.

Episode 20992: Reward = 139.000000, Mean reward (over 100 episodes) = 127.280000
Episode 20993: Reward = 144.000000, Mean reward (over 100 episodes) = 127.490000
Episode 20994: Reward = 112.000000, Mean reward (over 100 episodes) = 127.280000
Episode 20995: Reward = 110.000000, Mean reward (over 100 episodes) = 127.140000
Episode 20996: Reward = 133.000000, Mean reward (over 100 episodes) = 127.150000
Episode 20997: Reward = 102.000000, Mean reward (over 100 episodes) = 126.990000
Episode 20998: Reward = 115.000000, Mean reward (over 100 episodes) = 126.640000
Episode 20999: Reward = 130.000000, Mean reward (over 100 episodes) = 126.850000
Episode 21000: Reward = 133.000000, Mean reward (over 100 episodes) = 126.760000
Episode 21001: Reward = 143.000000, Mean reward (over 100 episodes) = 126.940000
Episode 21002: Reward = 116.000000, Mean reward (over 100 episodes) = 126.870000
Episode 21003: Reward = 129.000000, Mean reward (over 100 episodes) = 127.000000
Episode 21004: Reward = 125.

Episode 21096: Reward = 145.000000, Mean reward (over 100 episodes) = 126.880000
Episode 21097: Reward = 106.000000, Mean reward (over 100 episodes) = 126.920000
Episode 21098: Reward = 122.000000, Mean reward (over 100 episodes) = 126.990000
Episode 21099: Reward = 106.000000, Mean reward (over 100 episodes) = 126.750000
Episode 21100: Reward = 156.000000, Mean reward (over 100 episodes) = 126.980000
Episode 21101: Reward = 143.000000, Mean reward (over 100 episodes) = 126.980000
Episode 21102: Reward = 115.000000, Mean reward (over 100 episodes) = 126.970000
Episode 21103: Reward = 160.000000, Mean reward (over 100 episodes) = 127.280000
Episode 21104: Reward = 129.000000, Mean reward (over 100 episodes) = 127.320000
Episode 21105: Reward = 106.000000, Mean reward (over 100 episodes) = 127.230000
Episode 21106: Reward = 124.000000, Mean reward (over 100 episodes) = 127.330000
Episode 21107: Reward = 133.000000, Mean reward (over 100 episodes) = 127.490000
Episode 21108: Reward = 112.

Episode 21201: Reward = 152.000000, Mean reward (over 100 episodes) = 130.780000
Episode 21202: Reward = 99.000000, Mean reward (over 100 episodes) = 130.620000
Episode 21203: Reward = 134.000000, Mean reward (over 100 episodes) = 130.360000
Episode 21204: Reward = 137.000000, Mean reward (over 100 episodes) = 130.440000
Episode 21205: Reward = 144.000000, Mean reward (over 100 episodes) = 130.820000
Episode 21206: Reward = 118.000000, Mean reward (over 100 episodes) = 130.760000
Episode 21207: Reward = 169.000000, Mean reward (over 100 episodes) = 131.120000
Episode 21208: Reward = 114.000000, Mean reward (over 100 episodes) = 131.140000
Episode 21209: Reward = 100.000000, Mean reward (over 100 episodes) = 131.150000
Episode 21210: Reward = 118.000000, Mean reward (over 100 episodes) = 131.100000
Episode 21211: Reward = 115.000000, Mean reward (over 100 episodes) = 131.070000
Episode 21212: Reward = 132.000000, Mean reward (over 100 episodes) = 131.070000
Episode 21213: Reward = 149.0

Episode 21306: Reward = 130.000000, Mean reward (over 100 episodes) = 130.700000
Episode 21307: Reward = 139.000000, Mean reward (over 100 episodes) = 130.400000
Episode 21308: Reward = 107.000000, Mean reward (over 100 episodes) = 130.330000
Episode 21309: Reward = 146.000000, Mean reward (over 100 episodes) = 130.790000
Episode 21310: Reward = 121.000000, Mean reward (over 100 episodes) = 130.820000
Episode 21311: Reward = 121.000000, Mean reward (over 100 episodes) = 130.880000
Episode 21312: Reward = 114.000000, Mean reward (over 100 episodes) = 130.700000
Episode 21313: Reward = 121.000000, Mean reward (over 100 episodes) = 130.420000
Episode 21314: Reward = 103.000000, Mean reward (over 100 episodes) = 130.410000
Episode 21315: Reward = 120.000000, Mean reward (over 100 episodes) = 130.240000
Episode 21316: Reward = 114.000000, Mean reward (over 100 episodes) = 130.160000
Episode 21317: Reward = 116.000000, Mean reward (over 100 episodes) = 129.910000
Episode 21318: Reward = 113.

Episode 21410: Reward = 139.000000, Mean reward (over 100 episodes) = 125.330000
Episode 21411: Reward = 107.000000, Mean reward (over 100 episodes) = 125.190000
Episode 21412: Reward = 124.000000, Mean reward (over 100 episodes) = 125.290000
Episode 21413: Reward = 127.000000, Mean reward (over 100 episodes) = 125.350000
Episode 21414: Reward = 150.000000, Mean reward (over 100 episodes) = 125.820000
Episode 21415: Reward = 115.000000, Mean reward (over 100 episodes) = 125.770000
Episode 21416: Reward = 136.000000, Mean reward (over 100 episodes) = 125.990000
Episode 21417: Reward = 133.000000, Mean reward (over 100 episodes) = 126.160000
Episode 21418: Reward = 140.000000, Mean reward (over 100 episodes) = 126.430000
Episode 21419: Reward = 140.000000, Mean reward (over 100 episodes) = 126.540000
Episode 21420: Reward = 112.000000, Mean reward (over 100 episodes) = 126.330000
Episode 21421: Reward = 124.000000, Mean reward (over 100 episodes) = 126.120000
Episode 21422: Reward = 163.

Episode 21513: Reward = 132.000000, Mean reward (over 100 episodes) = 129.320000
Episode 21514: Reward = 116.000000, Mean reward (over 100 episodes) = 128.980000
Episode 21515: Reward = 118.000000, Mean reward (over 100 episodes) = 129.010000
Episode 21516: Reward = 144.000000, Mean reward (over 100 episodes) = 129.090000
Episode 21517: Reward = 114.000000, Mean reward (over 100 episodes) = 128.900000
Episode 21518: Reward = 100.000000, Mean reward (over 100 episodes) = 128.500000
Episode 21519: Reward = 109.000000, Mean reward (over 100 episodes) = 128.190000
Episode 21520: Reward = 124.000000, Mean reward (over 100 episodes) = 128.310000
Episode 21521: Reward = 111.000000, Mean reward (over 100 episodes) = 128.180000
Episode 21522: Reward = 102.000000, Mean reward (over 100 episodes) = 127.570000
Episode 21523: Reward = 171.000000, Mean reward (over 100 episodes) = 128.020000
Episode 21524: Reward = 149.000000, Mean reward (over 100 episodes) = 128.090000
Episode 21525: Reward = 119.

Episode 21618: Reward = 146.000000, Mean reward (over 100 episodes) = 129.810000
Episode 21619: Reward = 118.000000, Mean reward (over 100 episodes) = 129.900000
Episode 21620: Reward = 133.000000, Mean reward (over 100 episodes) = 129.990000
Episode 21621: Reward = 127.000000, Mean reward (over 100 episodes) = 130.150000
Episode 21622: Reward = 111.000000, Mean reward (over 100 episodes) = 130.240000
Episode 21623: Reward = 120.000000, Mean reward (over 100 episodes) = 129.730000
Episode 21624: Reward = 140.000000, Mean reward (over 100 episodes) = 129.640000
Episode 21625: Reward = 126.000000, Mean reward (over 100 episodes) = 129.710000
Episode 21626: Reward = 143.000000, Mean reward (over 100 episodes) = 130.050000
Episode 21627: Reward = 129.000000, Mean reward (over 100 episodes) = 129.890000
Episode 21628: Reward = 148.000000, Mean reward (over 100 episodes) = 130.190000
Episode 21629: Reward = 126.000000, Mean reward (over 100 episodes) = 130.140000
Episode 21630: Reward = 137.

Episode 21720: Reward = 139.000000, Mean reward (over 100 episodes) = 134.810000
Episode 21721: Reward = 107.000000, Mean reward (over 100 episodes) = 134.610000
Episode 21722: Reward = 127.000000, Mean reward (over 100 episodes) = 134.770000
Episode 21723: Reward = 154.000000, Mean reward (over 100 episodes) = 135.110000
Episode 21724: Reward = 135.000000, Mean reward (over 100 episodes) = 135.060000
Episode 21725: Reward = 114.000000, Mean reward (over 100 episodes) = 134.940000
Episode 21726: Reward = 147.000000, Mean reward (over 100 episodes) = 134.980000
Episode 21727: Reward = 146.000000, Mean reward (over 100 episodes) = 135.150000
Episode 21728: Reward = 159.000000, Mean reward (over 100 episodes) = 135.260000
Episode 21729: Reward = 131.000000, Mean reward (over 100 episodes) = 135.310000
Episode 21730: Reward = 110.000000, Mean reward (over 100 episodes) = 135.040000
Episode 21731: Reward = 104.000000, Mean reward (over 100 episodes) = 134.870000
Episode 21732: Reward = 113.

Episode 21823: Reward = 118.000000, Mean reward (over 100 episodes) = 131.960000
Episode 21824: Reward = 121.000000, Mean reward (over 100 episodes) = 131.820000
Episode 21825: Reward = 133.000000, Mean reward (over 100 episodes) = 132.010000
Episode 21826: Reward = 113.000000, Mean reward (over 100 episodes) = 131.670000
Episode 21827: Reward = 153.000000, Mean reward (over 100 episodes) = 131.740000
Episode 21828: Reward = 112.000000, Mean reward (over 100 episodes) = 131.270000
Episode 21829: Reward = 139.000000, Mean reward (over 100 episodes) = 131.350000
Episode 21830: Reward = 126.000000, Mean reward (over 100 episodes) = 131.510000
Episode 21831: Reward = 119.000000, Mean reward (over 100 episodes) = 131.660000
Episode 21832: Reward = 139.000000, Mean reward (over 100 episodes) = 131.920000
Episode 21833: Reward = 129.000000, Mean reward (over 100 episodes) = 131.710000
Episode 21834: Reward = 170.000000, Mean reward (over 100 episodes) = 131.410000
Episode 21835: Reward = 130.

Episode 21925: Reward = 145.000000, Mean reward (over 100 episodes) = 131.230000
Episode 21926: Reward = 109.000000, Mean reward (over 100 episodes) = 131.190000
Episode 21927: Reward = 104.000000, Mean reward (over 100 episodes) = 130.700000
Episode 21928: Reward = 126.000000, Mean reward (over 100 episodes) = 130.840000
Episode 21929: Reward = 130.000000, Mean reward (over 100 episodes) = 130.750000
Episode 21930: Reward = 143.000000, Mean reward (over 100 episodes) = 130.920000
Episode 21931: Reward = 128.000000, Mean reward (over 100 episodes) = 131.010000
Episode 21932: Reward = 106.000000, Mean reward (over 100 episodes) = 130.680000
Episode 21933: Reward = 123.000000, Mean reward (over 100 episodes) = 130.620000
Episode 21934: Reward = 94.000000, Mean reward (over 100 episodes) = 129.860000
Episode 21935: Reward = 138.000000, Mean reward (over 100 episodes) = 129.940000
Episode 21936: Reward = 120.000000, Mean reward (over 100 episodes) = 129.730000
Episode 21937: Reward = 112.0

Episode 22029: Reward = 118.000000, Mean reward (over 100 episodes) = 126.380000
Episode 22030: Reward = 110.000000, Mean reward (over 100 episodes) = 126.050000
Episode 22031: Reward = 115.000000, Mean reward (over 100 episodes) = 125.920000
Episode 22032: Reward = 143.000000, Mean reward (over 100 episodes) = 126.290000
Episode 22033: Reward = 112.000000, Mean reward (over 100 episodes) = 126.180000
Episode 22034: Reward = 116.000000, Mean reward (over 100 episodes) = 126.400000
Episode 22035: Reward = 124.000000, Mean reward (over 100 episodes) = 126.260000
Episode 22036: Reward = 130.000000, Mean reward (over 100 episodes) = 126.360000
Episode 22037: Reward = 141.000000, Mean reward (over 100 episodes) = 126.650000
Episode 22038: Reward = 141.000000, Mean reward (over 100 episodes) = 127.070000
Episode 22039: Reward = 140.000000, Mean reward (over 100 episodes) = 127.060000
Episode 22040: Reward = 119.000000, Mean reward (over 100 episodes) = 127.180000
Episode 22041: Reward = 147.

Episode 22133: Reward = 113.000000, Mean reward (over 100 episodes) = 129.730000
Episode 22134: Reward = 140.000000, Mean reward (over 100 episodes) = 129.970000
Episode 22135: Reward = 142.000000, Mean reward (over 100 episodes) = 130.150000
Episode 22136: Reward = 104.000000, Mean reward (over 100 episodes) = 129.890000
Episode 22137: Reward = 140.000000, Mean reward (over 100 episodes) = 129.880000
Episode 22138: Reward = 137.000000, Mean reward (over 100 episodes) = 129.840000
Episode 22139: Reward = 117.000000, Mean reward (over 100 episodes) = 129.610000
Episode 22140: Reward = 124.000000, Mean reward (over 100 episodes) = 129.660000
Episode 22141: Reward = 135.000000, Mean reward (over 100 episodes) = 129.540000
Episode 22142: Reward = 116.000000, Mean reward (over 100 episodes) = 129.530000
Episode 22143: Reward = 113.000000, Mean reward (over 100 episodes) = 129.370000
Episode 22144: Reward = 137.000000, Mean reward (over 100 episodes) = 129.530000
Episode 22145: Reward = 107.

Episode 22237: Reward = 112.000000, Mean reward (over 100 episodes) = 128.000000
Episode 22238: Reward = 134.000000, Mean reward (over 100 episodes) = 127.970000
Episode 22239: Reward = 124.000000, Mean reward (over 100 episodes) = 128.040000
Episode 22240: Reward = 145.000000, Mean reward (over 100 episodes) = 128.250000
Episode 22241: Reward = 127.000000, Mean reward (over 100 episodes) = 128.170000
Episode 22242: Reward = 114.000000, Mean reward (over 100 episodes) = 128.150000
Episode 22243: Reward = 107.000000, Mean reward (over 100 episodes) = 128.090000
Episode 22244: Reward = 131.000000, Mean reward (over 100 episodes) = 128.030000
Episode 22245: Reward = 121.000000, Mean reward (over 100 episodes) = 128.170000
Episode 22246: Reward = 133.000000, Mean reward (over 100 episodes) = 128.210000
Episode 22247: Reward = 107.000000, Mean reward (over 100 episodes) = 127.880000
Episode 22248: Reward = 104.000000, Mean reward (over 100 episodes) = 127.860000
Episode 22249: Reward = 124.

Episode 22341: Reward = 149.000000, Mean reward (over 100 episodes) = 124.050000
Episode 22342: Reward = 104.000000, Mean reward (over 100 episodes) = 123.950000
Episode 22343: Reward = 120.000000, Mean reward (over 100 episodes) = 124.080000
Episode 22344: Reward = 115.000000, Mean reward (over 100 episodes) = 123.920000
Episode 22345: Reward = 125.000000, Mean reward (over 100 episodes) = 123.960000
Episode 22346: Reward = 95.000000, Mean reward (over 100 episodes) = 123.580000
Episode 22347: Reward = 139.000000, Mean reward (over 100 episodes) = 123.900000
Episode 22348: Reward = 144.000000, Mean reward (over 100 episodes) = 124.300000
Episode 22349: Reward = 123.000000, Mean reward (over 100 episodes) = 124.290000
Episode 22350: Reward = 115.000000, Mean reward (over 100 episodes) = 124.230000
Episode 22351: Reward = 127.000000, Mean reward (over 100 episodes) = 124.270000
Episode 22352: Reward = 121.000000, Mean reward (over 100 episodes) = 124.170000
Episode 22353: Reward = 128.0

Episode 22446: Reward = 128.000000, Mean reward (over 100 episodes) = 122.160000
Episode 22447: Reward = 154.000000, Mean reward (over 100 episodes) = 122.310000
Episode 22448: Reward = 104.000000, Mean reward (over 100 episodes) = 121.910000
Episode 22449: Reward = 101.000000, Mean reward (over 100 episodes) = 121.690000
Episode 22450: Reward = 145.000000, Mean reward (over 100 episodes) = 121.990000
Episode 22451: Reward = 125.000000, Mean reward (over 100 episodes) = 121.970000
Episode 22452: Reward = 120.000000, Mean reward (over 100 episodes) = 121.960000
Episode 22453: Reward = 139.000000, Mean reward (over 100 episodes) = 122.070000
Episode 22454: Reward = 132.000000, Mean reward (over 100 episodes) = 122.370000
Episode 22455: Reward = 101.000000, Mean reward (over 100 episodes) = 122.210000
Episode 22456: Reward = 121.000000, Mean reward (over 100 episodes) = 122.430000
Episode 22457: Reward = 118.000000, Mean reward (over 100 episodes) = 122.430000
Episode 22458: Reward = 122.

Episode 22550: Reward = 107.000000, Mean reward (over 100 episodes) = 120.880000
Episode 22551: Reward = 103.000000, Mean reward (over 100 episodes) = 120.660000
Episode 22552: Reward = 141.000000, Mean reward (over 100 episodes) = 120.870000
Episode 22553: Reward = 112.000000, Mean reward (over 100 episodes) = 120.600000
Episode 22554: Reward = 159.000000, Mean reward (over 100 episodes) = 120.870000
Episode 22555: Reward = 109.000000, Mean reward (over 100 episodes) = 120.950000
Episode 22556: Reward = 104.000000, Mean reward (over 100 episodes) = 120.780000
Episode 22557: Reward = 126.000000, Mean reward (over 100 episodes) = 120.860000
Episode 22558: Reward = 160.000000, Mean reward (over 100 episodes) = 121.240000
Episode 22559: Reward = 126.000000, Mean reward (over 100 episodes) = 121.440000
Episode 22560: Reward = 129.000000, Mean reward (over 100 episodes) = 121.500000
Episode 22561: Reward = 122.000000, Mean reward (over 100 episodes) = 121.130000
Episode 22562: Reward = 113.

Episode 22655: Reward = 96.000000, Mean reward (over 100 episodes) = 121.940000
Episode 22656: Reward = 107.000000, Mean reward (over 100 episodes) = 121.970000
Episode 22657: Reward = 100.000000, Mean reward (over 100 episodes) = 121.710000
Episode 22658: Reward = 115.000000, Mean reward (over 100 episodes) = 121.260000
Episode 22659: Reward = 131.000000, Mean reward (over 100 episodes) = 121.310000
Episode 22660: Reward = 110.000000, Mean reward (over 100 episodes) = 121.120000
Episode 22661: Reward = 126.000000, Mean reward (over 100 episodes) = 121.160000
Episode 22662: Reward = 163.000000, Mean reward (over 100 episodes) = 121.660000
Episode 22663: Reward = 111.000000, Mean reward (over 100 episodes) = 121.510000
Episode 22664: Reward = 105.000000, Mean reward (over 100 episodes) = 121.300000
Episode 22665: Reward = 110.000000, Mean reward (over 100 episodes) = 121.210000
Episode 22666: Reward = 116.000000, Mean reward (over 100 episodes) = 121.190000
Episode 22667: Reward = 108.0

Episode 22760: Reward = 126.000000, Mean reward (over 100 episodes) = 123.740000
Episode 22761: Reward = 124.000000, Mean reward (over 100 episodes) = 123.720000
Episode 22762: Reward = 123.000000, Mean reward (over 100 episodes) = 123.320000
Episode 22763: Reward = 110.000000, Mean reward (over 100 episodes) = 123.310000
Episode 22764: Reward = 135.000000, Mean reward (over 100 episodes) = 123.610000
Episode 22765: Reward = 139.000000, Mean reward (over 100 episodes) = 123.900000
Episode 22766: Reward = 115.000000, Mean reward (over 100 episodes) = 123.890000
Episode 22767: Reward = 126.000000, Mean reward (over 100 episodes) = 124.070000
Episode 22768: Reward = 131.000000, Mean reward (over 100 episodes) = 124.280000
Episode 22769: Reward = 105.000000, Mean reward (over 100 episodes) = 123.920000
Episode 22770: Reward = 128.000000, Mean reward (over 100 episodes) = 124.010000
Episode 22771: Reward = 121.000000, Mean reward (over 100 episodes) = 123.840000
Episode 22772: Reward = 124.

Episode 22863: Reward = 200.000000, Mean reward (over 100 episodes) = 134.200000
Episode 22864: Reward = 175.000000, Mean reward (over 100 episodes) = 134.600000
Episode 22865: Reward = 129.000000, Mean reward (over 100 episodes) = 134.500000
Episode 22866: Reward = 151.000000, Mean reward (over 100 episodes) = 134.860000
Episode 22867: Reward = 106.000000, Mean reward (over 100 episodes) = 134.660000
Episode 22868: Reward = 136.000000, Mean reward (over 100 episodes) = 134.710000
Episode 22869: Reward = 119.000000, Mean reward (over 100 episodes) = 134.850000
Episode 22870: Reward = 144.000000, Mean reward (over 100 episodes) = 135.010000
Episode 22871: Reward = 177.000000, Mean reward (over 100 episodes) = 135.570000
Episode 22872: Reward = 114.000000, Mean reward (over 100 episodes) = 135.470000
Episode 22873: Reward = 129.000000, Mean reward (over 100 episodes) = 135.260000
Episode 22874: Reward = 132.000000, Mean reward (over 100 episodes) = 135.360000
Episode 22875: Reward = 127.

Episode 22968: Reward = 133.000000, Mean reward (over 100 episodes) = 129.680000
Episode 22969: Reward = 116.000000, Mean reward (over 100 episodes) = 129.650000
Episode 22970: Reward = 141.000000, Mean reward (over 100 episodes) = 129.620000
Episode 22971: Reward = 133.000000, Mean reward (over 100 episodes) = 129.180000
Episode 22972: Reward = 127.000000, Mean reward (over 100 episodes) = 129.310000
Episode 22973: Reward = 129.000000, Mean reward (over 100 episodes) = 129.310000
Episode 22974: Reward = 136.000000, Mean reward (over 100 episodes) = 129.350000
Episode 22975: Reward = 141.000000, Mean reward (over 100 episodes) = 129.490000
Episode 22976: Reward = 154.000000, Mean reward (over 100 episodes) = 129.700000
Episode 22977: Reward = 144.000000, Mean reward (over 100 episodes) = 129.940000
Episode 22978: Reward = 122.000000, Mean reward (over 100 episodes) = 129.940000
Episode 22979: Reward = 110.000000, Mean reward (over 100 episodes) = 129.870000
Episode 22980: Reward = 104.

Episode 23072: Reward = 103.000000, Mean reward (over 100 episodes) = 125.580000
Episode 23073: Reward = 128.000000, Mean reward (over 100 episodes) = 125.570000
Episode 23074: Reward = 118.000000, Mean reward (over 100 episodes) = 125.390000
Episode 23075: Reward = 119.000000, Mean reward (over 100 episodes) = 125.170000
Episode 23076: Reward = 119.000000, Mean reward (over 100 episodes) = 124.820000
Episode 23077: Reward = 124.000000, Mean reward (over 100 episodes) = 124.620000
Episode 23078: Reward = 135.000000, Mean reward (over 100 episodes) = 124.750000
Episode 23079: Reward = 128.000000, Mean reward (over 100 episodes) = 124.930000
Episode 23080: Reward = 147.000000, Mean reward (over 100 episodes) = 125.360000
Episode 23081: Reward = 121.000000, Mean reward (over 100 episodes) = 125.290000
Episode 23082: Reward = 121.000000, Mean reward (over 100 episodes) = 125.330000
Episode 23083: Reward = 116.000000, Mean reward (over 100 episodes) = 125.110000
Episode 23084: Reward = 111.

Episode 23175: Reward = 142.000000, Mean reward (over 100 episodes) = 128.990000
Episode 23176: Reward = 120.000000, Mean reward (over 100 episodes) = 129.000000
Episode 23177: Reward = 127.000000, Mean reward (over 100 episodes) = 129.030000
Episode 23178: Reward = 115.000000, Mean reward (over 100 episodes) = 128.830000
Episode 23179: Reward = 164.000000, Mean reward (over 100 episodes) = 129.190000
Episode 23180: Reward = 132.000000, Mean reward (over 100 episodes) = 129.040000
Episode 23181: Reward = 119.000000, Mean reward (over 100 episodes) = 129.020000
Episode 23182: Reward = 124.000000, Mean reward (over 100 episodes) = 129.050000
Episode 23183: Reward = 129.000000, Mean reward (over 100 episodes) = 129.180000
Episode 23184: Reward = 109.000000, Mean reward (over 100 episodes) = 129.160000
Episode 23185: Reward = 147.000000, Mean reward (over 100 episodes) = 129.230000
Episode 23186: Reward = 129.000000, Mean reward (over 100 episodes) = 129.020000
Episode 23187: Reward = 114.

Episode 23277: Reward = 102.000000, Mean reward (over 100 episodes) = 128.060000
Episode 23278: Reward = 132.000000, Mean reward (over 100 episodes) = 128.230000
Episode 23279: Reward = 127.000000, Mean reward (over 100 episodes) = 127.860000
Episode 23280: Reward = 133.000000, Mean reward (over 100 episodes) = 127.870000
Episode 23281: Reward = 115.000000, Mean reward (over 100 episodes) = 127.830000
Episode 23282: Reward = 130.000000, Mean reward (over 100 episodes) = 127.890000
Episode 23283: Reward = 137.000000, Mean reward (over 100 episodes) = 127.970000
Episode 23284: Reward = 110.000000, Mean reward (over 100 episodes) = 127.980000
Episode 23285: Reward = 154.000000, Mean reward (over 100 episodes) = 128.050000
Episode 23286: Reward = 109.000000, Mean reward (over 100 episodes) = 127.850000
Episode 23287: Reward = 133.000000, Mean reward (over 100 episodes) = 128.040000
Episode 23288: Reward = 130.000000, Mean reward (over 100 episodes) = 128.140000
Episode 23289: Reward = 137.

Episode 23381: Reward = 133.000000, Mean reward (over 100 episodes) = 125.790000
Episode 23382: Reward = 101.000000, Mean reward (over 100 episodes) = 125.500000
Episode 23383: Reward = 127.000000, Mean reward (over 100 episodes) = 125.400000
Episode 23384: Reward = 115.000000, Mean reward (over 100 episodes) = 125.450000
Episode 23385: Reward = 143.000000, Mean reward (over 100 episodes) = 125.340000
Episode 23386: Reward = 108.000000, Mean reward (over 100 episodes) = 125.330000
Episode 23387: Reward = 134.000000, Mean reward (over 100 episodes) = 125.340000
Episode 23388: Reward = 140.000000, Mean reward (over 100 episodes) = 125.440000
Episode 23389: Reward = 138.000000, Mean reward (over 100 episodes) = 125.450000
Episode 23390: Reward = 135.000000, Mean reward (over 100 episodes) = 125.480000
Episode 23391: Reward = 117.000000, Mean reward (over 100 episodes) = 125.470000
Episode 23392: Reward = 160.000000, Mean reward (over 100 episodes) = 125.870000
Episode 23393: Reward = 124.

Episode 23485: Reward = 153.000000, Mean reward (over 100 episodes) = 126.790000
Episode 23486: Reward = 105.000000, Mean reward (over 100 episodes) = 126.760000
Episode 23487: Reward = 115.000000, Mean reward (over 100 episodes) = 126.570000
Episode 23488: Reward = 132.000000, Mean reward (over 100 episodes) = 126.490000
Episode 23489: Reward = 106.000000, Mean reward (over 100 episodes) = 126.170000
Episode 23490: Reward = 124.000000, Mean reward (over 100 episodes) = 126.060000
Episode 23491: Reward = 151.000000, Mean reward (over 100 episodes) = 126.400000
Episode 23492: Reward = 104.000000, Mean reward (over 100 episodes) = 125.840000
Episode 23493: Reward = 131.000000, Mean reward (over 100 episodes) = 125.910000
Episode 23494: Reward = 169.000000, Mean reward (over 100 episodes) = 126.320000
Episode 23495: Reward = 119.000000, Mean reward (over 100 episodes) = 126.430000
Episode 23496: Reward = 134.000000, Mean reward (over 100 episodes) = 126.640000
Episode 23497: Reward = 111.

Episode 23589: Reward = 112.000000, Mean reward (over 100 episodes) = 125.500000
Episode 23590: Reward = 120.000000, Mean reward (over 100 episodes) = 125.460000
Episode 23591: Reward = 109.000000, Mean reward (over 100 episodes) = 125.040000
Episode 23592: Reward = 125.000000, Mean reward (over 100 episodes) = 125.250000
Episode 23593: Reward = 125.000000, Mean reward (over 100 episodes) = 125.190000
Episode 23594: Reward = 122.000000, Mean reward (over 100 episodes) = 124.720000
Episode 23595: Reward = 114.000000, Mean reward (over 100 episodes) = 124.670000
Episode 23596: Reward = 104.000000, Mean reward (over 100 episodes) = 124.370000
Episode 23597: Reward = 136.000000, Mean reward (over 100 episodes) = 124.620000
Episode 23598: Reward = 137.000000, Mean reward (over 100 episodes) = 124.760000
Episode 23599: Reward = 121.000000, Mean reward (over 100 episodes) = 124.590000
Episode 23600: Reward = 139.000000, Mean reward (over 100 episodes) = 124.720000
Episode 23601: Reward = 150.

Episode 23693: Reward = 121.000000, Mean reward (over 100 episodes) = 126.520000
Episode 23694: Reward = 146.000000, Mean reward (over 100 episodes) = 126.760000
Episode 23695: Reward = 123.000000, Mean reward (over 100 episodes) = 126.850000
Episode 23696: Reward = 131.000000, Mean reward (over 100 episodes) = 127.120000
Episode 23697: Reward = 103.000000, Mean reward (over 100 episodes) = 126.790000
Episode 23698: Reward = 131.000000, Mean reward (over 100 episodes) = 126.730000
Episode 23699: Reward = 149.000000, Mean reward (over 100 episodes) = 127.010000
Episode 23700: Reward = 115.000000, Mean reward (over 100 episodes) = 126.770000
Episode 23701: Reward = 124.000000, Mean reward (over 100 episodes) = 126.510000
Episode 23702: Reward = 132.000000, Mean reward (over 100 episodes) = 126.700000
Episode 23703: Reward = 146.000000, Mean reward (over 100 episodes) = 126.700000
Episode 23704: Reward = 121.000000, Mean reward (over 100 episodes) = 126.680000
Episode 23705: Reward = 106.

Episode 23796: Reward = 159.000000, Mean reward (over 100 episodes) = 127.130000
Episode 23797: Reward = 103.000000, Mean reward (over 100 episodes) = 127.130000
Episode 23798: Reward = 131.000000, Mean reward (over 100 episodes) = 127.130000
Episode 23799: Reward = 122.000000, Mean reward (over 100 episodes) = 126.860000
Episode 23800: Reward = 143.000000, Mean reward (over 100 episodes) = 127.140000
Episode 23801: Reward = 153.000000, Mean reward (over 100 episodes) = 127.430000
Episode 23802: Reward = 112.000000, Mean reward (over 100 episodes) = 127.230000
Episode 23803: Reward = 120.000000, Mean reward (over 100 episodes) = 126.970000
Episode 23804: Reward = 124.000000, Mean reward (over 100 episodes) = 127.000000
Episode 23805: Reward = 119.000000, Mean reward (over 100 episodes) = 127.130000
Episode 23806: Reward = 107.000000, Mean reward (over 100 episodes) = 127.000000
Episode 23807: Reward = 116.000000, Mean reward (over 100 episodes) = 126.720000
Episode 23808: Reward = 117.

Episode 23901: Reward = 131.000000, Mean reward (over 100 episodes) = 119.940000
Episode 23902: Reward = 149.000000, Mean reward (over 100 episodes) = 120.310000
Episode 23903: Reward = 114.000000, Mean reward (over 100 episodes) = 120.250000
Episode 23904: Reward = 113.000000, Mean reward (over 100 episodes) = 120.140000
Episode 23905: Reward = 128.000000, Mean reward (over 100 episodes) = 120.230000
Episode 23906: Reward = 124.000000, Mean reward (over 100 episodes) = 120.400000
Episode 23907: Reward = 119.000000, Mean reward (over 100 episodes) = 120.430000
Episode 23908: Reward = 107.000000, Mean reward (over 100 episodes) = 120.330000
Episode 23909: Reward = 108.000000, Mean reward (over 100 episodes) = 120.290000
Episode 23910: Reward = 128.000000, Mean reward (over 100 episodes) = 120.490000
Episode 23911: Reward = 135.000000, Mean reward (over 100 episodes) = 120.610000
Episode 23912: Reward = 116.000000, Mean reward (over 100 episodes) = 120.600000
Episode 23913: Reward = 148.

Episode 24004: Reward = 106.000000, Mean reward (over 100 episodes) = 127.000000
Episode 24005: Reward = 132.000000, Mean reward (over 100 episodes) = 127.040000
Episode 24006: Reward = 144.000000, Mean reward (over 100 episodes) = 127.240000
Episode 24007: Reward = 130.000000, Mean reward (over 100 episodes) = 127.350000
Episode 24008: Reward = 127.000000, Mean reward (over 100 episodes) = 127.550000
Episode 24009: Reward = 99.000000, Mean reward (over 100 episodes) = 127.460000
Episode 24010: Reward = 130.000000, Mean reward (over 100 episodes) = 127.480000
Episode 24011: Reward = 121.000000, Mean reward (over 100 episodes) = 127.340000
Episode 24012: Reward = 116.000000, Mean reward (over 100 episodes) = 127.340000
Episode 24013: Reward = 106.000000, Mean reward (over 100 episodes) = 126.920000
Episode 24014: Reward = 115.000000, Mean reward (over 100 episodes) = 126.820000
Episode 24015: Reward = 119.000000, Mean reward (over 100 episodes) = 126.670000
Episode 24016: Reward = 142.0

Episode 24108: Reward = 108.000000, Mean reward (over 100 episodes) = 125.030000
Episode 24109: Reward = 123.000000, Mean reward (over 100 episodes) = 125.270000
Episode 24110: Reward = 135.000000, Mean reward (over 100 episodes) = 125.320000
Episode 24111: Reward = 137.000000, Mean reward (over 100 episodes) = 125.480000
Episode 24112: Reward = 112.000000, Mean reward (over 100 episodes) = 125.440000
Episode 24113: Reward = 128.000000, Mean reward (over 100 episodes) = 125.660000
Episode 24114: Reward = 134.000000, Mean reward (over 100 episodes) = 125.850000
Episode 24115: Reward = 139.000000, Mean reward (over 100 episodes) = 126.050000
Episode 24116: Reward = 152.000000, Mean reward (over 100 episodes) = 126.150000
Episode 24117: Reward = 148.000000, Mean reward (over 100 episodes) = 126.370000
Episode 24118: Reward = 138.000000, Mean reward (over 100 episodes) = 126.330000
Episode 24119: Reward = 124.000000, Mean reward (over 100 episodes) = 126.280000
Episode 24120: Reward = 122.

Episode 24212: Reward = 135.000000, Mean reward (over 100 episodes) = 131.480000
Episode 24213: Reward = 134.000000, Mean reward (over 100 episodes) = 131.540000
Episode 24214: Reward = 113.000000, Mean reward (over 100 episodes) = 131.330000
Episode 24215: Reward = 150.000000, Mean reward (over 100 episodes) = 131.440000
Episode 24216: Reward = 143.000000, Mean reward (over 100 episodes) = 131.350000
Episode 24217: Reward = 132.000000, Mean reward (over 100 episodes) = 131.190000
Episode 24218: Reward = 111.000000, Mean reward (over 100 episodes) = 130.920000
Episode 24219: Reward = 129.000000, Mean reward (over 100 episodes) = 130.970000
Episode 24220: Reward = 113.000000, Mean reward (over 100 episodes) = 130.880000
Episode 24221: Reward = 135.000000, Mean reward (over 100 episodes) = 131.150000
Episode 24222: Reward = 127.000000, Mean reward (over 100 episodes) = 131.180000
Episode 24223: Reward = 156.000000, Mean reward (over 100 episodes) = 131.610000
Episode 24224: Reward = 113.

Episode 24314: Reward = 122.000000, Mean reward (over 100 episodes) = 131.310000
Episode 24315: Reward = 133.000000, Mean reward (over 100 episodes) = 131.140000
Episode 24316: Reward = 134.000000, Mean reward (over 100 episodes) = 131.050000
Episode 24317: Reward = 115.000000, Mean reward (over 100 episodes) = 130.880000
Episode 24318: Reward = 142.000000, Mean reward (over 100 episodes) = 131.190000
Episode 24319: Reward = 131.000000, Mean reward (over 100 episodes) = 131.210000
Episode 24320: Reward = 140.000000, Mean reward (over 100 episodes) = 131.480000
Episode 24321: Reward = 141.000000, Mean reward (over 100 episodes) = 131.540000
Episode 24322: Reward = 166.000000, Mean reward (over 100 episodes) = 131.930000
Episode 24323: Reward = 124.000000, Mean reward (over 100 episodes) = 131.610000
Episode 24324: Reward = 118.000000, Mean reward (over 100 episodes) = 131.660000
Episode 24325: Reward = 124.000000, Mean reward (over 100 episodes) = 131.670000
Episode 24326: Reward = 128.

Episode 24417: Reward = 144.000000, Mean reward (over 100 episodes) = 127.240000
Episode 24418: Reward = 133.000000, Mean reward (over 100 episodes) = 127.150000
Episode 24419: Reward = 133.000000, Mean reward (over 100 episodes) = 127.170000
Episode 24420: Reward = 138.000000, Mean reward (over 100 episodes) = 127.150000
Episode 24421: Reward = 132.000000, Mean reward (over 100 episodes) = 127.060000
Episode 24422: Reward = 100.000000, Mean reward (over 100 episodes) = 126.400000
Episode 24423: Reward = 132.000000, Mean reward (over 100 episodes) = 126.480000
Episode 24424: Reward = 125.000000, Mean reward (over 100 episodes) = 126.550000
Episode 24425: Reward = 116.000000, Mean reward (over 100 episodes) = 126.470000
Episode 24426: Reward = 135.000000, Mean reward (over 100 episodes) = 126.540000
Episode 24427: Reward = 109.000000, Mean reward (over 100 episodes) = 126.300000
Episode 24428: Reward = 135.000000, Mean reward (over 100 episodes) = 125.870000
Episode 24429: Reward = 119.

Episode 24521: Reward = 128.000000, Mean reward (over 100 episodes) = 123.850000
Episode 24522: Reward = 114.000000, Mean reward (over 100 episodes) = 123.990000
Episode 24523: Reward = 134.000000, Mean reward (over 100 episodes) = 124.010000
Episode 24524: Reward = 137.000000, Mean reward (over 100 episodes) = 124.130000
Episode 24525: Reward = 165.000000, Mean reward (over 100 episodes) = 124.620000
Episode 24526: Reward = 125.000000, Mean reward (over 100 episodes) = 124.520000
Episode 24527: Reward = 95.000000, Mean reward (over 100 episodes) = 124.380000
Episode 24528: Reward = 129.000000, Mean reward (over 100 episodes) = 124.320000
Episode 24529: Reward = 141.000000, Mean reward (over 100 episodes) = 124.540000
Episode 24530: Reward = 153.000000, Mean reward (over 100 episodes) = 124.830000
Episode 24531: Reward = 130.000000, Mean reward (over 100 episodes) = 124.950000
Episode 24532: Reward = 136.000000, Mean reward (over 100 episodes) = 125.040000
Episode 24533: Reward = 122.0

Episode 24625: Reward = 124.000000, Mean reward (over 100 episodes) = 125.410000
Episode 24626: Reward = 119.000000, Mean reward (over 100 episodes) = 125.350000
Episode 24627: Reward = 113.000000, Mean reward (over 100 episodes) = 125.530000
Episode 24628: Reward = 117.000000, Mean reward (over 100 episodes) = 125.410000
Episode 24629: Reward = 123.000000, Mean reward (over 100 episodes) = 125.230000
Episode 24630: Reward = 113.000000, Mean reward (over 100 episodes) = 124.830000
Episode 24631: Reward = 115.000000, Mean reward (over 100 episodes) = 124.680000
Episode 24632: Reward = 137.000000, Mean reward (over 100 episodes) = 124.690000
Episode 24633: Reward = 109.000000, Mean reward (over 100 episodes) = 124.560000
Episode 24634: Reward = 133.000000, Mean reward (over 100 episodes) = 124.660000
Episode 24635: Reward = 162.000000, Mean reward (over 100 episodes) = 125.070000
Episode 24636: Reward = 107.000000, Mean reward (over 100 episodes) = 124.880000
Episode 24637: Reward = 112.

Episode 24729: Reward = 138.000000, Mean reward (over 100 episodes) = 128.550000
Episode 24730: Reward = 143.000000, Mean reward (over 100 episodes) = 128.850000
Episode 24731: Reward = 132.000000, Mean reward (over 100 episodes) = 129.020000
Episode 24732: Reward = 126.000000, Mean reward (over 100 episodes) = 128.910000
Episode 24733: Reward = 115.000000, Mean reward (over 100 episodes) = 128.970000
Episode 24734: Reward = 149.000000, Mean reward (over 100 episodes) = 129.130000
Episode 24735: Reward = 112.000000, Mean reward (over 100 episodes) = 128.630000
Episode 24736: Reward = 105.000000, Mean reward (over 100 episodes) = 128.610000
Episode 24737: Reward = 132.000000, Mean reward (over 100 episodes) = 128.810000
Episode 24738: Reward = 131.000000, Mean reward (over 100 episodes) = 128.760000
Episode 24739: Reward = 128.000000, Mean reward (over 100 episodes) = 128.710000
Episode 24740: Reward = 200.000000, Mean reward (over 100 episodes) = 129.230000
Episode 24741: Reward = 136.

Episode 24832: Reward = 145.000000, Mean reward (over 100 episodes) = 130.110000
Episode 24833: Reward = 157.000000, Mean reward (over 100 episodes) = 130.530000
Episode 24834: Reward = 109.000000, Mean reward (over 100 episodes) = 130.130000
Episode 24835: Reward = 105.000000, Mean reward (over 100 episodes) = 130.060000
Episode 24836: Reward = 138.000000, Mean reward (over 100 episodes) = 130.390000
Episode 24837: Reward = 125.000000, Mean reward (over 100 episodes) = 130.320000
Episode 24838: Reward = 118.000000, Mean reward (over 100 episodes) = 130.190000
Episode 24839: Reward = 123.000000, Mean reward (over 100 episodes) = 130.140000
Episode 24840: Reward = 114.000000, Mean reward (over 100 episodes) = 129.280000
Episode 24841: Reward = 122.000000, Mean reward (over 100 episodes) = 129.140000
Episode 24842: Reward = 114.000000, Mean reward (over 100 episodes) = 129.120000
Episode 24843: Reward = 134.000000, Mean reward (over 100 episodes) = 129.280000
Episode 24844: Reward = 129.

Episode 24935: Reward = 136.000000, Mean reward (over 100 episodes) = 129.040000
Episode 24936: Reward = 117.000000, Mean reward (over 100 episodes) = 128.830000
Episode 24937: Reward = 128.000000, Mean reward (over 100 episodes) = 128.860000
Episode 24938: Reward = 200.000000, Mean reward (over 100 episodes) = 129.680000
Episode 24939: Reward = 118.000000, Mean reward (over 100 episodes) = 129.630000
Episode 24940: Reward = 137.000000, Mean reward (over 100 episodes) = 129.860000
Episode 24941: Reward = 152.000000, Mean reward (over 100 episodes) = 130.160000
Episode 24942: Reward = 129.000000, Mean reward (over 100 episodes) = 130.310000
Episode 24943: Reward = 115.000000, Mean reward (over 100 episodes) = 130.120000
Episode 24944: Reward = 127.000000, Mean reward (over 100 episodes) = 130.100000
Episode 24945: Reward = 107.000000, Mean reward (over 100 episodes) = 129.860000
Episode 24946: Reward = 121.000000, Mean reward (over 100 episodes) = 129.890000
Episode 24947: Reward = 111.

Episode 25039: Reward = 138.000000, Mean reward (over 100 episodes) = 125.540000
Episode 25040: Reward = 112.000000, Mean reward (over 100 episodes) = 125.290000
Episode 25041: Reward = 127.000000, Mean reward (over 100 episodes) = 125.040000
Episode 25042: Reward = 120.000000, Mean reward (over 100 episodes) = 124.950000
Episode 25043: Reward = 129.000000, Mean reward (over 100 episodes) = 125.090000
Episode 25044: Reward = 105.000000, Mean reward (over 100 episodes) = 124.870000
Episode 25045: Reward = 140.000000, Mean reward (over 100 episodes) = 125.200000
Episode 25046: Reward = 137.000000, Mean reward (over 100 episodes) = 125.360000
Episode 25047: Reward = 110.000000, Mean reward (over 100 episodes) = 125.350000
Episode 25048: Reward = 111.000000, Mean reward (over 100 episodes) = 125.310000
Episode 25049: Reward = 122.000000, Mean reward (over 100 episodes) = 125.240000
Episode 25050: Reward = 125.000000, Mean reward (over 100 episodes) = 125.350000
Episode 25051: Reward = 155.

Episode 25143: Reward = 121.000000, Mean reward (over 100 episodes) = 124.480000
Episode 25144: Reward = 129.000000, Mean reward (over 100 episodes) = 124.720000
Episode 25145: Reward = 119.000000, Mean reward (over 100 episodes) = 124.510000
Episode 25146: Reward = 129.000000, Mean reward (over 100 episodes) = 124.430000
Episode 25147: Reward = 125.000000, Mean reward (over 100 episodes) = 124.580000
Episode 25148: Reward = 128.000000, Mean reward (over 100 episodes) = 124.750000
Episode 25149: Reward = 123.000000, Mean reward (over 100 episodes) = 124.760000
Episode 25150: Reward = 126.000000, Mean reward (over 100 episodes) = 124.770000
Episode 25151: Reward = 175.000000, Mean reward (over 100 episodes) = 124.970000
Episode 25152: Reward = 139.000000, Mean reward (over 100 episodes) = 125.240000
Episode 25153: Reward = 100.000000, Mean reward (over 100 episodes) = 125.000000
Episode 25154: Reward = 110.000000, Mean reward (over 100 episodes) = 124.970000
Episode 25155: Reward = 123.

Episode 25246: Reward = 108.000000, Mean reward (over 100 episodes) = 127.250000
Episode 25247: Reward = 122.000000, Mean reward (over 100 episodes) = 127.220000
Episode 25248: Reward = 140.000000, Mean reward (over 100 episodes) = 127.340000
Episode 25249: Reward = 122.000000, Mean reward (over 100 episodes) = 127.330000
Episode 25250: Reward = 137.000000, Mean reward (over 100 episodes) = 127.440000
Episode 25251: Reward = 144.000000, Mean reward (over 100 episodes) = 127.130000
Episode 25252: Reward = 118.000000, Mean reward (over 100 episodes) = 126.920000
Episode 25253: Reward = 132.000000, Mean reward (over 100 episodes) = 127.240000
Episode 25254: Reward = 145.000000, Mean reward (over 100 episodes) = 127.590000
Episode 25255: Reward = 115.000000, Mean reward (over 100 episodes) = 127.510000
Episode 25256: Reward = 119.000000, Mean reward (over 100 episodes) = 127.100000
Episode 25257: Reward = 139.000000, Mean reward (over 100 episodes) = 127.300000
Episode 25258: Reward = 160.

Episode 25349: Reward = 144.000000, Mean reward (over 100 episodes) = 136.140000
Episode 25350: Reward = 131.000000, Mean reward (over 100 episodes) = 136.080000
Episode 25351: Reward = 142.000000, Mean reward (over 100 episodes) = 136.060000
Episode 25352: Reward = 139.000000, Mean reward (over 100 episodes) = 136.270000
Episode 25353: Reward = 142.000000, Mean reward (over 100 episodes) = 136.370000
Episode 25354: Reward = 140.000000, Mean reward (over 100 episodes) = 136.320000
Episode 25355: Reward = 126.000000, Mean reward (over 100 episodes) = 136.430000
Episode 25356: Reward = 109.000000, Mean reward (over 100 episodes) = 136.330000
Episode 25357: Reward = 119.000000, Mean reward (over 100 episodes) = 136.130000
Episode 25358: Reward = 146.000000, Mean reward (over 100 episodes) = 135.990000
Episode 25359: Reward = 139.000000, Mean reward (over 100 episodes) = 136.100000
Episode 25360: Reward = 132.000000, Mean reward (over 100 episodes) = 135.780000
Episode 25361: Reward = 150.

Episode 25453: Reward = 127.000000, Mean reward (over 100 episodes) = 131.490000
Episode 25454: Reward = 125.000000, Mean reward (over 100 episodes) = 131.340000
Episode 25455: Reward = 127.000000, Mean reward (over 100 episodes) = 131.350000
Episode 25456: Reward = 119.000000, Mean reward (over 100 episodes) = 131.450000
Episode 25457: Reward = 115.000000, Mean reward (over 100 episodes) = 131.410000
Episode 25458: Reward = 144.000000, Mean reward (over 100 episodes) = 131.390000
Episode 25459: Reward = 148.000000, Mean reward (over 100 episodes) = 131.480000
Episode 25460: Reward = 129.000000, Mean reward (over 100 episodes) = 131.450000
Episode 25461: Reward = 138.000000, Mean reward (over 100 episodes) = 131.330000
Episode 25462: Reward = 130.000000, Mean reward (over 100 episodes) = 131.410000
Episode 25463: Reward = 135.000000, Mean reward (over 100 episodes) = 131.320000
Episode 25464: Reward = 114.000000, Mean reward (over 100 episodes) = 131.260000
Episode 25465: Reward = 130.

Episode 25556: Reward = 126.000000, Mean reward (over 100 episodes) = 130.910000
Episode 25557: Reward = 130.000000, Mean reward (over 100 episodes) = 131.060000
Episode 25558: Reward = 121.000000, Mean reward (over 100 episodes) = 130.830000
Episode 25559: Reward = 108.000000, Mean reward (over 100 episodes) = 130.430000
Episode 25560: Reward = 146.000000, Mean reward (over 100 episodes) = 130.600000
Episode 25561: Reward = 131.000000, Mean reward (over 100 episodes) = 130.530000
Episode 25562: Reward = 200.000000, Mean reward (over 100 episodes) = 131.230000
Episode 25563: Reward = 135.000000, Mean reward (over 100 episodes) = 131.230000
Episode 25564: Reward = 133.000000, Mean reward (over 100 episodes) = 131.420000
Episode 25565: Reward = 136.000000, Mean reward (over 100 episodes) = 131.480000
Episode 25566: Reward = 127.000000, Mean reward (over 100 episodes) = 131.370000
Episode 25567: Reward = 127.000000, Mean reward (over 100 episodes) = 131.450000
Episode 25568: Reward = 137.

Episode 25660: Reward = 138.000000, Mean reward (over 100 episodes) = 132.720000
Episode 25661: Reward = 128.000000, Mean reward (over 100 episodes) = 132.690000
Episode 25662: Reward = 128.000000, Mean reward (over 100 episodes) = 131.970000
Episode 25663: Reward = 136.000000, Mean reward (over 100 episodes) = 131.980000
Episode 25664: Reward = 111.000000, Mean reward (over 100 episodes) = 131.760000
Episode 25665: Reward = 135.000000, Mean reward (over 100 episodes) = 131.750000
Episode 25666: Reward = 138.000000, Mean reward (over 100 episodes) = 131.860000
Episode 25667: Reward = 145.000000, Mean reward (over 100 episodes) = 132.040000
Episode 25668: Reward = 200.000000, Mean reward (over 100 episodes) = 132.670000
Episode 25669: Reward = 137.000000, Mean reward (over 100 episodes) = 132.480000
Episode 25670: Reward = 106.000000, Mean reward (over 100 episodes) = 132.410000
Episode 25671: Reward = 126.000000, Mean reward (over 100 episodes) = 132.250000
Episode 25672: Reward = 120.

Episode 25763: Reward = 130.000000, Mean reward (over 100 episodes) = 131.010000
Episode 25764: Reward = 165.000000, Mean reward (over 100 episodes) = 131.550000
Episode 25765: Reward = 117.000000, Mean reward (over 100 episodes) = 131.370000
Episode 25766: Reward = 115.000000, Mean reward (over 100 episodes) = 131.140000
Episode 25767: Reward = 134.000000, Mean reward (over 100 episodes) = 131.030000
Episode 25768: Reward = 100.000000, Mean reward (over 100 episodes) = 130.030000
Episode 25769: Reward = 145.000000, Mean reward (over 100 episodes) = 130.110000
Episode 25770: Reward = 135.000000, Mean reward (over 100 episodes) = 130.400000
Episode 25771: Reward = 116.000000, Mean reward (over 100 episodes) = 130.300000
Episode 25772: Reward = 124.000000, Mean reward (over 100 episodes) = 130.340000
Episode 25773: Reward = 113.000000, Mean reward (over 100 episodes) = 130.460000
Episode 25774: Reward = 146.000000, Mean reward (over 100 episodes) = 130.610000
Episode 25775: Reward = 120.

Episode 25866: Reward = 117.000000, Mean reward (over 100 episodes) = 130.850000
Episode 25867: Reward = 129.000000, Mean reward (over 100 episodes) = 130.800000
Episode 25868: Reward = 148.000000, Mean reward (over 100 episodes) = 131.280000
Episode 25869: Reward = 107.000000, Mean reward (over 100 episodes) = 130.900000
Episode 25870: Reward = 138.000000, Mean reward (over 100 episodes) = 130.930000
Episode 25871: Reward = 166.000000, Mean reward (over 100 episodes) = 131.430000
Episode 25872: Reward = 200.000000, Mean reward (over 100 episodes) = 132.190000
Episode 25873: Reward = 131.000000, Mean reward (over 100 episodes) = 132.370000
Episode 25874: Reward = 115.000000, Mean reward (over 100 episodes) = 132.060000
Episode 25875: Reward = 114.000000, Mean reward (over 100 episodes) = 132.000000
Episode 25876: Reward = 123.000000, Mean reward (over 100 episodes) = 131.790000
Episode 25877: Reward = 115.000000, Mean reward (over 100 episodes) = 131.760000
Episode 25878: Reward = 131.

Episode 25968: Reward = 133.000000, Mean reward (over 100 episodes) = 133.020000
Episode 25969: Reward = 121.000000, Mean reward (over 100 episodes) = 133.160000
Episode 25970: Reward = 130.000000, Mean reward (over 100 episodes) = 133.080000
Episode 25971: Reward = 117.000000, Mean reward (over 100 episodes) = 132.590000
Episode 25972: Reward = 126.000000, Mean reward (over 100 episodes) = 131.850000
Episode 25973: Reward = 112.000000, Mean reward (over 100 episodes) = 131.660000
Episode 25974: Reward = 126.000000, Mean reward (over 100 episodes) = 131.770000
Episode 25975: Reward = 137.000000, Mean reward (over 100 episodes) = 132.000000
Episode 25976: Reward = 128.000000, Mean reward (over 100 episodes) = 132.050000
Episode 25977: Reward = 122.000000, Mean reward (over 100 episodes) = 132.120000
Episode 25978: Reward = 120.000000, Mean reward (over 100 episodes) = 132.010000
Episode 25979: Reward = 155.000000, Mean reward (over 100 episodes) = 132.200000
Episode 25980: Reward = 131.

Episode 26070: Reward = 154.000000, Mean reward (over 100 episodes) = 130.970000
Episode 26071: Reward = 139.000000, Mean reward (over 100 episodes) = 131.190000
Episode 26072: Reward = 134.000000, Mean reward (over 100 episodes) = 131.270000
Episode 26073: Reward = 119.000000, Mean reward (over 100 episodes) = 131.340000
Episode 26074: Reward = 148.000000, Mean reward (over 100 episodes) = 131.560000
Episode 26075: Reward = 115.000000, Mean reward (over 100 episodes) = 131.340000
Episode 26076: Reward = 146.000000, Mean reward (over 100 episodes) = 131.520000
Episode 26077: Reward = 122.000000, Mean reward (over 100 episodes) = 131.520000
Episode 26078: Reward = 145.000000, Mean reward (over 100 episodes) = 131.770000
Episode 26079: Reward = 131.000000, Mean reward (over 100 episodes) = 131.530000
Episode 26080: Reward = 136.000000, Mean reward (over 100 episodes) = 131.580000
Episode 26081: Reward = 129.000000, Mean reward (over 100 episodes) = 131.140000
Episode 26082: Reward = 167.

Episode 26174: Reward = 131.000000, Mean reward (over 100 episodes) = 131.900000
Episode 26175: Reward = 116.000000, Mean reward (over 100 episodes) = 131.910000
Episode 26176: Reward = 135.000000, Mean reward (over 100 episodes) = 131.800000
Episode 26177: Reward = 124.000000, Mean reward (over 100 episodes) = 131.820000
Episode 26178: Reward = 135.000000, Mean reward (over 100 episodes) = 131.720000
Episode 26179: Reward = 158.000000, Mean reward (over 100 episodes) = 131.990000
Episode 26180: Reward = 162.000000, Mean reward (over 100 episodes) = 132.250000
Episode 26181: Reward = 131.000000, Mean reward (over 100 episodes) = 132.270000
Episode 26182: Reward = 115.000000, Mean reward (over 100 episodes) = 131.750000
Episode 26183: Reward = 147.000000, Mean reward (over 100 episodes) = 132.000000
Episode 26184: Reward = 157.000000, Mean reward (over 100 episodes) = 132.060000
Episode 26185: Reward = 135.000000, Mean reward (over 100 episodes) = 131.950000
Episode 26186: Reward = 127.

Episode 26277: Reward = 136.000000, Mean reward (over 100 episodes) = 130.450000
Episode 26278: Reward = 141.000000, Mean reward (over 100 episodes) = 130.510000
Episode 26279: Reward = 131.000000, Mean reward (over 100 episodes) = 130.240000
Episode 26280: Reward = 117.000000, Mean reward (over 100 episodes) = 129.790000
Episode 26281: Reward = 122.000000, Mean reward (over 100 episodes) = 129.700000
Episode 26282: Reward = 132.000000, Mean reward (over 100 episodes) = 129.870000
Episode 26283: Reward = 117.000000, Mean reward (over 100 episodes) = 129.570000
Episode 26284: Reward = 128.000000, Mean reward (over 100 episodes) = 129.280000
Episode 26285: Reward = 117.000000, Mean reward (over 100 episodes) = 129.100000
Episode 26286: Reward = 130.000000, Mean reward (over 100 episodes) = 129.130000
Episode 26287: Reward = 138.000000, Mean reward (over 100 episodes) = 129.050000
Episode 26288: Reward = 121.000000, Mean reward (over 100 episodes) = 129.120000
Episode 26289: Reward = 108.

Episode 26381: Reward = 161.000000, Mean reward (over 100 episodes) = 130.640000
Episode 26382: Reward = 111.000000, Mean reward (over 100 episodes) = 130.430000
Episode 26383: Reward = 113.000000, Mean reward (over 100 episodes) = 130.390000
Episode 26384: Reward = 149.000000, Mean reward (over 100 episodes) = 130.600000
Episode 26385: Reward = 155.000000, Mean reward (over 100 episodes) = 130.980000
Episode 26386: Reward = 120.000000, Mean reward (over 100 episodes) = 130.880000
Episode 26387: Reward = 142.000000, Mean reward (over 100 episodes) = 130.920000
Episode 26388: Reward = 147.000000, Mean reward (over 100 episodes) = 131.180000
Episode 26389: Reward = 132.000000, Mean reward (over 100 episodes) = 131.420000
Episode 26390: Reward = 121.000000, Mean reward (over 100 episodes) = 131.030000
Episode 26391: Reward = 157.000000, Mean reward (over 100 episodes) = 131.110000
Episode 26392: Reward = 142.000000, Mean reward (over 100 episodes) = 131.300000
Episode 26393: Reward = 130.

Episode 26484: Reward = 170.000000, Mean reward (over 100 episodes) = 132.100000
Episode 26485: Reward = 125.000000, Mean reward (over 100 episodes) = 131.800000
Episode 26486: Reward = 124.000000, Mean reward (over 100 episodes) = 131.840000
Episode 26487: Reward = 106.000000, Mean reward (over 100 episodes) = 131.480000
Episode 26488: Reward = 123.000000, Mean reward (over 100 episodes) = 131.240000
Episode 26489: Reward = 128.000000, Mean reward (over 100 episodes) = 131.200000
Episode 26490: Reward = 124.000000, Mean reward (over 100 episodes) = 131.230000
Episode 26491: Reward = 104.000000, Mean reward (over 100 episodes) = 130.700000
Episode 26492: Reward = 103.000000, Mean reward (over 100 episodes) = 130.310000
Episode 26493: Reward = 136.000000, Mean reward (over 100 episodes) = 130.370000
Episode 26494: Reward = 105.000000, Mean reward (over 100 episodes) = 129.910000
Episode 26495: Reward = 143.000000, Mean reward (over 100 episodes) = 130.070000
Episode 26496: Reward = 121.

In [None]:
import matplotlib.pyplot as plt

def plot(x, y, name):
    fig, ax = plt.subplots()
    ax.plot(x, y)
    
    ax.set(xlabel='Episode', ylabel='Reward', title=name)
    ax.grid()

    fig.savefig("%s.png" % name)
    plt.show()
    
plot(range(episode), plot_history_episode_rewards, 'REINFORCE')

In [None]:
while True:
    do_rollout(env=env, policy=policy, render=True)