In [1]:
pip install torch pandas

Note: you may need to restart the kernel to use updated packages.


In [None]:
"""
Implementation of PPO
ref: Schulman, John, et al. "Proximal policy optimization algorithms." arXiv preprint arXiv:1707.06347 (2017).
ref: https://github.com/Jiankai-Sun/Proximal-Policy-Optimization-in-Pytorch/blob/master/ppo.py
ref: https://github.com/openai/baselines/tree/master/baselines/ppo2
NOTICE:
    `Tensor2` denotes a 2-D tensor of shape (num_samples, num_dims).
"""

import gym
import torch
import torch.nn as nn
import torch.optim as opt
from torch import Tensor
from torch.autograd import Variable
from collections import namedtuple
from itertools import count
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
from os.path import join as joindir
from os import makedirs as mkdir
import pandas as pd
import numpy as np
import argparse
import datetime
import math


# One environment step as stored in the rollout buffer. The field order is the
# positional order expected by Memory.push(...).
Transition = namedtuple('Transition', ('state', 'value', 'action', 'logproba', 'mask', 'next_state', 'reward'))
# Small constant added to denominators to avoid division by zero.
EPS = 1e-10
# NOTE(review): __file__ is overwritten by hand here, presumably because this
# code runs as a notebook cell where __file__ is not defined -- confirm.
__file__ = '/home/cong/ray_results/'
# The string above contains no '.', so split('.')[:-1] is an empty list and the
# joined suffix is ''. RESULT_DIR therefore evaluates to '../result/' and does
# not depend on the path assigned to __file__.
RESULT_DIR = joindir('../result', '.'.join(__file__.split('.')[:-1]))
# Create the output directory up front; exist_ok avoids failing on re-runs.
mkdir(RESULT_DIR, exist_ok=True)


class args:
    """
    Hyper-parameter namespace for the PPO experiment.

    The class object itself is passed around (``ppo(args)``, ``test(args)``)
    and its attributes are read, and in places reassigned, directly.
    """

    # --- experiment control ---
    env_name = 'Hopper-v2'      # Gym environment id handed to gym.make
    seed = 1234                 # base random seed
    num_parallel_run = 5        # number of independent training runs per environment
    num_episode = 2000          # number of collect/update iterations
    log_num_episode = 1         # print a progress line every this many iterations

    # --- data collection ---
    batch_size = 2048           # minimum number of environment steps gathered per iteration
    max_step_per_round = 2000   # cap on the length of a single rollout

    # --- return / advantage estimation ---
    gamma = 0.995               # discount factor
    lamda = 0.97                # GAE lambda (spelled this way because `lambda` is a keyword)

    # --- optimisation ---
    num_epoch = 10              # passes over each collected batch
    minibatch_size = 256        # samples per gradient step
    lr = 3e-4                   # initial Adam learning rate
    clip = 0.2                  # initial PPO ratio-clipping range
    loss_coeff_value = 0.5      # weight of the value loss in the total loss
    loss_coeff_entropy = 0.01   # weight of the entropy term in the total loss

    # --- tricks ---
    schedule_adam = 'linear'    # 'linear' decays the learning rate over the iterations
    schedule_clip = 'linear'    # 'linear' decays the clipping range over the iterations
    layer_norm = True           # enables the orthogonal weight initialisation in ActorCritic
    state_norm = True           # normalise observations with a running mean/std filter
    advantage_norm = True       # standardise advantages within each batch
    lossvalue_norm = True       # divide the value loss by 6 * std of the minibatch returns


class RunningStat(object):
    """
    Element-wise running mean / variance of a stream of equally shaped arrays.

    The statistics are updated one sample at a time, so the samples themselves
    never need to be stored.
    """

    def __init__(self, shape):
        self._n = 0                 # number of samples seen so far
        self._M = np.zeros(shape)   # running mean
        self._S = np.zeros(shape)   # running sum of squared deviations from the mean

    def push(self, x):
        """Fold one sample into the statistics; its shape must equal ``self.shape``."""
        sample = np.asarray(x)
        assert sample.shape == self._M.shape
        self._n += 1
        if self._n == 1:
            # The first sample simply becomes the mean; _S stays at zero.
            self._M[...] = sample
            return
        # `delta` is measured against the mean *before* this sample is added,
        # the second factor below against the mean *after* it.
        delta = sample - self._M
        self._M += delta / self._n
        self._S += delta * (sample - self._M)

    @property
    def n(self):
        """Number of samples pushed so far."""
        return self._n

    @property
    def mean(self):
        """Current running mean (the internal array, not a copy)."""
        return self._M

    @property
    def var(self):
        """Sample variance; with fewer than two samples the squared mean is returned instead."""
        if self._n > 1:
            return self._S / (self._n - 1)
        return np.square(self._M)

    @property
    def std(self):
        """Square root of ``var``."""
        return np.sqrt(self.var)

    @property
    def shape(self):
        """Shape of the tracked arrays."""
        return self._M.shape


class ZFilter:
    """
    Normalising filter: y = (x - mean) / std, where mean and std are running
    estimates maintained by a RunningStat. The result is optionally clipped
    to [-clip, clip].
    """

    def __init__(self, shape, demean=True, destd=True, clip=10.0):
        self.demean, self.destd, self.clip = demean, destd, clip
        self.rs = RunningStat(shape)

    def __call__(self, x, update=True):
        """
        Normalise ``x``.

        :param x: sample to normalise
        :param update: when True, ``x`` is first added to the running statistics
        :return: the normalised (and, if ``clip`` is truthy, clipped) sample
        """
        if update:
            self.rs.push(x)
        normalized = x
        if self.demean:
            normalized = normalized - self.rs.mean
        if self.destd:
            # 1e-8 keeps the division finite when the running std is zero.
            normalized = normalized / (self.rs.std + 1e-8)
        if not self.clip:
            return normalized
        return np.clip(normalized, -self.clip, self.clip)

    def output_shape(self, input_space):
        """The filter is element-wise, so the output shape is the input space's shape."""
        return input_space.shape


class ActorCritic(nn.Module):
    """
    Separate actor and critic MLPs (two tanh hidden layers of 64 units each).

    The actor outputs the mean of a diagonal Gaussian over actions; the
    log-std is a state-independent learned parameter. The critic outputs one
    scalar value per state.
    """

    def __init__(self, num_inputs, num_outputs, layer_norm=True):
        """
        :param num_inputs: size of a state vector
        :param num_outputs: size of an action vector
        :param layer_norm: when True, apply the orthogonal initialisation
            implemented by ``ActorCritic.layer_norm`` to every linear layer
        """
        super(ActorCritic, self).__init__()

        self.actor_fc1 = nn.Linear(num_inputs, 64)
        self.actor_fc2 = nn.Linear(64, 64)
        self.actor_fc3 = nn.Linear(64, num_outputs)
        # State-independent log standard deviation, initialised to 0 (std = 1).
        self.actor_logstd = nn.Parameter(torch.zeros(1, num_outputs))

        self.critic_fc1 = nn.Linear(num_inputs, 64)
        self.critic_fc2 = nn.Linear(64, 64)
        self.critic_fc3 = nn.Linear(64, 1)

        if layer_norm:
            self.layer_norm(self.actor_fc1, std=1.0)
            self.layer_norm(self.actor_fc2, std=1.0)
            # A small gain on the last actor layer keeps the initial action means near zero.
            self.layer_norm(self.actor_fc3, std=0.01)

            self.layer_norm(self.critic_fc1, std=1.0)
            self.layer_norm(self.critic_fc2, std=1.0)
            self.layer_norm(self.critic_fc3, std=1.0)

    @staticmethod
    def layer_norm(layer, std=1.0, bias_const=0.0):
        """
        Initialise ``layer`` in place: orthogonal weights scaled by ``std``
        and a constant bias. Despite its name this is a weight initialiser,
        not a normalisation layer.
        """
        torch.nn.init.orthogonal_(layer.weight, std)
        torch.nn.init.constant_(layer.bias, bias_const)

    def forward(self, states):
        """
        run policy network (actor) as well as value network (critic)
        :param states: a Tensor2 represents states
        :return: 3 Tensor2: (action_mean, action_logstd, critic_value)
        """
        action_mean, action_logstd = self._forward_actor(states)
        critic_value = self._forward_critic(states)
        return action_mean, action_logstd, critic_value

    def _forward_actor(self, states):
        """Return (action_mean, action_logstd); the log-std is expanded to the shape of the mean."""
        x = torch.tanh(self.actor_fc1(states))
        x = torch.tanh(self.actor_fc2(x))
        action_mean = self.actor_fc3(x)
        action_logstd = self.actor_logstd.expand_as(action_mean)
        return action_mean, action_logstd

    def _forward_critic(self, states):
        """Return the critic's value estimate, one column per input row."""
        x = torch.tanh(self.critic_fc1(states))
        x = torch.tanh(self.critic_fc2(x))
        critic_value = self.critic_fc3(x)
        return critic_value

    def select_action(self, action_mean, action_logstd, return_logproba=True):
        """
        given mean and std, sample an action from normal(mean, std)
        also returns the log-probability of the sampled action
        :param action_mean: Tensor2 of action means
        :param action_logstd: Tensor2 of log standard deviations, same shape as the mean
        :param return_logproba: when False the log-probability is not computed
        :return: (action, logproba); logproba is None when return_logproba is False
        """
        action_std = torch.exp(action_logstd)
        action = torch.normal(action_mean, action_std)
        # Bind a default so the return below is valid when the caller opts out
        # (previously this path raised UnboundLocalError).
        logproba = None
        if return_logproba:
            logproba = self._normal_logproba(action, action_mean, action_logstd, action_std)
        return action, logproba

    @staticmethod
    def _normal_logproba(x, mean, logstd, std=None):
        """
        Log-density of ``x`` under a diagonal Gaussian, summed over dimension 1.
        :param std: optional precomputed exp(logstd); derived from logstd when omitted
        :return: one log-probability per row of ``x``
        """
        if std is None:
            std = torch.exp(logstd)

        std_sq = std.pow(2)
        logproba = - 0.5 * math.log(2 * math.pi) - logstd - (x - mean).pow(2) / (2 * std_sq)
        return logproba.sum(1)

    def get_logproba(self, states, actions):
        """
        return the log-probability of the given actions under the current policy
        for the corresponding states
        :param states: Tensor
        :param actions: Tensor
        """
        action_mean, action_logstd = self._forward_actor(states)
        logproba = self._normal_logproba(actions, action_mean, action_logstd)
        return logproba

    
class Memory(object):
    """Append-only buffer of Transition records collected during rollouts."""

    def __init__(self):
        self.memory = list()

    def push(self, *args):
        """Store one transition; the positional arguments follow the Transition field order."""
        transition = Transition(*args)
        self.memory.append(transition)

    def sample(self):
        """Return the whole buffer as one Transition whose fields are tuples, one entry per stored step."""
        columns = zip(*self.memory)
        return Transition(*columns)

    def __len__(self):
        """Number of stored transitions."""
        return len(self.memory)

def ppo(args):
    """
    Train an ActorCritic policy with PPO on the Gym environment ``args.env_name``.

    :param args: configuration object exposing the hyper-parameters read here and
        in ``_ppo_train`` (env_name, seed, num_episode, batch_size, gamma, ...)
    :return: list of dicts, one per training iteration, with keys
        'episode', 'steps', 'meanepreward' and 'meaneplen'
    """
    env = gym.make(args.env_name)
    try:
        return _ppo_train(env, args)
    finally:
        # Release the environment even when training raises; callers create
        # one environment per run and never close it themselves.
        env.close()


def _ppo_train(env, args):
    """Run the PPO collect/update loop on an already-created environment and return the reward log."""
    num_inputs = env.observation_space.shape[0]
    num_actions = env.action_space.shape[0]

    env.seed(args.seed)
    torch.manual_seed(args.seed)
    # Minibatch indices are drawn with np.random.choice below, so numpy's
    # global RNG has to be seeded as well for a run to be reproducible.
    np.random.seed(args.seed)

    network = ActorCritic(num_inputs, num_actions, layer_norm=args.layer_norm)
    optimizer = opt.Adam(network.parameters(), lr=args.lr)

    running_state = ZFilter((num_inputs,), clip=5.0)

    # record average 1-round cumulative reward in every episode
    reward_record = []
    global_steps = 0

    lr_now = args.lr
    clip_now = args.clip

    for i_episode in range(args.num_episode):
        # step1: perform current policy to collect trajectories
        # this is an on-policy method!
        memory = Memory()
        num_steps = 0
        reward_list = []
        len_list = []
        while num_steps < args.batch_size:
            state = env.reset()
            if args.state_norm:
                state = running_state(state)
            reward_sum = 0
            for t in range(args.max_step_per_round):
                # Rollout only: nothing is back-propagated through these
                # forward passes, so skip building autograd graphs for them.
                with torch.no_grad():
                    action_mean, action_logstd, value = network(Tensor(state).unsqueeze(0))
                    action, logproba = network.select_action(action_mean, action_logstd)
                action = action.data.numpy()[0]
                logproba = logproba.data.numpy()[0]
                next_state, reward, done, _ = env.step(action)
                reward_sum += reward
                if args.state_norm:
                    next_state = running_state(next_state)
                # mask = 0 cuts the return/advantage recursion at an episode end.
                # NOTE(review): a rollout stopped by max_step_per_round keeps
                # mask = 1, so the backward recursion below continues into the
                # next rollout stored in the buffer -- confirm this is intended.
                mask = 0 if done else 1

                memory.push(state, value, action, logproba, mask, next_state, reward)

                if done:
                    break

                state = next_state

            num_steps += (t + 1)
            global_steps += (t + 1)
            reward_list.append(reward_sum)
            len_list.append(t + 1)
        reward_record.append({
            'episode': i_episode,
            'steps': global_steps,
            'meanepreward': np.mean(reward_list),
            'meaneplen': np.mean(len_list)})

        batch = memory.sample()
        batch_size = len(memory)

        # step2: extract variables from trajectories
        rewards = Tensor(batch.reward)
        values = Tensor(batch.value)
        masks = Tensor(batch.mask)
        actions = Tensor(batch.action)
        states = Tensor(batch.state)
        oldlogproba = Tensor(batch.logproba)

        # Allocated uninitialised; every entry is written by the backward pass below.
        returns = Tensor(batch_size)
        deltas = Tensor(batch_size)
        advantages = Tensor(batch_size)

        prev_return = 0
        prev_value = 0
        prev_advantage = 0
        # Walk the buffer backwards so each step can reuse its successor's
        # return, value and advantage.
        for i in reversed(range(batch_size)):
            returns[i] = rewards[i] + args.gamma * prev_return * masks[i]
            deltas[i] = rewards[i] + args.gamma * prev_value * masks[i] - values[i]
            # ref: https://arxiv.org/pdf/1506.02438.pdf (generalization advantage estimate)
            advantages[i] = deltas[i] + args.gamma * args.lamda * prev_advantage * masks[i]

            prev_return = returns[i]
            prev_value = values[i]
            prev_advantage = advantages[i]
        if args.advantage_norm:
            advantages = (advantages - advantages.mean()) / (advantages.std() + EPS)

        # step3: num_epoch passes' worth of minibatch gradient steps
        for i_epoch in range(int(args.num_epoch * batch_size / args.minibatch_size)):
            # sample from current batch
            minibatch_ind = np.random.choice(batch_size, args.minibatch_size, replace=False)
            minibatch_states = states[minibatch_ind]
            minibatch_actions = actions[minibatch_ind]
            minibatch_oldlogproba = oldlogproba[minibatch_ind]
            minibatch_newlogproba = network.get_logproba(minibatch_states, minibatch_actions)
            minibatch_advantages = advantages[minibatch_ind]
            minibatch_returns = returns[minibatch_ind]
            minibatch_newvalues = network._forward_critic(minibatch_states).flatten()

            # Clipped surrogate objective: take the smaller of the unclipped
            # and clipped ratio-weighted advantages.
            ratio = torch.exp(minibatch_newlogproba - minibatch_oldlogproba)
            surr1 = ratio * minibatch_advantages
            surr2 = ratio.clamp(1 - clip_now, 1 + clip_now) * minibatch_advantages
            loss_surr = - torch.mean(torch.min(surr1, surr2))

            # not sure the value loss should be clipped as well
            # clip example: https://github.com/Jiankai-Sun/Proximal-Policy-Optimization-in-Pytorch/blob/master/ppo.py
            # however, it does not make sense to clip score-like value by a dimensionless clipping parameter
            # moreover, original paper does not mention clipped value
            if args.lossvalue_norm:
                # EPS guards against a zero std (all returns in the minibatch
                # equal), mirroring the advantage normalisation above.
                minibatch_return_6std = 6 * minibatch_returns.std() + EPS
                loss_value = torch.mean((minibatch_newvalues - minibatch_returns).pow(2)) / minibatch_return_6std
            else:
                loss_value = torch.mean((minibatch_newvalues - minibatch_returns).pow(2))

            # mean(p * log p) evaluated at the sampled actions.
            # NOTE(review): presumably meant as a negative-entropy penalty;
            # confirm this estimator is the intended one.
            loss_entropy = torch.mean(torch.exp(minibatch_newlogproba) * minibatch_newlogproba)

            total_loss = loss_surr + args.loss_coeff_value * loss_value + args.loss_coeff_entropy * loss_entropy
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

        # Linear decay of the clipping range and learning rate over the iterations.
        if args.schedule_clip == 'linear':
            ep_ratio = 1 - (i_episode / args.num_episode)
            clip_now = args.clip * ep_ratio

        if args.schedule_adam == 'linear':
            ep_ratio = 1 - (i_episode / args.num_episode)
            lr_now = args.lr * ep_ratio
            # set learning rate
            # ref: https://stackoverflow.com/questions/48324152/
            for g in optimizer.param_groups:
                g['lr'] = lr_now

        if i_episode % args.log_num_episode == 0:
            # The loss terms printed are those of the last minibatch step of this iteration.
            print('Finished episode: {} Reward: {:.4f} total_loss = {:.4f} = {:.4f} + {} * {:.4f} + {} * {:.4f}' \
                .format(i_episode, reward_record[-1]['meanepreward'], total_loss.data, loss_surr.data, args.loss_coeff_value,
                loss_value.data, args.loss_coeff_entropy, loss_entropy.data))
            print('-----------------')

    return reward_record

def test(args):
    """
    Train ``args.num_parallel_run`` independent PPO runs on ``args.env_name``
    and write their concatenated reward logs to
    ``RESULT_DIR/ppo-record-<env_name>.csv``.

    Run ``i`` uses seed ``args.seed + i + 1``. ``args.seed`` is restored to its
    entry value before returning, so the seeds used for one environment do not
    depend on how many runs were executed before it.

    :param args: configuration object; ``seed`` is reassigned temporarily
    """
    base_seed = args.seed
    record_dfs = []
    try:
        for i in range(args.num_parallel_run):
            args.seed = base_seed + i + 1
            reward_record = pd.DataFrame(ppo(args))
            # Tag every row with the index of the run that produced it.
            reward_record['#parallel_run'] = i
            record_dfs.append(reward_record)
    finally:
        # Undo the temporary change to the shared configuration, also on error.
        args.seed = base_seed
    record_dfs = pd.concat(record_dfs, axis=0)
    record_dfs.to_csv(joindir(RESULT_DIR, 'ppo-record-{}.csv'.format(args.env_name)))
    
if __name__ == '__main__':
    # Run the multi-seed experiment once for each Gym environment id below;
    # test() writes one CSV per environment.
    for env in (
        'Walker2d-v2',
        'Swimmer-v2',
        'Hopper-v2',
        'Humanoid-v2',
        'HalfCheetah-v2',
        'Reacher-v2',
    ):
        args.env_name = env
        test(args)


Finished episode: 0 Reward: -1.2879 total_loss = 0.3117 = 0.0094 + 0.5 * 0.6046 + 0.01 * -0.0036
-----------------
Finished episode: 1 Reward: 1.1712 total_loss = 0.3627 = 0.0039 + 0.5 * 0.7178 + 0.01 * -0.0034
-----------------
Finished episode: 2 Reward: 5.5048 total_loss = 0.5387 = -0.0069 + 0.5 * 1.0912 + 0.01 * -0.0037
-----------------
Finished episode: 3 Reward: 8.0447 total_loss = 0.6352 = -0.0013 + 0.5 * 1.2731 + 0.01 * -0.0039
-----------------
Finished episode: 4 Reward: 19.0148 total_loss = 1.1188 = 0.0112 + 0.5 * 2.2153 + 0.01 * -0.0041
-----------------
Finished episode: 5 Reward: 22.7930 total_loss = 1.7821 = -0.0076 + 0.5 * 3.5794 + 0.01 * -0.0036
-----------------
Finished episode: 6 Reward: 37.7295 total_loss = 2.1005 = 0.0620 + 0.5 * 4.0772 + 0.01 * -0.0045
-----------------
Finished episode: 7 Reward: 85.8704 total_loss = 5.7744 = -0.0326 + 0.5 * 11.6140 + 0.01 * -0.0041
-----------------
Finished episode: 8 Reward: 121.0541 total_loss = 7.0265 = 0.0492 + 0.5 * 13.9

Finished episode: 70 Reward: 282.4383 total_loss = 2.8167 = -0.0422 + 0.5 * 5.7179 + 0.01 * -0.0058
-----------------
Finished episode: 71 Reward: 334.2741 total_loss = 3.8851 = -0.0840 + 0.5 * 7.9382 + 0.01 * -0.0051
-----------------
Finished episode: 72 Reward: 333.7252 total_loss = 4.0012 = -0.0292 + 0.5 * 8.0608 + 0.01 * -0.0049
-----------------
Finished episode: 73 Reward: 376.2391 total_loss = 4.6977 = 0.0117 + 0.5 * 9.3722 + 0.01 * -0.0054
-----------------
Finished episode: 74 Reward: 399.6986 total_loss = 4.9157 = -0.0675 + 0.5 * 9.9665 + 0.01 * -0.0053
-----------------
Finished episode: 75 Reward: 298.5119 total_loss = 2.5870 = -0.0165 + 0.5 * 5.2070 + 0.01 * -0.0046
-----------------
Finished episode: 76 Reward: 359.3043 total_loss = 3.9656 = -0.0974 + 0.5 * 8.1262 + 0.01 * -0.0055
-----------------
Finished episode: 77 Reward: 326.8635 total_loss = 3.7621 = 0.0198 + 0.5 * 7.4847 + 0.01 * -0.0054
-----------------
Finished episode: 78 Reward: 339.2131 total_loss = 3.5248 

Finished episode: 140 Reward: 580.4315 total_loss = 4.8657 = -0.1531 + 0.5 * 10.0377 + 0.01 * -0.0074
-----------------
Finished episode: 141 Reward: 591.5713 total_loss = 5.6418 = -0.0638 + 0.5 * 11.4113 + 0.01 * -0.0078
-----------------
Finished episode: 142 Reward: 567.7735 total_loss = 4.0787 = -0.0062 + 0.5 * 8.1700 + 0.01 * -0.0080
-----------------
Finished episode: 143 Reward: 442.4320 total_loss = 2.5461 = 0.0574 + 0.5 * 4.9776 + 0.01 * -0.0078
-----------------
Finished episode: 144 Reward: 584.2290 total_loss = 3.9049 = -0.1482 + 0.5 * 8.1063 + 0.01 * -0.0077
-----------------
Finished episode: 145 Reward: 727.8127 total_loss = 8.1779 = -0.1236 + 0.5 * 16.6031 + 0.01 * -0.0075
-----------------
Finished episode: 146 Reward: 670.1767 total_loss = 4.9416 = -0.0949 + 0.5 * 10.0732 + 0.01 * -0.0087
-----------------
Finished episode: 147 Reward: 672.1415 total_loss = 4.6266 = 0.0378 + 0.5 * 9.1780 + 0.01 * -0.0087
-----------------
Finished episode: 148 Reward: 919.8560 total_l

Finished episode: 209 Reward: 1070.0438 total_loss = 5.5636 = 0.0118 + 0.5 * 11.1039 + 0.01 * -0.0122
-----------------
Finished episode: 210 Reward: 1395.0217 total_loss = 9.1315 = -0.0494 + 0.5 * 18.3621 + 0.01 * -0.0112
-----------------
Finished episode: 211 Reward: 1286.0353 total_loss = 5.4016 = -0.0066 + 0.5 * 10.8166 + 0.01 * -0.0117
-----------------
Finished episode: 212 Reward: 1060.3058 total_loss = 6.7418 = 0.0335 + 0.5 * 13.4170 + 0.01 * -0.0122
-----------------
Finished episode: 213 Reward: 1204.1392 total_loss = 9.7693 = -0.0584 + 0.5 * 19.6556 + 0.01 * -0.0120
-----------------
Finished episode: 214 Reward: 980.7845 total_loss = 5.8141 = -0.0380 + 0.5 * 11.7044 + 0.01 * -0.0107
-----------------
Finished episode: 215 Reward: 1007.9625 total_loss = 5.8274 = -0.0834 + 0.5 * 11.8219 + 0.01 * -0.0134
-----------------
Finished episode: 216 Reward: 1275.5142 total_loss = 5.9326 = -0.0121 + 0.5 * 11.8898 + 0.01 * -0.0136
-----------------
Finished episode: 217 Reward: 2160.

Finished episode: 277 Reward: 1626.7912 total_loss = 4.7916 = -0.1144 + 0.5 * 9.8123 + 0.01 * -0.0159
-----------------
Finished episode: 278 Reward: 1708.9549 total_loss = 8.7294 = 0.0030 + 0.5 * 17.4531 + 0.01 * -0.0164
-----------------
Finished episode: 279 Reward: 2079.5586 total_loss = 10.6121 = -0.0109 + 0.5 * 21.2463 + 0.01 * -0.0134
-----------------
Finished episode: 280 Reward: 1286.3356 total_loss = 6.7708 = -0.0499 + 0.5 * 13.6417 + 0.01 * -0.0154
-----------------
Finished episode: 281 Reward: 1961.2077 total_loss = 7.6805 = 0.0387 + 0.5 * 15.2838 + 0.01 * -0.0145
-----------------
Finished episode: 282 Reward: 1314.2409 total_loss = 6.8657 = 0.0513 + 0.5 * 13.6291 + 0.01 * -0.0148
-----------------
Finished episode: 283 Reward: 1251.0070 total_loss = 4.5763 = -0.0483 + 0.5 * 9.2494 + 0.01 * -0.0165
-----------------
Finished episode: 284 Reward: 1944.9493 total_loss = 7.1432 = -0.0631 + 0.5 * 14.4131 + 0.01 * -0.0161
-----------------
Finished episode: 285 Reward: 2083.6

Finished episode: 345 Reward: 2469.8838 total_loss = 7.6871 = -0.0503 + 0.5 * 15.4752 + 0.01 * -0.0203
-----------------
Finished episode: 346 Reward: 2780.4808 total_loss = 12.2852 = 0.0015 + 0.5 * 24.5679 + 0.01 * -0.0206
-----------------
Finished episode: 347 Reward: 2105.2082 total_loss = 10.2216 = -0.0289 + 0.5 * 20.5014 + 0.01 * -0.0212
-----------------
Finished episode: 348 Reward: 1864.6708 total_loss = 8.5517 = -0.0726 + 0.5 * 17.2489 + 0.01 * -0.0193
-----------------
Finished episode: 349 Reward: 1649.3079 total_loss = 7.7001 = 0.0081 + 0.5 * 15.3845 + 0.01 * -0.0200
-----------------
Finished episode: 350 Reward: 1458.4600 total_loss = 4.5688 = -0.0566 + 0.5 * 9.2513 + 0.01 * -0.0202
-----------------
Finished episode: 351 Reward: 2057.5989 total_loss = 7.2031 = 0.0128 + 0.5 * 14.3810 + 0.01 * -0.0214
-----------------
Finished episode: 352 Reward: 1604.2064 total_loss = 4.1553 = -0.0082 + 0.5 * 8.3273 + 0.01 * -0.0199
-----------------
Finished episode: 353 Reward: 1888.

Finished episode: 413 Reward: 1578.1859 total_loss = 6.0305 = -0.0592 + 0.5 * 12.1799 + 0.01 * -0.0219
-----------------
Finished episode: 414 Reward: 922.3226 total_loss = 3.4026 = -0.0958 + 0.5 * 6.9971 + 0.01 * -0.0210
-----------------
Finished episode: 415 Reward: 2133.2941 total_loss = 5.0134 = -0.0886 + 0.5 * 10.2044 + 0.01 * -0.0218
-----------------
Finished episode: 416 Reward: 2155.2484 total_loss = 8.9133 = -0.0231 + 0.5 * 17.8734 + 0.01 * -0.0239
-----------------
Finished episode: 417 Reward: 2239.3753 total_loss = 3.6645 = 0.0467 + 0.5 * 7.2360 + 0.01 * -0.0191
-----------------
Finished episode: 418 Reward: 2011.8909 total_loss = 6.8331 = -0.0323 + 0.5 * 13.7313 + 0.01 * -0.0215
-----------------
Finished episode: 419 Reward: 2114.9936 total_loss = 9.3228 = -0.0438 + 0.5 * 18.7338 + 0.01 * -0.0221
-----------------
Finished episode: 420 Reward: 1641.0506 total_loss = 10.5792 = -0.0310 + 0.5 * 21.2209 + 0.01 * -0.0238
-----------------
Finished episode: 421 Reward: 2358.

Finished episode: 481 Reward: 2829.5510 total_loss = 13.7523 = -0.0226 + 0.5 * 27.5502 + 0.01 * -0.0214
-----------------
Finished episode: 482 Reward: 1794.7148 total_loss = 7.5631 = -0.0372 + 0.5 * 15.2012 + 0.01 * -0.0248
-----------------
Finished episode: 483 Reward: 1890.1796 total_loss = 6.9617 = 0.0444 + 0.5 * 13.8350 + 0.01 * -0.0224
-----------------
Finished episode: 484 Reward: 1775.6171 total_loss = 8.0567 = -0.0145 + 0.5 * 16.1429 + 0.01 * -0.0233
-----------------
Finished episode: 485 Reward: 2611.5782 total_loss = 7.4733 = 0.0614 + 0.5 * 14.8244 + 0.01 * -0.0231
-----------------
Finished episode: 486 Reward: 2387.4741 total_loss = 12.3938 = -0.0204 + 0.5 * 24.8289 + 0.01 * -0.0214
-----------------
Finished episode: 487 Reward: 1933.1285 total_loss = 9.2056 = 0.0726 + 0.5 * 18.2664 + 0.01 * -0.0199
-----------------
Finished episode: 488 Reward: 3028.6664 total_loss = 13.5716 = 0.0214 + 0.5 * 27.1009 + 0.01 * -0.0216
-----------------
Finished episode: 489 Reward: 239

Finished episode: 549 Reward: 2675.0608 total_loss = 12.1632 = 0.0010 + 0.5 * 24.3249 + 0.01 * -0.0239
-----------------
Finished episode: 550 Reward: 2106.9373 total_loss = 15.5862 = -0.0360 + 0.5 * 31.2447 + 0.01 * -0.0219
-----------------
Finished episode: 551 Reward: 2370.0790 total_loss = 10.6301 = 0.0620 + 0.5 * 21.1366 + 0.01 * -0.0222
-----------------
Finished episode: 552 Reward: 1870.8039 total_loss = 2.9208 = 0.0808 + 0.5 * 5.6805 + 0.01 * -0.0226
-----------------
Finished episode: 553 Reward: 2206.9388 total_loss = 6.9523 = 0.0552 + 0.5 * 13.7945 + 0.01 * -0.0188
-----------------
Finished episode: 554 Reward: 2998.7110 total_loss = 13.9979 = -0.0493 + 0.5 * 28.0949 + 0.01 * -0.0231
-----------------
Finished episode: 555 Reward: 1908.3505 total_loss = 10.2547 = -0.1011 + 0.5 * 20.7120 + 0.01 * -0.0207
-----------------
Finished episode: 556 Reward: 2304.8403 total_loss = 14.9529 = 0.0317 + 0.5 * 29.8430 + 0.01 * -0.0247
-----------------
Finished episode: 557 Reward: 33

Finished episode: 617 Reward: 2712.6744 total_loss = 14.7014 = 0.0244 + 0.5 * 29.3546 + 0.01 * -0.0245
-----------------
Finished episode: 618 Reward: 2390.2023 total_loss = 10.7466 = -0.0552 + 0.5 * 21.6041 + 0.01 * -0.0239
-----------------
Finished episode: 619 Reward: 2425.5160 total_loss = 3.2130 = 0.0165 + 0.5 * 6.3936 + 0.01 * -0.0232
-----------------
Finished episode: 620 Reward: 2470.3662 total_loss = 3.6291 = -0.0461 + 0.5 * 7.3509 + 0.01 * -0.0221
-----------------
Finished episode: 621 Reward: 2583.5104 total_loss = 11.6703 = -0.0285 + 0.5 * 23.3982 + 0.01 * -0.0246
-----------------
Finished episode: 622 Reward: 2860.1291 total_loss = 6.9307 = -0.0575 + 0.5 * 13.9769 + 0.01 * -0.0221
-----------------
Finished episode: 623 Reward: 3356.6925 total_loss = 13.0552 = 0.0781 + 0.5 * 25.9547 + 0.01 * -0.0247
-----------------
Finished episode: 624 Reward: 1996.3758 total_loss = 3.3516 = -0.0642 + 0.5 * 6.8320 + 0.01 * -0.0236
-----------------
Finished episode: 625 Reward: 2072

Finished episode: 685 Reward: 3448.0305 total_loss = 13.6233 = -0.0356 + 0.5 * 27.3183 + 0.01 * -0.0243
-----------------
Finished episode: 686 Reward: 2754.9816 total_loss = 8.7506 = -0.0715 + 0.5 * 17.6446 + 0.01 * -0.0230
-----------------
Finished episode: 687 Reward: 3667.9317 total_loss = 14.5730 = -0.1070 + 0.5 * 29.3605 + 0.01 * -0.0263
-----------------
Finished episode: 688 Reward: 2956.9091 total_loss = 6.8552 = -0.0845 + 0.5 * 13.8799 + 0.01 * -0.0224
-----------------
Finished episode: 689 Reward: 2436.1606 total_loss = 14.3712 = 0.0031 + 0.5 * 28.7368 + 0.01 * -0.0219
-----------------
Finished episode: 690 Reward: 2846.1093 total_loss = 8.8409 = -0.0451 + 0.5 * 17.7726 + 0.01 * -0.0224
-----------------
Finished episode: 691 Reward: 3538.0135 total_loss = 14.5508 = 0.0137 + 0.5 * 29.0748 + 0.01 * -0.0261
-----------------
Finished episode: 692 Reward: 3224.3034 total_loss = 12.7705 = -0.0968 + 0.5 * 25.7350 + 0.01 * -0.0221
-----------------
Finished episode: 693 Reward:

Finished episode: 753 Reward: 3387.1524 total_loss = 10.0935 = 0.0698 + 0.5 * 20.0478 + 0.01 * -0.0194
-----------------
Finished episode: 754 Reward: 2655.6367 total_loss = 14.8673 = -0.0335 + 0.5 * 29.8020 + 0.01 * -0.0224
-----------------
Finished episode: 755 Reward: 3422.6267 total_loss = 9.2938 = -0.0653 + 0.5 * 18.7186 + 0.01 * -0.0225
-----------------
Finished episode: 756 Reward: 3725.2716 total_loss = 15.4392 = 0.0080 + 0.5 * 30.8628 + 0.01 * -0.0216
-----------------
Finished episode: 757 Reward: 2684.0420 total_loss = 9.1197 = 0.0937 + 0.5 * 18.0525 + 0.01 * -0.0227
-----------------
Finished episode: 758 Reward: 3134.2869 total_loss = 13.8886 = -0.0118 + 0.5 * 27.8013 + 0.01 * -0.0231
-----------------
Finished episode: 759 Reward: 2376.2932 total_loss = 14.5205 = 0.0506 + 0.5 * 28.9401 + 0.01 * -0.0200
-----------------
Finished episode: 760 Reward: 3705.0812 total_loss = 15.8039 = 0.0469 + 0.5 * 31.5144 + 0.01 * -0.0197
-----------------
Finished episode: 761 Reward: 2

Finished episode: 821 Reward: 2836.1161 total_loss = 5.1858 = -0.1413 + 0.5 * 10.6548 + 0.01 * -0.0240
-----------------
Finished episode: 822 Reward: 3324.1153 total_loss = 13.0813 = -0.0060 + 0.5 * 26.1752 + 0.01 * -0.0238
-----------------
Finished episode: 823 Reward: 3114.2124 total_loss = 15.8326 = -0.0699 + 0.5 * 31.8054 + 0.01 * -0.0241
-----------------
Finished episode: 824 Reward: 2682.3338 total_loss = 6.8945 = 0.0441 + 0.5 * 13.7011 + 0.01 * -0.0228
-----------------
Finished episode: 825 Reward: 3593.7969 total_loss = 10.9966 = -0.0212 + 0.5 * 22.0361 + 0.01 * -0.0234
-----------------
Finished episode: 826 Reward: 3347.1989 total_loss = 7.7539 = -0.0248 + 0.5 * 15.5578 + 0.01 * -0.0221
-----------------
Finished episode: 827 Reward: 3132.4692 total_loss = 6.6372 = -0.0221 + 0.5 * 13.3192 + 0.01 * -0.0212
-----------------
Finished episode: 828 Reward: 2956.3323 total_loss = 14.1146 = 0.0922 + 0.5 * 28.0453 + 0.01 * -0.0230
-----------------
Finished episode: 829 Reward: 

Finished episode: 889 Reward: 2002.1900 total_loss = 2.9827 = -0.0512 + 0.5 * 6.0684 + 0.01 * -0.0223
-----------------
Finished episode: 890 Reward: 3071.9661 total_loss = 8.7929 = -0.0342 + 0.5 * 17.6546 + 0.01 * -0.0202
-----------------
Finished episode: 891 Reward: 3120.1807 total_loss = 9.9851 = 0.0146 + 0.5 * 19.9414 + 0.01 * -0.0208
-----------------
Finished episode: 892 Reward: 3464.5411 total_loss = 7.5818 = -0.0495 + 0.5 * 15.2630 + 0.01 * -0.0197
-----------------
Finished episode: 893 Reward: 3872.4655 total_loss = 12.8690 = -0.0015 + 0.5 * 25.7414 + 0.01 * -0.0205
-----------------
Finished episode: 894 Reward: 3266.6456 total_loss = 8.0001 = 0.0274 + 0.5 * 15.9458 + 0.01 * -0.0191
-----------------
Finished episode: 895 Reward: 2836.8679 total_loss = 17.3942 = -0.0096 + 0.5 * 34.8080 + 0.01 * -0.0198
-----------------
Finished episode: 896 Reward: 2744.9945 total_loss = 14.7935 = -0.0051 + 0.5 * 29.5975 + 0.01 * -0.0212
-----------------
Finished episode: 897 Reward: 37

Finished episode: 957 Reward: 2515.6495 total_loss = 13.6655 = 0.0133 + 0.5 * 27.3047 + 0.01 * -0.0194
-----------------
Finished episode: 958 Reward: 2975.1940 total_loss = 8.5813 = -0.0376 + 0.5 * 17.2383 + 0.01 * -0.0197
-----------------
Finished episode: 959 Reward: 3698.3762 total_loss = 11.6343 = -0.0811 + 0.5 * 23.4313 + 0.01 * -0.0192
-----------------
Finished episode: 960 Reward: 3198.0625 total_loss = 11.5338 = 0.0297 + 0.5 * 23.0086 + 0.01 * -0.0193
-----------------
Finished episode: 961 Reward: 3241.4384 total_loss = 10.5651 = -0.0572 + 0.5 * 21.2450 + 0.01 * -0.0197
-----------------
Finished episode: 962 Reward: 3681.1963 total_loss = 13.1767 = 0.0239 + 0.5 * 26.3059 + 0.01 * -0.0201
-----------------
Finished episode: 963 Reward: 2562.8384 total_loss = 14.0058 = -0.0717 + 0.5 * 28.1554 + 0.01 * -0.0182
-----------------
Finished episode: 964 Reward: 3131.4934 total_loss = 11.5778 = 0.0419 + 0.5 * 23.0722 + 0.01 * -0.0189
-----------------
Finished episode: 965 Reward:

Finished episode: 1025 Reward: 2483.3383 total_loss = 13.2715 = -0.0494 + 0.5 * 26.6421 + 0.01 * -0.0164
-----------------
Finished episode: 1026 Reward: 2448.0900 total_loss = 11.9070 = -0.0633 + 0.5 * 23.9409 + 0.01 * -0.0168
-----------------
Finished episode: 1027 Reward: 3683.4969 total_loss = 16.1707 = 0.0813 + 0.5 * 32.1792 + 0.01 * -0.0169
-----------------
Finished episode: 1028 Reward: 3601.5826 total_loss = 12.7189 = -0.0312 + 0.5 * 25.5003 + 0.01 * -0.0160
-----------------
Finished episode: 1029 Reward: 3591.8671 total_loss = 13.1777 = -0.1028 + 0.5 * 26.5613 + 0.01 * -0.0150
-----------------
Finished episode: 1030 Reward: 3613.5044 total_loss = 14.4026 = -0.0975 + 0.5 * 29.0004 + 0.01 * -0.0158
-----------------
Finished episode: 1031 Reward: 3457.1053 total_loss = 13.4045 = 0.0699 + 0.5 * 26.6696 + 0.01 * -0.0165
-----------------
Finished episode: 1032 Reward: 3560.8082 total_loss = 12.2383 = -0.0105 + 0.5 * 24.4978 + 0.01 * -0.0145
-----------------
Finished episode: 

Finished episode: 1092 Reward: 3305.0475 total_loss = 7.5990 = 0.0108 + 0.5 * 15.1766 + 0.01 * -0.0154
-----------------
Finished episode: 1093 Reward: 2452.3749 total_loss = 11.1671 = -0.0664 + 0.5 * 22.4672 + 0.01 * -0.0147
-----------------
Finished episode: 1094 Reward: 2436.6788 total_loss = 13.5010 = -0.0436 + 0.5 * 27.0895 + 0.01 * -0.0144
-----------------
Finished episode: 1095 Reward: 3661.8001 total_loss = 14.4637 = 0.0615 + 0.5 * 28.8047 + 0.01 * -0.0143
-----------------
Finished episode: 1096 Reward: 1832.5515 total_loss = 12.0461 = -0.0371 + 0.5 * 24.1668 + 0.01 * -0.0153
-----------------
Finished episode: 1097 Reward: 1433.4175 total_loss = 6.2548 = 0.0103 + 0.5 * 12.4893 + 0.01 * -0.0141
-----------------
Finished episode: 1098 Reward: 2881.1449 total_loss = 7.4991 = -0.0229 + 0.5 * 15.0445 + 0.01 * -0.0155
-----------------
Finished episode: 1099 Reward: 3469.0648 total_loss = 5.2616 = -0.1091 + 0.5 * 10.7417 + 0.01 * -0.0150
-----------------
Finished episode: 1100 

Finished episode: 1160 Reward: 3280.0457 total_loss = 9.3488 = -0.0994 + 0.5 * 18.8967 + 0.01 * -0.0165
-----------------
Finished episode: 1161 Reward: 3586.9496 total_loss = 10.9321 = -0.0049 + 0.5 * 21.8742 + 0.01 * -0.0143
-----------------
Finished episode: 1162 Reward: 3298.3878 total_loss = 9.4672 = -0.0482 + 0.5 * 19.0311 + 0.01 * -0.0158
-----------------
Finished episode: 1163 Reward: 3398.1299 total_loss = 6.2059 = -0.0065 + 0.5 * 12.4251 + 0.01 * -0.0149
-----------------
Finished episode: 1164 Reward: 3121.9294 total_loss = 12.6149 = -0.0381 + 0.5 * 25.3063 + 0.01 * -0.0139
-----------------
Finished episode: 1165 Reward: 3745.0329 total_loss = 12.4788 = 0.0897 + 0.5 * 24.7785 + 0.01 * -0.0153
-----------------
Finished episode: 1166 Reward: 2426.0671 total_loss = 11.6649 = 0.0946 + 0.5 * 23.1410 + 0.01 * -0.0156
-----------------
Finished episode: 1167 Reward: 3307.9556 total_loss = 9.2263 = 0.0056 + 0.5 * 18.4419 + 0.01 * -0.0166
-----------------
Finished episode: 1168 

Finished episode: 1228 Reward: 3101.6498 total_loss = 2.4228 = -0.0275 + 0.5 * 4.9008 + 0.01 * -0.0149
-----------------
Finished episode: 1229 Reward: 3658.7643 total_loss = 14.5540 = 0.0538 + 0.5 * 29.0008 + 0.01 * -0.0158
-----------------
Finished episode: 1230 Reward: 3638.8301 total_loss = 9.8882 = 0.0237 + 0.5 * 19.7293 + 0.01 * -0.0155
-----------------
Finished episode: 1231 Reward: 3562.8274 total_loss = 12.0314 = 0.0545 + 0.5 * 23.9541 + 0.01 * -0.0144
-----------------
Finished episode: 1232 Reward: 3442.9380 total_loss = 11.2121 = -0.0399 + 0.5 * 22.5043 + 0.01 * -0.0158
-----------------
Finished episode: 1233 Reward: 2076.4175 total_loss = 6.4295 = 0.0138 + 0.5 * 12.8317 + 0.01 * -0.0144
-----------------
Finished episode: 1234 Reward: 3696.8523 total_loss = 13.1707 = -0.1165 + 0.5 * 26.5747 + 0.01 * -0.0148
-----------------
Finished episode: 1235 Reward: 2465.7926 total_loss = 7.7973 = 0.0050 + 0.5 * 15.5849 + 0.01 * -0.0152
-----------------
Finished episode: 1236 Rew

Finished episode: 1296 Reward: 3560.3825 total_loss = 13.8232 = -0.0655 + 0.5 * 27.7776 + 0.01 * -0.0140
-----------------
Finished episode: 1297 Reward: 2705.4842 total_loss = 13.0126 = 0.0298 + 0.5 * 25.9660 + 0.01 * -0.0139
-----------------
Finished episode: 1298 Reward: 3566.0254 total_loss = 15.0225 = -0.0040 + 0.5 * 30.0533 + 0.01 * -0.0141
-----------------
Finished episode: 1299 Reward: 2721.9775 total_loss = 9.4384 = -0.0965 + 0.5 * 19.0702 + 0.01 * -0.0141
-----------------
Finished episode: 1300 Reward: 3351.2067 total_loss = 12.0072 = -0.0517 + 0.5 * 24.1182 + 0.01 * -0.0132
-----------------
Finished episode: 1301 Reward: 3052.0336 total_loss = 8.4306 = -0.0965 + 0.5 * 17.0546 + 0.01 * -0.0144
-----------------
Finished episode: 1302 Reward: 2604.2289 total_loss = 16.1100 = 0.0591 + 0.5 * 32.1021 + 0.01 * -0.0131
-----------------
Finished episode: 1303 Reward: 2473.7475 total_loss = 14.4341 = -0.0160 + 0.5 * 28.9005 + 0.01 * -0.0150
-----------------
Finished episode: 13

Finished episode: 1364 Reward: 3474.0282 total_loss = 8.1819 = -0.0448 + 0.5 * 16.4537 + 0.01 * -0.0131
-----------------
Finished episode: 1365 Reward: 2412.6636 total_loss = 1.8520 = 0.0496 + 0.5 * 3.6049 + 0.01 * -0.0126
-----------------
Finished episode: 1366 Reward: 3716.4001 total_loss = 12.1963 = -0.0702 + 0.5 * 24.5331 + 0.01 * -0.0124
-----------------
Finished episode: 1367 Reward: 2153.6136 total_loss = 7.6403 = 0.0024 + 0.5 * 15.2759 + 0.01 * -0.0136
-----------------
Finished episode: 1368 Reward: 3090.6942 total_loss = 13.7104 = 0.0595 + 0.5 * 27.3020 + 0.01 * -0.0133
-----------------
Finished episode: 1369 Reward: 2707.3359 total_loss = 15.2586 = 0.0347 + 0.5 * 30.4482 + 0.01 * -0.0130
-----------------
Finished episode: 1370 Reward: 3603.4166 total_loss = 13.7703 = 0.0557 + 0.5 * 27.4294 + 0.01 * -0.0131
-----------------
Finished episode: 1371 Reward: 2231.5124 total_loss = 10.1064 = 0.0194 + 0.5 * 20.1742 + 0.01 * -0.0131
-----------------
Finished episode: 1372 Rew

Finished episode: 1432 Reward: 2484.5468 total_loss = 16.6935 = -0.0435 + 0.5 * 33.4743 + 0.01 * -0.0117
-----------------
Finished episode: 1433 Reward: 3543.3574 total_loss = 12.8798 = -0.0462 + 0.5 * 25.8523 + 0.01 * -0.0113
-----------------
Finished episode: 1434 Reward: 2945.7994 total_loss = 11.9289 = -0.0024 + 0.5 * 23.8630 + 0.01 * -0.0108
-----------------
Finished episode: 1435 Reward: 2792.6638 total_loss = 8.9702 = -0.0437 + 0.5 * 18.0280 + 0.01 * -0.0120
-----------------
Finished episode: 1436 Reward: 3558.2675 total_loss = 13.9715 = 0.0369 + 0.5 * 27.8693 + 0.01 * -0.0119
-----------------
Finished episode: 1437 Reward: 2898.6867 total_loss = 10.9946 = -0.1027 + 0.5 * 22.1948 + 0.01 * -0.0111
-----------------
Finished episode: 1438 Reward: 2203.0028 total_loss = 4.3016 = 0.0236 + 0.5 * 8.5562 + 0.01 * -0.0110
-----------------
Finished episode: 1439 Reward: 3692.0122 total_loss = 12.6972 = -0.0154 + 0.5 * 25.4254 + 0.01 * -0.0123
-----------------
Finished episode: 144

Finished episode: 1500 Reward: 3658.8754 total_loss = 15.1615 = -0.0071 + 0.5 * 30.3374 + 0.01 * -0.0106
-----------------
Finished episode: 1501 Reward: 2622.7894 total_loss = 10.1837 = -0.0243 + 0.5 * 20.4164 + 0.01 * -0.0106
-----------------
Finished episode: 1502 Reward: 2774.6912 total_loss = 6.2512 = -0.0483 + 0.5 * 12.5991 + 0.01 * -0.0107
-----------------
Finished episode: 1503 Reward: 1943.2578 total_loss = 13.8003 = -0.0196 + 0.5 * 27.6399 + 0.01 * -0.0100
-----------------
Finished episode: 1504 Reward: 2223.9922 total_loss = 7.9685 = -0.0256 + 0.5 * 15.9885 + 0.01 * -0.0095
-----------------
Finished episode: 1505 Reward: 2828.3824 total_loss = 6.4557 = -0.0435 + 0.5 * 12.9986 + 0.01 * -0.0097
-----------------
Finished episode: 1506 Reward: 2523.2886 total_loss = 13.4119 = -0.0213 + 0.5 * 26.8665 + 0.01 * -0.0100
-----------------
Finished episode: 1507 Reward: 3627.2671 total_loss = 13.2044 = -0.0226 + 0.5 * 26.4543 + 0.01 * -0.0116
-----------------
Finished episode: 1

Finished episode: 1568 Reward: 3138.4047 total_loss = 11.1093 = -0.0683 + 0.5 * 22.3554 + 0.01 * -0.0110
-----------------
Finished episode: 1569 Reward: 2675.6692 total_loss = 5.6178 = -0.0613 + 0.5 * 11.3583 + 0.01 * -0.0092
-----------------
Finished episode: 1570 Reward: 2186.1674 total_loss = 8.2315 = 0.0728 + 0.5 * 16.3176 + 0.01 * -0.0099
-----------------
Finished episode: 1571 Reward: 2587.5417 total_loss = 6.6295 = 0.0885 + 0.5 * 13.0823 + 0.01 * -0.0092
-----------------
Finished episode: 1572 Reward: 3596.4051 total_loss = 10.7690 = -0.0613 + 0.5 * 21.6607 + 0.01 * -0.0100
-----------------
Finished episode: 1573 Reward: 3279.7213 total_loss = 10.0834 = 0.0813 + 0.5 * 20.0044 + 0.01 * -0.0102
-----------------
Finished episode: 1574 Reward: 3521.1562 total_loss = 9.6519 = -0.0711 + 0.5 * 19.4463 + 0.01 * -0.0098
-----------------
Finished episode: 1575 Reward: 3014.2771 total_loss = 5.0061 = -0.0083 + 0.5 * 10.0292 + 0.01 * -0.0113
-----------------
Finished episode: 1576 R

Finished episode: 1636 Reward: 3657.1806 total_loss = 10.8475 = -0.0757 + 0.5 * 21.8465 + 0.01 * -0.0100
-----------------
Finished episode: 1637 Reward: 2480.1544 total_loss = 10.5677 = -0.0760 + 0.5 * 21.2875 + 0.01 * -0.0099
-----------------
Finished episode: 1638 Reward: 3264.5766 total_loss = 9.0028 = 0.0344 + 0.5 * 17.9370 + 0.01 * -0.0093
-----------------
Finished episode: 1639 Reward: 3398.9704 total_loss = 10.3683 = -0.1104 + 0.5 * 20.9576 + 0.01 * -0.0106
-----------------
Finished episode: 1640 Reward: 3407.8763 total_loss = 12.0482 = 0.0977 + 0.5 * 23.9012 + 0.01 * -0.0097
-----------------
Finished episode: 1641 Reward: 3027.2872 total_loss = 7.3369 = -0.0303 + 0.5 * 14.7345 + 0.01 * -0.0100
-----------------
Finished episode: 1642 Reward: 3722.0872 total_loss = 14.1475 = 0.0518 + 0.5 * 28.1915 + 0.01 * -0.0099
-----------------
Finished episode: 1643 Reward: 2087.5224 total_loss = 10.0202 = 0.0439 + 0.5 * 19.9528 + 0.01 * -0.0101
-----------------
Finished episode: 1644

Finished episode: 1704 Reward: 2821.0292 total_loss = 10.5471 = 0.0623 + 0.5 * 20.9698 + 0.01 * -0.0095
-----------------
Finished episode: 1705 Reward: 1873.3017 total_loss = 2.7949 = 0.0127 + 0.5 * 5.5646 + 0.01 * -0.0091
-----------------
Finished episode: 1706 Reward: 2774.1296 total_loss = 3.3060 = 0.0793 + 0.5 * 6.4536 + 0.01 * -0.0098
-----------------
Finished episode: 1707 Reward: 2535.0077 total_loss = 10.6340 = -0.0565 + 0.5 * 21.3812 + 0.01 * -0.0104
-----------------
Finished episode: 1708 Reward: 3637.6775 total_loss = 14.4575 = 0.0146 + 0.5 * 28.8861 + 0.01 * -0.0083
-----------------
Finished episode: 1709 Reward: 3257.2447 total_loss = 10.7887 = -0.0572 + 0.5 * 21.6920 + 0.01 * -0.0099
-----------------
Finished episode: 1710 Reward: 2522.4947 total_loss = 9.4820 = -0.0477 + 0.5 * 19.0596 + 0.01 * -0.0085
-----------------
Finished episode: 1711 Reward: 2996.0517 total_loss = 8.8426 = -0.0339 + 0.5 * 17.7533 + 0.01 * -0.0092
-----------------
Finished episode: 1712 Rew

Finished episode: 1772 Reward: 2553.0369 total_loss = 8.3116 = -0.0686 + 0.5 * 16.7605 + 0.01 * -0.0094
-----------------
Finished episode: 1773 Reward: 3356.4452 total_loss = 5.2630 = -0.0083 + 0.5 * 10.5428 + 0.01 * -0.0088
-----------------
Finished episode: 1774 Reward: 3548.7259 total_loss = 13.2332 = 0.0145 + 0.5 * 26.4376 + 0.01 * -0.0094
-----------------
Finished episode: 1775 Reward: 2649.5525 total_loss = 10.9689 = -0.0411 + 0.5 * 22.0202 + 0.01 * -0.0098
-----------------
Finished episode: 1776 Reward: 2678.8471 total_loss = 13.3933 = -0.0181 + 0.5 * 26.8231 + 0.01 * -0.0095
-----------------
Finished episode: 1777 Reward: 3013.6305 total_loss = 8.7675 = 0.0205 + 0.5 * 17.4942 + 0.01 * -0.0101
-----------------
Finished episode: 1778 Reward: 3306.7142 total_loss = 6.9431 = 0.0831 + 0.5 * 13.7200 + 0.01 * -0.0092
-----------------
Finished episode: 1779 Reward: 3615.0490 total_loss = 12.4458 = 0.1810 + 0.5 * 24.5298 + 0.01 * -0.0082
-----------------
Finished episode: 1780 R

Finished episode: 1840 Reward: 2618.9072 total_loss = 11.4367 = -0.0771 + 0.5 * 23.0278 + 0.01 * -0.0092
-----------------
Finished episode: 1841 Reward: 2534.9551 total_loss = 2.4941 = 0.0766 + 0.5 * 4.8352 + 0.01 * -0.0084
-----------------
Finished episode: 1842 Reward: 3604.9298 total_loss = 14.2943 = -0.0354 + 0.5 * 28.6594 + 0.01 * -0.0092
-----------------
Finished episode: 1843 Reward: 2394.9613 total_loss = 9.0935 = 0.0795 + 0.5 * 18.0282 + 0.01 * -0.0080
-----------------
Finished episode: 1844 Reward: 3027.9052 total_loss = 10.0275 = 0.0448 + 0.5 * 19.9656 + 0.01 * -0.0090
-----------------
Finished episode: 1845 Reward: 3261.6527 total_loss = 12.8328 = 0.0335 + 0.5 * 25.5987 + 0.01 * -0.0085
-----------------
Finished episode: 1846 Reward: 2943.2851 total_loss = 15.6693 = -0.0492 + 0.5 * 31.4372 + 0.01 * -0.0097
-----------------
Finished episode: 1847 Reward: 2995.3832 total_loss = 13.5954 = -0.0190 + 0.5 * 27.2290 + 0.01 * -0.0088
-----------------
Finished episode: 1848 

Finished episode: 1908 Reward: 3298.2329 total_loss = 6.6002 = -0.0411 + 0.5 * 13.2826 + 0.01 * -0.0083
-----------------
Finished episode: 1909 Reward: 3263.8303 total_loss = 11.7736 = -0.0154 + 0.5 * 23.5782 + 0.01 * -0.0084
-----------------
Finished episode: 1910 Reward: 2723.0672 total_loss = 7.3106 = 0.0209 + 0.5 * 14.5795 + 0.01 * -0.0078
-----------------
Finished episode: 1911 Reward: 3650.9095 total_loss = 9.1186 = 0.0072 + 0.5 * 18.2230 + 0.01 * -0.0082
-----------------
Finished episode: 1912 Reward: 2216.7626 total_loss = 3.7863 = 0.0779 + 0.5 * 7.4170 + 0.01 * -0.0093
-----------------
Finished episode: 1913 Reward: 3681.5575 total_loss = 12.8515 = 0.0598 + 0.5 * 25.5837 + 0.01 * -0.0081
-----------------
Finished episode: 1914 Reward: 2305.2859 total_loss = 9.6378 = 0.0164 + 0.5 * 19.2429 + 0.01 * -0.0084
-----------------
Finished episode: 1915 Reward: 3727.3920 total_loss = 12.2040 = -0.0714 + 0.5 * 24.5509 + 0.01 * -0.0076
-----------------
Finished episode: 1916 Rewa

Finished episode: 1976 Reward: 3763.4446 total_loss = 13.9295 = 0.0339 + 0.5 * 27.7914 + 0.01 * -0.0085
-----------------
Finished episode: 1977 Reward: 3108.7819 total_loss = 12.7687 = 0.0731 + 0.5 * 25.3912 + 0.01 * -0.0084
-----------------
Finished episode: 1978 Reward: 1995.0330 total_loss = 14.3187 = 0.0717 + 0.5 * 28.4940 + 0.01 * -0.0082
-----------------
Finished episode: 1979 Reward: 2673.3040 total_loss = 9.2238 = 0.0262 + 0.5 * 18.3953 + 0.01 * -0.0079
-----------------
Finished episode: 1980 Reward: 2540.4617 total_loss = 12.3478 = 0.0288 + 0.5 * 24.6381 + 0.01 * -0.0087
-----------------
Finished episode: 1981 Reward: 2339.6607 total_loss = 2.7237 = 0.0799 + 0.5 * 5.2878 + 0.01 * -0.0091
-----------------
Finished episode: 1982 Reward: 2548.6976 total_loss = 13.8644 = 0.0100 + 0.5 * 27.7089 + 0.01 * -0.0086
-----------------
Finished episode: 1983 Reward: 2561.6117 total_loss = 12.9305 = 0.0688 + 0.5 * 25.7236 + 0.01 * -0.0079
-----------------
Finished episode: 1984 Rewa

Finished episode: 45 Reward: 305.4431 total_loss = 6.0175 = -0.0197 + 0.5 * 12.0745 + 0.01 * -0.0048
-----------------
Finished episode: 46 Reward: 382.4144 total_loss = 8.4671 = 0.0804 + 0.5 * 16.7735 + 0.01 * -0.0043
-----------------
Finished episode: 47 Reward: 313.5907 total_loss = 6.3287 = -0.1202 + 0.5 * 12.8980 + 0.01 * -0.0046
-----------------
Finished episode: 48 Reward: 320.1820 total_loss = 5.9646 = -0.0304 + 0.5 * 11.9902 + 0.01 * -0.0046
-----------------
Finished episode: 49 Reward: 279.3060 total_loss = 3.7759 = 0.0810 + 0.5 * 7.3900 + 0.01 * -0.0052
-----------------
Finished episode: 50 Reward: 347.5512 total_loss = 7.1339 = 0.0442 + 0.5 * 14.1795 + 0.01 * -0.0046
-----------------
Finished episode: 51 Reward: 315.8225 total_loss = 5.3371 = 0.1050 + 0.5 * 10.4644 + 0.01 * -0.0045
-----------------
Finished episode: 52 Reward: 289.5725 total_loss = 4.6454 = 0.0046 + 0.5 * 9.2817 + 0.01 * -0.0046
-----------------
Finished episode: 53 Reward: 298.5229 total_loss = 4.31

Finished episode: 115 Reward: 444.4951 total_loss = 2.3855 = -0.0909 + 0.5 * 4.9528 + 0.01 * -0.0057
-----------------
Finished episode: 116 Reward: 427.5062 total_loss = 1.8575 = 0.0202 + 0.5 * 3.6747 + 0.01 * -0.0061
-----------------
Finished episode: 117 Reward: 393.6395 total_loss = 1.1209 = 0.0069 + 0.5 * 2.2281 + 0.01 * -0.0061
-----------------
Finished episode: 118 Reward: 445.1821 total_loss = 2.3894 = -0.0729 + 0.5 * 4.9248 + 0.01 * -0.0056
-----------------
Finished episode: 119 Reward: 456.1285 total_loss = 2.6507 = -0.0747 + 0.5 * 5.4510 + 0.01 * -0.0062
-----------------
Finished episode: 120 Reward: 475.5804 total_loss = 2.5382 = -0.0581 + 0.5 * 5.1927 + 0.01 * -0.0057
-----------------
Finished episode: 121 Reward: 451.0033 total_loss = 3.5797 = -0.0649 + 0.5 * 7.2894 + 0.01 * -0.0062
-----------------
Finished episode: 122 Reward: 450.4825 total_loss = 1.9201 = -0.0467 + 0.5 * 3.9339 + 0.01 * -0.0066
-----------------
Finished episode: 123 Reward: 462.1521 total_loss 

Finished episode: 184 Reward: 657.1355 total_loss = 3.2959 = -0.0325 + 0.5 * 6.6569 + 0.01 * -0.0104
-----------------
Finished episode: 185 Reward: 1095.9825 total_loss = 9.5098 = -0.0148 + 0.5 * 19.0493 + 0.01 * -0.0090
-----------------
Finished episode: 186 Reward: 867.6248 total_loss = 4.9178 = 0.0022 + 0.5 * 9.8314 + 0.01 * -0.0080
-----------------
Finished episode: 187 Reward: 1164.6212 total_loss = 7.2412 = -0.0470 + 0.5 * 14.5765 + 0.01 * -0.0085
-----------------
Finished episode: 188 Reward: 762.5603 total_loss = 4.3507 = -0.0021 + 0.5 * 8.7058 + 0.01 * -0.0083
-----------------
Finished episode: 189 Reward: 703.9018 total_loss = 2.8858 = 0.0027 + 0.5 * 5.7663 + 0.01 * -0.0090
-----------------
Finished episode: 190 Reward: 1386.7766 total_loss = 12.0794 = -0.1342 + 0.5 * 24.4275 + 0.01 * -0.0085
-----------------
Finished episode: 191 Reward: 846.9063 total_loss = 4.9661 = -0.0336 + 0.5 * 9.9997 + 0.01 * -0.0091
-----------------
Finished episode: 192 Reward: 1421.7278 tot

Finished episode: 252 Reward: 1881.3198 total_loss = 7.7628 = -0.0662 + 0.5 * 15.6583 + 0.01 * -0.0115
-----------------
Finished episode: 253 Reward: 1385.5376 total_loss = 5.7893 = -0.0673 + 0.5 * 11.7135 + 0.01 * -0.0134
-----------------
Finished episode: 254 Reward: 1821.2729 total_loss = 5.7887 = 0.0281 + 0.5 * 11.5215 + 0.01 * -0.0132
-----------------
Finished episode: 255 Reward: 1157.9667 total_loss = 4.1859 = -0.1242 + 0.5 * 8.6203 + 0.01 * -0.0116
-----------------
Finished episode: 256 Reward: 1156.8307 total_loss = 3.3565 = 0.0188 + 0.5 * 6.6756 + 0.01 * -0.0125
-----------------
Finished episode: 257 Reward: 2202.4889 total_loss = 6.1651 = -0.0985 + 0.5 * 12.5275 + 0.01 * -0.0121
-----------------
Finished episode: 258 Reward: 1659.3463 total_loss = 4.6074 = 0.0156 + 0.5 * 9.1838 + 0.01 * -0.0120
-----------------
Finished episode: 259 Reward: 1735.6065 total_loss = 7.2790 = -0.0090 + 0.5 * 14.5762 + 0.01 * -0.0133
-----------------
Finished episode: 260 Reward: 1615.786

Finished episode: 320 Reward: 1877.1360 total_loss = 8.1050 = -0.0196 + 0.5 * 16.2495 + 0.01 * -0.0111
-----------------
Finished episode: 321 Reward: 2351.0363 total_loss = 7.0417 = -0.0306 + 0.5 * 14.1449 + 0.01 * -0.0110
-----------------
Finished episode: 322 Reward: 1519.2603 total_loss = 6.0717 = -0.0979 + 0.5 * 12.3396 + 0.01 * -0.0117
-----------------
Finished episode: 323 Reward: 1401.7900 total_loss = 3.3171 = -0.0406 + 0.5 * 6.7157 + 0.01 * -0.0106
-----------------
Finished episode: 324 Reward: 1782.0498 total_loss = 7.8081 = -0.0158 + 0.5 * 15.6480 + 0.01 * -0.0123
-----------------
Finished episode: 325 Reward: 2256.9016 total_loss = 6.6213 = -0.1009 + 0.5 * 13.4445 + 0.01 * -0.0122
-----------------
Finished episode: 326 Reward: 1585.1148 total_loss = 9.5995 = 0.0655 + 0.5 * 19.0682 + 0.01 * -0.0120
-----------------
Finished episode: 327 Reward: 2147.2565 total_loss = 8.7580 = 0.0349 + 0.5 * 17.4465 + 0.01 * -0.0118
-----------------
Finished episode: 328 Reward: 1494.

Finished episode: 388 Reward: 2001.2126 total_loss = 4.4697 = -0.0778 + 0.5 * 9.0953 + 0.01 * -0.0128
-----------------
Finished episode: 389 Reward: 2558.1885 total_loss = 8.3212 = -0.0548 + 0.5 * 16.7523 + 0.01 * -0.0116
-----------------
Finished episode: 390 Reward: 1315.1352 total_loss = 2.1429 = 0.0316 + 0.5 * 4.2229 + 0.01 * -0.0137
-----------------
Finished episode: 391 Reward: 1776.7768 total_loss = 7.1564 = -0.0709 + 0.5 * 14.4549 + 0.01 * -0.0126
-----------------
Finished episode: 392 Reward: 1676.1717 total_loss = 8.9355 = 0.0102 + 0.5 * 17.8509 + 0.01 * -0.0124
-----------------
Finished episode: 393 Reward: 2364.2935 total_loss = 7.2013 = -0.0007 + 0.5 * 14.4043 + 0.01 * -0.0128
-----------------
Finished episode: 394 Reward: 1340.2371 total_loss = 9.9722 = -0.0708 + 0.5 * 20.0863 + 0.01 * -0.0124
-----------------
Finished episode: 395 Reward: 1660.7747 total_loss = 7.0328 = -0.0711 + 0.5 * 14.2081 + 0.01 * -0.0126
-----------------
Finished episode: 396 Reward: 2122.2

Finished episode: 456 Reward: 1365.6170 total_loss = 2.1511 = -0.1183 + 0.5 * 4.5391 + 0.01 * -0.0191
-----------------
Finished episode: 457 Reward: 2186.4325 total_loss = 7.6515 = 0.0657 + 0.5 * 15.1718 + 0.01 * -0.0180
-----------------
Finished episode: 458 Reward: 1475.4588 total_loss = 3.9223 = -0.0395 + 0.5 * 7.9240 + 0.01 * -0.0185
-----------------
Finished episode: 459 Reward: 2117.0538 total_loss = 6.3419 = -0.1319 + 0.5 * 12.9479 + 0.01 * -0.0171
-----------------
Finished episode: 460 Reward: 1790.2442 total_loss = 2.8743 = -0.0016 + 0.5 * 5.7521 + 0.01 * -0.0170
-----------------
Finished episode: 461 Reward: 2759.0046 total_loss = 10.5584 = -0.0340 + 0.5 * 21.1851 + 0.01 * -0.0178
-----------------
Finished episode: 462 Reward: 2158.2457 total_loss = 11.0484 = -0.0023 + 0.5 * 22.1017 + 0.01 * -0.0187
-----------------
Finished episode: 463 Reward: 2212.5485 total_loss = 11.8671 = -0.0493 + 0.5 * 23.8331 + 0.01 * -0.0181
-----------------
Finished episode: 464 Reward: 171

Finished episode: 524 Reward: 1646.2146 total_loss = 3.8202 = 0.0233 + 0.5 * 7.5942 + 0.01 * -0.0186
-----------------
Finished episode: 525 Reward: 3059.5278 total_loss = 10.8409 = 0.0971 + 0.5 * 21.4880 + 0.01 * -0.0177
-----------------
Finished episode: 526 Reward: 2591.5045 total_loss = 8.7422 = -0.0621 + 0.5 * 17.6090 + 0.01 * -0.0200
-----------------
Finished episode: 527 Reward: 2827.6118 total_loss = 8.1426 = -0.0460 + 0.5 * 16.3774 + 0.01 * -0.0171
-----------------
Finished episode: 528 Reward: 3171.7027 total_loss = 10.3594 = -0.0354 + 0.5 * 20.7901 + 0.01 * -0.0187
-----------------
Finished episode: 529 Reward: 2758.8288 total_loss = 11.9563 = -0.0228 + 0.5 * 23.9587 + 0.01 * -0.0168
-----------------
Finished episode: 530 Reward: 3147.8130 total_loss = 13.4554 = 0.0556 + 0.5 * 26.8000 + 0.01 * -0.0189
-----------------
Finished episode: 531 Reward: 1974.8822 total_loss = 1.7653 = -0.0345 + 0.5 * 3.5998 + 0.01 * -0.0187
-----------------
Finished episode: 532 Reward: 294

Finished episode: 592 Reward: 2977.3171 total_loss = 9.6591 = -0.0664 + 0.5 * 19.4513 + 0.01 * -0.0173
-----------------
Finished episode: 593 Reward: 2699.0533 total_loss = 9.6418 = -0.0161 + 0.5 * 19.3163 + 0.01 * -0.0197
-----------------
Finished episode: 594 Reward: 3134.4302 total_loss = 13.1307 = 0.0179 + 0.5 * 26.2260 + 0.01 * -0.0168
-----------------
Finished episode: 595 Reward: 2539.1107 total_loss = 7.0546 = 0.0190 + 0.5 * 14.0715 + 0.01 * -0.0195
-----------------
Finished episode: 596 Reward: 3133.3717 total_loss = 11.5806 = -0.0762 + 0.5 * 23.3139 + 0.01 * -0.0189
-----------------
Finished episode: 597 Reward: 3184.7903 total_loss = 9.6100 = -0.0285 + 0.5 * 19.2774 + 0.01 * -0.0167
-----------------
Finished episode: 598 Reward: 3244.8604 total_loss = 12.0699 = -0.1092 + 0.5 * 24.3585 + 0.01 * -0.0189
-----------------
Finished episode: 599 Reward: 3000.3386 total_loss = 11.5314 = 0.1110 + 0.5 * 22.8411 + 0.01 * -0.0180
-----------------
Finished episode: 600 Reward: 2

Finished episode: 660 Reward: 2828.7588 total_loss = 3.8161 = 0.0578 + 0.5 * 7.5169 + 0.01 * -0.0161
-----------------
Finished episode: 661 Reward: 2711.9273 total_loss = 8.8626 = -0.0428 + 0.5 * 17.8112 + 0.01 * -0.0181
-----------------
Finished episode: 662 Reward: 3189.6982 total_loss = 10.3136 = -0.0179 + 0.5 * 20.6633 + 0.01 * -0.0170
-----------------
Finished episode: 663 Reward: 2903.7612 total_loss = 11.8710 = 0.0549 + 0.5 * 23.6324 + 0.01 * -0.0166
-----------------
Finished episode: 664 Reward: 3207.8531 total_loss = 6.9252 = -0.0622 + 0.5 * 13.9750 + 0.01 * -0.0178
-----------------
Finished episode: 665 Reward: 1926.3281 total_loss = 5.3582 = 0.0532 + 0.5 * 10.6105 + 0.01 * -0.0197
-----------------
Finished episode: 666 Reward: 2785.6794 total_loss = 7.4910 = 0.0130 + 0.5 * 14.9566 + 0.01 * -0.0195
-----------------
Finished episode: 667 Reward: 2471.7923 total_loss = 1.6885 = -0.0592 + 0.5 * 3.4958 + 0.01 * -0.0184
-----------------
Finished episode: 668 Reward: 3536.8

Finished episode: 728 Reward: 3634.8065 total_loss = 17.0678 = -0.0214 + 0.5 * 34.1789 + 0.01 * -0.0230
-----------------
Finished episode: 729 Reward: 2829.0401 total_loss = 9.3797 = -0.0123 + 0.5 * 18.7845 + 0.01 * -0.0249
-----------------
Finished episode: 730 Reward: 3562.2200 total_loss = 16.4088 = 0.1546 + 0.5 * 32.5088 + 0.01 * -0.0222
-----------------
Finished episode: 731 Reward: 3162.6686 total_loss = 10.3916 = 0.0503 + 0.5 * 20.6831 + 0.01 * -0.0213
-----------------
Finished episode: 732 Reward: 3659.1207 total_loss = 13.6669 = -0.0909 + 0.5 * 27.5160 + 0.01 * -0.0232
-----------------
Finished episode: 733 Reward: 3701.1130 total_loss = 12.8282 = 0.1000 + 0.5 * 25.4568 + 0.01 * -0.0211
-----------------
Finished episode: 734 Reward: 3690.3887 total_loss = 16.6554 = 0.0206 + 0.5 * 33.2702 + 0.01 * -0.0237
-----------------
Finished episode: 735 Reward: 2964.8031 total_loss = 5.5111 = -0.0631 + 0.5 * 11.1487 + 0.01 * -0.0202
-----------------
Finished episode: 736 Reward: 

Finished episode: 796 Reward: 3142.2361 total_loss = 11.7866 = 0.0100 + 0.5 * 23.5537 + 0.01 * -0.0230
-----------------
Finished episode: 797 Reward: 3799.4951 total_loss = 11.9038 = 0.0651 + 0.5 * 23.6779 + 0.01 * -0.0222
-----------------
Finished episode: 798 Reward: 3220.1319 total_loss = 12.2762 = -0.0329 + 0.5 * 24.6187 + 0.01 * -0.0197
-----------------
Finished episode: 799 Reward: 3938.2209 total_loss = 10.3316 = -0.1169 + 0.5 * 20.8975 + 0.01 * -0.0226
-----------------
Finished episode: 800 Reward: 3865.3409 total_loss = 15.8038 = -0.0076 + 0.5 * 31.6234 + 0.01 * -0.0238
-----------------
Finished episode: 801 Reward: 3919.6703 total_loss = 12.2856 = -0.0380 + 0.5 * 24.6475 + 0.01 * -0.0204
-----------------
Finished episode: 802 Reward: 3031.7343 total_loss = 7.9586 = -0.0820 + 0.5 * 16.0816 + 0.01 * -0.0204
-----------------
Finished episode: 803 Reward: 2914.2750 total_loss = 16.1705 = 0.0174 + 0.5 * 32.3066 + 0.01 * -0.0232
-----------------
Finished episode: 804 Reward

Finished episode: 864 Reward: 3235.8073 total_loss = 10.4071 = -0.0707 + 0.5 * 20.9560 + 0.01 * -0.0244
-----------------
Finished episode: 865 Reward: 3160.4946 total_loss = 13.9274 = 0.0557 + 0.5 * 27.7437 + 0.01 * -0.0203
-----------------
Finished episode: 866 Reward: 3842.6501 total_loss = 14.3764 = 0.0413 + 0.5 * 28.6707 + 0.01 * -0.0221
-----------------
Finished episode: 867 Reward: 3437.9338 total_loss = 9.3689 = -0.0710 + 0.5 * 18.8802 + 0.01 * -0.0222
-----------------
Finished episode: 868 Reward: 2854.2864 total_loss = 5.9775 = -0.0365 + 0.5 * 12.0285 + 0.01 * -0.0221
-----------------
Finished episode: 869 Reward: 3373.8284 total_loss = 10.2183 = 0.0681 + 0.5 * 20.3006 + 0.01 * -0.0196
-----------------
Finished episode: 870 Reward: 3611.7492 total_loss = 9.6336 = -0.0038 + 0.5 * 19.2753 + 0.01 * -0.0229
-----------------
Finished episode: 871 Reward: 3663.6372 total_loss = 9.2243 = -0.0165 + 0.5 * 18.4821 + 0.01 * -0.0217
-----------------
Finished episode: 872 Reward: 3

Finished episode: 932 Reward: 3595.5999 total_loss = 15.0679 = -0.0305 + 0.5 * 30.1973 + 0.01 * -0.0245
-----------------
Finished episode: 933 Reward: 3613.9320 total_loss = 15.1542 = 0.0823 + 0.5 * 30.1443 + 0.01 * -0.0219
-----------------
Finished episode: 934 Reward: 3547.3517 total_loss = 6.5996 = -0.0192 + 0.5 * 13.2379 + 0.01 * -0.0212
-----------------
Finished episode: 935 Reward: 3665.0155 total_loss = 13.0971 = -0.0280 + 0.5 * 26.2507 + 0.01 * -0.0211
-----------------
Finished episode: 936 Reward: 3628.1080 total_loss = 12.3271 = -0.0244 + 0.5 * 24.7034 + 0.01 * -0.0237
-----------------
Finished episode: 937 Reward: 3619.8874 total_loss = 11.4191 = 0.0195 + 0.5 * 22.7997 + 0.01 * -0.0219
-----------------
Finished episode: 938 Reward: 3111.1619 total_loss = 8.5415 = -0.0151 + 0.5 * 17.1136 + 0.01 * -0.0193
-----------------
Finished episode: 939 Reward: 3576.2972 total_loss = 11.5926 = 0.0865 + 0.5 * 23.0126 + 0.01 * -0.0219
-----------------
Finished episode: 940 Reward:

Finished episode: 1000 Reward: 3800.4803 total_loss = 9.3496 = 0.0752 + 0.5 * 18.5492 + 0.01 * -0.0203
-----------------
Finished episode: 1001 Reward: 3297.6457 total_loss = 8.3553 = -0.0099 + 0.5 * 16.7309 + 0.01 * -0.0227
-----------------
Finished episode: 1002 Reward: 3315.1787 total_loss = 13.4602 = -0.0353 + 0.5 * 26.9916 + 0.01 * -0.0219
-----------------
Finished episode: 1003 Reward: 3977.4232 total_loss = 15.4589 = 0.0544 + 0.5 * 30.8093 + 0.01 * -0.0196
-----------------
Finished episode: 1004 Reward: 3852.9885 total_loss = 8.7268 = 0.0554 + 0.5 * 17.3432 + 0.01 * -0.0218
-----------------
Finished episode: 1005 Reward: 3729.1445 total_loss = 8.1785 = -0.0715 + 0.5 * 16.5004 + 0.01 * -0.0225
-----------------
Finished episode: 1006 Reward: 3870.5284 total_loss = 9.0337 = 0.0270 + 0.5 * 18.0139 + 0.01 * -0.0236
-----------------
Finished episode: 1007 Reward: 4011.7380 total_loss = 11.4703 = -0.0527 + 0.5 * 23.0464 + 0.01 * -0.0196
-----------------
Finished episode: 1008 Re

Finished episode: 1068 Reward: 2580.9316 total_loss = 12.2903 = -0.0088 + 0.5 * 24.5986 + 0.01 * -0.0233
-----------------
Finished episode: 1069 Reward: 3488.6013 total_loss = 9.4947 = -0.0629 + 0.5 * 19.1157 + 0.01 * -0.0243
-----------------
Finished episode: 1070 Reward: 2845.5909 total_loss = 14.7572 = -0.0140 + 0.5 * 29.5430 + 0.01 * -0.0261
-----------------
Finished episode: 1071 Reward: 2690.4841 total_loss = 10.5455 = 0.0168 + 0.5 * 21.0579 + 0.01 * -0.0238
-----------------
Finished episode: 1072 Reward: 3943.8760 total_loss = 9.7998 = -0.0583 + 0.5 * 19.7166 + 0.01 * -0.0248
-----------------
Finished episode: 1073 Reward: 3882.1266 total_loss = 12.5414 = 0.0457 + 0.5 * 24.9919 + 0.01 * -0.0223
-----------------
Finished episode: 1074 Reward: 3522.6266 total_loss = 12.3293 = 0.0758 + 0.5 * 24.5074 + 0.01 * -0.0247
-----------------
Finished episode: 1075 Reward: 3021.8230 total_loss = 8.8486 = 0.0623 + 0.5 * 17.5730 + 0.01 * -0.0230
-----------------
Finished episode: 1076 

Finished episode: 1136 Reward: 3629.3093 total_loss = 12.8365 = 0.0420 + 0.5 * 25.5895 + 0.01 * -0.0235
-----------------
Finished episode: 1137 Reward: 3893.4081 total_loss = 13.0149 = -0.0457 + 0.5 * 26.1217 + 0.01 * -0.0239
-----------------
Finished episode: 1138 Reward: 3357.9610 total_loss = 10.0321 = 0.0099 + 0.5 * 20.0450 + 0.01 * -0.0233
-----------------
Finished episode: 1139 Reward: 3783.8111 total_loss = 11.4428 = 0.0209 + 0.5 * 22.8442 + 0.01 * -0.0189
-----------------
Finished episode: 1140 Reward: 3356.5829 total_loss = 9.7662 = 0.1096 + 0.5 * 19.3136 + 0.01 * -0.0220
-----------------
Finished episode: 1141 Reward: 2085.4497 total_loss = 4.1649 = -0.0765 + 0.5 * 8.4832 + 0.01 * -0.0224
-----------------
Finished episode: 1142 Reward: 2942.1576 total_loss = 7.3245 = -0.0226 + 0.5 * 14.6947 + 0.01 * -0.0244
-----------------
Finished episode: 1143 Reward: 2473.7050 total_loss = 8.6291 = 0.0588 + 0.5 * 17.1412 + 0.01 * -0.0226
-----------------
Finished episode: 1144 Rew

Finished episode: 1204 Reward: 3239.2637 total_loss = 5.7463 = 0.1224 + 0.5 * 11.2482 + 0.01 * -0.0197
-----------------
Finished episode: 1205 Reward: 2805.8995 total_loss = 9.9436 = 0.0185 + 0.5 * 19.8505 + 0.01 * -0.0190
-----------------
Finished episode: 1206 Reward: 2722.0466 total_loss = 4.2490 = -0.0575 + 0.5 * 8.6134 + 0.01 * -0.0197
-----------------
Finished episode: 1207 Reward: 2592.7633 total_loss = 10.6614 = 0.1031 + 0.5 * 21.1170 + 0.01 * -0.0204
-----------------
Finished episode: 1208 Reward: 2359.6474 total_loss = 4.8749 = -0.1666 + 0.5 * 10.0833 + 0.01 * -0.0215
-----------------
Finished episode: 1209 Reward: 3153.5489 total_loss = 11.2547 = -0.0591 + 0.5 * 22.6281 + 0.01 * -0.0201
-----------------
Finished episode: 1210 Reward: 3799.1853 total_loss = 13.8061 = -0.0881 + 0.5 * 27.7888 + 0.01 * -0.0211
-----------------
Finished episode: 1211 Reward: 3872.3274 total_loss = 12.0185 = -0.0956 + 0.5 * 24.2285 + 0.01 * -0.0202
-----------------
Finished episode: 1212 R

Finished episode: 1272 Reward: 3522.1454 total_loss = 6.4748 = 0.0002 + 0.5 * 12.9495 + 0.01 * -0.0195
-----------------
Finished episode: 1273 Reward: 3934.6821 total_loss = 14.3679 = 0.0759 + 0.5 * 28.5845 + 0.01 * -0.0214
-----------------
Finished episode: 1274 Reward: 3927.1120 total_loss = 10.6939 = -0.0618 + 0.5 * 21.5117 + 0.01 * -0.0194
-----------------
Finished episode: 1275 Reward: 3546.1951 total_loss = 8.9453 = 0.0283 + 0.5 * 17.8343 + 0.01 * -0.0209
-----------------
Finished episode: 1276 Reward: 3970.7877 total_loss = 14.4966 = -0.1041 + 0.5 * 29.2017 + 0.01 * -0.0191
-----------------
Finished episode: 1277 Reward: 3877.0848 total_loss = 13.0646 = 0.0213 + 0.5 * 26.0869 + 0.01 * -0.0202
-----------------
Finished episode: 1278 Reward: 3527.7157 total_loss = 12.1072 = -0.0324 + 0.5 * 24.2796 + 0.01 * -0.0192
-----------------
Finished episode: 1279 Reward: 3763.1038 total_loss = 11.1367 = -0.0295 + 0.5 * 22.3329 + 0.01 * -0.0218
-----------------
Finished episode: 1280

Finished episode: 1340 Reward: 3645.5633 total_loss = 12.0227 = -0.0461 + 0.5 * 24.1380 + 0.01 * -0.0195
-----------------
Finished episode: 1341 Reward: 3502.5462 total_loss = 10.3032 = -0.0286 + 0.5 * 20.6640 + 0.01 * -0.0226
-----------------
Finished episode: 1342 Reward: 3968.5425 total_loss = 16.9274 = -0.0266 + 0.5 * 33.9086 + 0.01 * -0.0212
-----------------
Finished episode: 1343 Reward: 3313.6853 total_loss = 8.2855 = -0.0680 + 0.5 * 16.7075 + 0.01 * -0.0197
-----------------
Finished episode: 1344 Reward: 3431.7711 total_loss = 10.8263 = -0.0210 + 0.5 * 21.6950 + 0.01 * -0.0211
-----------------
Finished episode: 1345 Reward: 3335.0793 total_loss = 9.0946 = -0.0436 + 0.5 * 18.2768 + 0.01 * -0.0222
-----------------
Finished episode: 1346 Reward: 3304.6345 total_loss = 18.2456 = 0.0281 + 0.5 * 36.4353 + 0.01 * -0.0190
-----------------
Finished episode: 1347 Reward: 3825.5379 total_loss = 9.3528 = -0.0730 + 0.5 * 18.8519 + 0.01 * -0.0186
-----------------
Finished episode: 13

Finished episode: 1407 Reward: 3948.2205 total_loss = 11.4249 = -0.0045 + 0.5 * 22.8591 + 0.01 * -0.0177
-----------------
Finished episode: 1408 Reward: 3830.3693 total_loss = 8.6639 = -0.0306 + 0.5 * 17.3894 + 0.01 * -0.0191
-----------------
Finished episode: 1409 Reward: 3519.8837 total_loss = 13.6880 = -0.0691 + 0.5 * 27.5147 + 0.01 * -0.0210
-----------------
Finished episode: 1410 Reward: 3926.4587 total_loss = 12.8474 = -0.0266 + 0.5 * 25.7485 + 0.01 * -0.0161
-----------------
Finished episode: 1411 Reward: 3736.9711 total_loss = 9.0455 = 0.0510 + 0.5 * 17.9892 + 0.01 * -0.0193
-----------------
Finished episode: 1412 Reward: 3853.8115 total_loss = 18.3381 = 0.1060 + 0.5 * 36.4645 + 0.01 * -0.0177
-----------------
Finished episode: 1413 Reward: 3820.8526 total_loss = 6.0541 = -0.1001 + 0.5 * 12.3089 + 0.01 * -0.0205
-----------------
Finished episode: 1414 Reward: 3646.6836 total_loss = 14.1173 = 0.0126 + 0.5 * 28.2098 + 0.01 * -0.0186
-----------------
Finished episode: 1415

Finished episode: 1475 Reward: 3889.4579 total_loss = 9.8615 = -0.0836 + 0.5 * 19.8906 + 0.01 * -0.0161
-----------------
Finished episode: 1476 Reward: 3958.2613 total_loss = 13.6641 = 0.0905 + 0.5 * 27.1476 + 0.01 * -0.0166
-----------------
Finished episode: 1477 Reward: 3695.3157 total_loss = 7.1079 = -0.0889 + 0.5 * 14.3938 + 0.01 * -0.0168
-----------------
Finished episode: 1478 Reward: 3117.7817 total_loss = 11.0180 = 0.1030 + 0.5 * 21.8302 + 0.01 * -0.0161
-----------------
Finished episode: 1479 Reward: 3810.6413 total_loss = 9.2214 = -0.0530 + 0.5 * 18.5491 + 0.01 * -0.0157
-----------------
Finished episode: 1480 Reward: 3758.3070 total_loss = 14.8895 = 0.0644 + 0.5 * 29.6505 + 0.01 * -0.0170
-----------------
Finished episode: 1481 Reward: 3777.9536 total_loss = 13.9920 = 0.0275 + 0.5 * 27.9293 + 0.01 * -0.0157
-----------------
Finished episode: 1482 Reward: 3770.1498 total_loss = 14.2647 = -0.0524 + 0.5 * 28.6345 + 0.01 * -0.0182
-----------------
Finished episode: 1483 

Finished episode: 1542 Reward: 3778.0178 total_loss = 16.4034 = 0.0380 + 0.5 * 32.7312 + 0.01 * -0.0147
-----------------
Finished episode: 1543 Reward: 2553.0756 total_loss = 13.9647 = -0.0095 + 0.5 * 27.9486 + 0.01 * -0.0145
-----------------
Finished episode: 1544 Reward: 2583.0315 total_loss = 9.3018 = 0.0388 + 0.5 * 18.5263 + 0.01 * -0.0139
-----------------
Finished episode: 1545 Reward: 3556.0755 total_loss = 12.8133 = 0.0343 + 0.5 * 25.5582 + 0.01 * -0.0150
-----------------
Finished episode: 1546 Reward: 3811.1646 total_loss = 16.7610 = 0.0408 + 0.5 * 33.4407 + 0.01 * -0.0136
-----------------
Finished episode: 1547 Reward: 3494.0630 total_loss = 9.5242 = -0.0107 + 0.5 * 19.0701 + 0.01 * -0.0153
-----------------
Finished episode: 1548 Reward: 3782.3488 total_loss = 14.1926 = 0.0259 + 0.5 * 28.3337 + 0.01 * -0.0156
-----------------
Finished episode: 1549 Reward: 3396.5532 total_loss = 8.6938 = -0.0307 + 0.5 * 17.4492 + 0.01 * -0.0149
-----------------
Finished episode: 1550 R

Finished episode: 1609 Reward: 3516.9199 total_loss = 11.5412 = -0.0142 + 0.5 * 23.1110 + 0.01 * -0.0142
-----------------
Finished episode: 1610 Reward: 3697.4311 total_loss = 13.5339 = -0.0095 + 0.5 * 27.0871 + 0.01 * -0.0148
-----------------
Finished episode: 1611 Reward: 3816.4274 total_loss = 14.9378 = -0.0334 + 0.5 * 29.9425 + 0.01 * -0.0129
-----------------
Finished episode: 1612 Reward: 3729.4287 total_loss = 13.6992 = -0.0354 + 0.5 * 27.4694 + 0.01 * -0.0143
-----------------
Finished episode: 1613 Reward: 3651.1788 total_loss = 12.6761 = -0.0978 + 0.5 * 25.5482 + 0.01 * -0.0159
-----------------
Finished episode: 1614 Reward: 2665.6834 total_loss = 11.1604 = -0.0687 + 0.5 * 22.4585 + 0.01 * -0.0139
-----------------
Finished episode: 1615 Reward: 3762.3043 total_loss = 13.1416 = 0.0555 + 0.5 * 26.1724 + 0.01 * -0.0138
-----------------
Finished episode: 1616 Reward: 3699.3406 total_loss = 11.2589 = 0.0098 + 0.5 * 22.4984 + 0.01 * -0.0133
-----------------
Finished episode: 

Finished episode: 1676 Reward: 3745.8357 total_loss = 10.1568 = 0.0648 + 0.5 * 20.1843 + 0.01 * -0.0132
-----------------
Finished episode: 1677 Reward: 3428.9837 total_loss = 9.9451 = -0.0121 + 0.5 * 19.9148 + 0.01 * -0.0124
-----------------
Finished episode: 1678 Reward: 3777.3424 total_loss = 13.2962 = -0.0350 + 0.5 * 26.6625 + 0.01 * -0.0130
-----------------
Finished episode: 1679 Reward: 3753.7325 total_loss = 14.0259 = 0.0436 + 0.5 * 27.9648 + 0.01 * -0.0121
-----------------
Finished episode: 1680 Reward: 3538.9330 total_loss = 10.7835 = -0.0373 + 0.5 * 21.6417 + 0.01 * -0.0116
-----------------
Finished episode: 1681 Reward: 3950.6451 total_loss = 15.6316 = -0.0326 + 0.5 * 31.3287 + 0.01 * -0.0115
-----------------
Finished episode: 1682 Reward: 2798.7965 total_loss = 13.2379 = -0.1668 + 0.5 * 26.8096 + 0.01 * -0.0115
-----------------
Finished episode: 1683 Reward: 3720.0479 total_loss = 16.2506 = 0.0903 + 0.5 * 32.3209 + 0.01 * -0.0130
-----------------
Finished episode: 16

Finished episode: 1744 Reward: 3978.7856 total_loss = 14.1990 = 0.0055 + 0.5 * 28.3873 + 0.01 * -0.0121
-----------------
Finished episode: 1745 Reward: 3609.2369 total_loss = 13.3545 = 0.0280 + 0.5 * 26.6531 + 0.01 * -0.0110
-----------------
Finished episode: 1746 Reward: 3697.9134 total_loss = 10.4328 = 0.0762 + 0.5 * 20.7135 + 0.01 * -0.0114
-----------------
Finished episode: 1747 Reward: 2594.4550 total_loss = 13.9458 = 0.0658 + 0.5 * 27.7603 + 0.01 * -0.0099
-----------------
Finished episode: 1748 Reward: 2464.0058 total_loss = 10.2204 = 0.0782 + 0.5 * 20.2846 + 0.01 * -0.0106
-----------------
Finished episode: 1749 Reward: 3503.1786 total_loss = 7.8607 = 0.0038 + 0.5 * 15.7140 + 0.01 * -0.0110
-----------------
Finished episode: 1750 Reward: 3903.4005 total_loss = 15.0245 = -0.0310 + 0.5 * 30.1113 + 0.01 * -0.0109
-----------------
Finished episode: 1751 Reward: 3734.2348 total_loss = 8.8373 = -0.0298 + 0.5 * 17.7346 + 0.01 * -0.0111
-----------------
Finished episode: 1752 R

Finished episode: 1812 Reward: 1874.6983 total_loss = 9.5180 = -0.0283 + 0.5 * 19.0928 + 0.01 * -0.0122
-----------------
Finished episode: 1813 Reward: 3630.3950 total_loss = 9.8276 = -0.0738 + 0.5 * 19.8030 + 0.01 * -0.0118
-----------------
Finished episode: 1814 Reward: 3189.7817 total_loss = 7.0219 = 0.0194 + 0.5 * 14.0053 + 0.01 * -0.0115
-----------------
Finished episode: 1815 Reward: 2134.2571 total_loss = 6.9951 = 0.0290 + 0.5 * 13.9326 + 0.01 * -0.0115
-----------------
Finished episode: 1816 Reward: 2820.0143 total_loss = 4.3269 = 0.0592 + 0.5 * 8.5357 + 0.01 * -0.0108
-----------------
Finished episode: 1817 Reward: 3470.6131 total_loss = 13.6395 = 0.0135 + 0.5 * 27.2523 + 0.01 * -0.0120
-----------------
Finished episode: 1818 Reward: 2758.1614 total_loss = 10.0953 = 0.1310 + 0.5 * 19.9289 + 0.01 * -0.0103
-----------------
Finished episode: 1819 Reward: 3861.1819 total_loss = 14.8281 = 0.0172 + 0.5 * 29.6220 + 0.01 * -0.0117
-----------------
Finished episode: 1820 Rewar

Finished episode: 1880 Reward: 2968.6901 total_loss = 9.5397 = 0.0712 + 0.5 * 18.9372 + 0.01 * -0.0118
-----------------
Finished episode: 1881 Reward: 3276.9014 total_loss = 9.2151 = 0.0484 + 0.5 * 18.3336 + 0.01 * -0.0099
-----------------
Finished episode: 1882 Reward: 2618.0270 total_loss = 3.9901 = 0.1201 + 0.5 * 7.7403 + 0.01 * -0.0110
-----------------
Finished episode: 1883 Reward: 3586.1350 total_loss = 10.3082 = -0.0473 + 0.5 * 20.7113 + 0.01 * -0.0101
-----------------
Finished episode: 1884 Reward: 3574.1149 total_loss = 12.3705 = 0.0304 + 0.5 * 24.6803 + 0.01 * -0.0104
-----------------
Finished episode: 1885 Reward: 3868.2727 total_loss = 10.8895 = 0.0284 + 0.5 * 21.7225 + 0.01 * -0.0100
-----------------
Finished episode: 1886 Reward: 3063.2140 total_loss = 11.2631 = -0.0873 + 0.5 * 22.7011 + 0.01 * -0.0105
-----------------
Finished episode: 1887 Reward: 3399.4096 total_loss = 7.7123 = -0.0474 + 0.5 * 15.5195 + 0.01 * -0.0116
-----------------
Finished episode: 1888 Rew

Finished episode: 1948 Reward: 2720.0638 total_loss = 14.5981 = 0.0059 + 0.5 * 29.1846 + 0.01 * -0.0104
-----------------
Finished episode: 1949 Reward: 3806.1728 total_loss = 15.9630 = 0.0603 + 0.5 * 31.8055 + 0.01 * -0.0096
-----------------
Finished episode: 1950 Reward: 3222.6555 total_loss = 10.3576 = -0.0136 + 0.5 * 20.7425 + 0.01 * -0.0098
-----------------
Finished episode: 1951 Reward: 3817.2233 total_loss = 13.2574 = -0.0539 + 0.5 * 26.6229 + 0.01 * -0.0097
-----------------
Finished episode: 1952 Reward: 2644.3928 total_loss = 19.4613 = 0.1137 + 0.5 * 38.6954 + 0.01 * -0.0106
-----------------
Finished episode: 1953 Reward: 3379.0440 total_loss = 8.8378 = -0.0055 + 0.5 * 17.6868 + 0.01 * -0.0106
-----------------
Finished episode: 1954 Reward: 3168.8013 total_loss = 7.2429 = -0.0086 + 0.5 * 14.5032 + 0.01 * -0.0103
-----------------
Finished episode: 1955 Reward: 3560.1424 total_loss = 10.8443 = -0.1417 + 0.5 * 21.9722 + 0.01 * -0.0112
-----------------
Finished episode: 195

Finished episode: 16 Reward: 213.4363 total_loss = 8.9073 = -0.0530 + 0.5 * 17.9207 + 0.01 * -0.0040
-----------------
Finished episode: 17 Reward: 221.2131 total_loss = 7.9548 = -0.0398 + 0.5 * 15.9893 + 0.01 * -0.0041
-----------------
Finished episode: 18 Reward: 194.9868 total_loss = 7.0025 = 0.0512 + 0.5 * 13.9027 + 0.01 * -0.0041
-----------------
Finished episode: 19 Reward: 222.3162 total_loss = 8.6441 = -0.0279 + 0.5 * 17.3442 + 0.01 * -0.0045
-----------------
Finished episode: 20 Reward: 260.2445 total_loss = 10.3349 = -0.0204 + 0.5 * 20.7107 + 0.01 * -0.0043
-----------------
Finished episode: 21 Reward: 351.8821 total_loss = 17.9293 = 0.0082 + 0.5 * 35.8422 + 0.01 * -0.0039
-----------------
Finished episode: 22 Reward: 321.5580 total_loss = 13.2969 = -0.0231 + 0.5 * 26.6401 + 0.01 * -0.0039
-----------------
Finished episode: 23 Reward: 268.8733 total_loss = 9.0573 = 0.1020 + 0.5 * 17.9108 + 0.01 * -0.0046
-----------------
Finished episode: 24 Reward: 285.3854 total_loss

Finished episode: 86 Reward: 345.4884 total_loss = 3.0368 = -0.0165 + 0.5 * 6.1066 + 0.01 * -0.0062
-----------------
Finished episode: 87 Reward: 354.1045 total_loss = 2.2481 = 0.0593 + 0.5 * 4.3777 + 0.01 * -0.0060
-----------------
Finished episode: 88 Reward: 412.0983 total_loss = 3.5116 = 0.0520 + 0.5 * 6.9192 + 0.01 * -0.0063
-----------------
Finished episode: 89 Reward: 392.5656 total_loss = 2.7019 = 0.0458 + 0.5 * 5.3122 + 0.01 * -0.0054
-----------------
Finished episode: 90 Reward: 382.0905 total_loss = 3.0515 = -0.0061 + 0.5 * 6.1153 + 0.01 * -0.0059
-----------------
Finished episode: 91 Reward: 443.0980 total_loss = 3.4890 = -0.0484 + 0.5 * 7.0751 + 0.01 * -0.0061
-----------------
Finished episode: 92 Reward: 376.4919 total_loss = 2.7731 = -0.0286 + 0.5 * 5.6036 + 0.01 * -0.0062
-----------------
Finished episode: 93 Reward: 383.0610 total_loss = 2.6964 = -0.0370 + 0.5 * 5.4671 + 0.01 * -0.0056
-----------------
Finished episode: 94 Reward: 409.2650 total_loss = 2.6834 =

Finished episode: 155 Reward: 1058.7044 total_loss = 12.8839 = -0.0117 + 0.5 * 25.7912 + 0.01 * -0.0091
-----------------
Finished episode: 156 Reward: 1030.5698 total_loss = 13.8999 = -0.0486 + 0.5 * 27.8972 + 0.01 * -0.0078
-----------------
Finished episode: 157 Reward: 936.6734 total_loss = 10.7397 = -0.0902 + 0.5 * 21.6601 + 0.01 * -0.0086
-----------------
Finished episode: 158 Reward: 1070.2918 total_loss = 14.7155 = -0.1029 + 0.5 * 29.6371 + 0.01 * -0.0086
-----------------
Finished episode: 159 Reward: 942.4351 total_loss = 7.4927 = -0.0381 + 0.5 * 15.0618 + 0.01 * -0.0084
-----------------
Finished episode: 160 Reward: 813.0534 total_loss = 8.6064 = 0.0069 + 0.5 * 17.1990 + 0.01 * -0.0081
-----------------
Finished episode: 161 Reward: 1057.6275 total_loss = 8.1493 = -0.0743 + 0.5 * 16.4474 + 0.01 * -0.0090
-----------------
Finished episode: 162 Reward: 821.5023 total_loss = 5.3981 = 0.0218 + 0.5 * 10.7527 + 0.01 * -0.0094
-----------------
Finished episode: 163 Reward: 958.

Finished episode: 224 Reward: 1072.8048 total_loss = 6.8520 = -0.1024 + 0.5 * 13.9091 + 0.01 * -0.0125
-----------------
Finished episode: 225 Reward: 1109.7868 total_loss = 5.5471 = -0.0167 + 0.5 * 11.1278 + 0.01 * -0.0118
-----------------
Finished episode: 226 Reward: 1044.4494 total_loss = 4.7577 = -0.1436 + 0.5 * 9.8029 + 0.01 * -0.0101
-----------------
Finished episode: 227 Reward: 1553.6808 total_loss = 10.8681 = -0.0871 + 0.5 * 21.9106 + 0.01 * -0.0120
-----------------
Finished episode: 228 Reward: 1555.5995 total_loss = 12.4358 = -0.0879 + 0.5 * 25.0477 + 0.01 * -0.0128
-----------------
Finished episode: 229 Reward: 1532.5601 total_loss = 8.8909 = -0.0135 + 0.5 * 17.8092 + 0.01 * -0.0113
-----------------
Finished episode: 230 Reward: 1135.2388 total_loss = 4.2547 = -0.0714 + 0.5 * 8.6525 + 0.01 * -0.0123
-----------------
Finished episode: 231 Reward: 1142.6907 total_loss = 5.4640 = -0.0364 + 0.5 * 11.0009 + 0.01 * -0.0128
-----------------
Finished episode: 232 Reward: 12

Finished episode: 293 Reward: 1408.3502 total_loss = 2.6857 = -0.0950 + 0.5 * 5.5617 + 0.01 * -0.0176
-----------------
Finished episode: 294 Reward: 1589.2632 total_loss = 5.7827 = -0.0310 + 0.5 * 11.6277 + 0.01 * -0.0182
-----------------
Finished episode: 295 Reward: 1878.6718 total_loss = 6.3162 = -0.0479 + 0.5 * 12.7285 + 0.01 * -0.0188
-----------------
Finished episode: 296 Reward: 2021.9017 total_loss = 3.4493 = -0.0097 + 0.5 * 6.9183 + 0.01 * -0.0160
-----------------
Finished episode: 297 Reward: 1557.4347 total_loss = 6.8723 = 0.0225 + 0.5 * 13.6999 + 0.01 * -0.0171
-----------------
Finished episode: 298 Reward: 1946.7198 total_loss = 6.6893 = 0.0026 + 0.5 * 13.3737 + 0.01 * -0.0174
-----------------
Finished episode: 299 Reward: 1619.2375 total_loss = 6.5825 = 0.1182 + 0.5 * 12.9288 + 0.01 * -0.0187
-----------------
Finished episode: 300 Reward: 1600.8424 total_loss = 7.1050 = 0.0169 + 0.5 * 14.1765 + 0.01 * -0.0187
-----------------
Finished episode: 301 Reward: 1980.061

Finished episode: 362 Reward: 1740.3626 total_loss = 1.4423 = -0.0385 + 0.5 * 2.9620 + 0.01 * -0.0219
-----------------
Finished episode: 363 Reward: 2710.0904 total_loss = 4.5834 = -0.0810 + 0.5 * 9.3295 + 0.01 * -0.0242
-----------------
Finished episode: 364 Reward: 1946.7568 total_loss = 8.0152 = 0.0494 + 0.5 * 15.9320 + 0.01 * -0.0215
-----------------
Finished episode: 365 Reward: 1676.4080 total_loss = 5.4228 = -0.0156 + 0.5 * 10.8771 + 0.01 * -0.0183
-----------------
Finished episode: 366 Reward: 1982.6863 total_loss = 5.9288 = -0.0093 + 0.5 * 11.8766 + 0.01 * -0.0209
-----------------
Finished episode: 367 Reward: 1632.0081 total_loss = 3.2387 = -0.0304 + 0.5 * 6.5386 + 0.01 * -0.0193
-----------------
Finished episode: 368 Reward: 1758.2213 total_loss = 1.5418 = 0.0228 + 0.5 * 3.0385 + 0.01 * -0.0225
-----------------
Finished episode: 369 Reward: 1754.6077 total_loss = 2.5078 = 0.0240 + 0.5 * 4.9680 + 0.01 * -0.0197
-----------------
Finished episode: 370 Reward: 2327.5946 

Finished episode: 430 Reward: 2209.4015 total_loss = 2.8503 = 0.0512 + 0.5 * 5.5987 + 0.01 * -0.0202
-----------------
Finished episode: 431 Reward: 1897.9096 total_loss = 8.3590 = -0.0580 + 0.5 * 16.8344 + 0.01 * -0.0238
-----------------
Finished episode: 432 Reward: 2274.1573 total_loss = 8.4839 = -0.1251 + 0.5 * 17.2185 + 0.01 * -0.0209
-----------------
Finished episode: 433 Reward: 1298.6467 total_loss = 5.1501 = -0.0083 + 0.5 * 10.3173 + 0.01 * -0.0229
-----------------
Finished episode: 434 Reward: 2411.9380 total_loss = 4.2542 = -0.0111 + 0.5 * 8.5310 + 0.01 * -0.0207
-----------------
Finished episode: 435 Reward: 2622.3044 total_loss = 9.6669 = -0.0518 + 0.5 * 19.4378 + 0.01 * -0.0225
-----------------
Finished episode: 436 Reward: 2444.5195 total_loss = 6.8780 = -0.0461 + 0.5 * 13.8486 + 0.01 * -0.0199
-----------------
Finished episode: 437 Reward: 2146.9876 total_loss = 8.2729 = -0.0481 + 0.5 * 16.6424 + 0.01 * -0.0204
-----------------
Finished episode: 438 Reward: 2030.

Finished episode: 498 Reward: 2122.2827 total_loss = 4.8707 = 0.0264 + 0.5 * 9.6891 + 0.01 * -0.0272
-----------------
Finished episode: 499 Reward: 2652.8321 total_loss = 6.9512 = -0.0094 + 0.5 * 13.9215 + 0.01 * -0.0240
-----------------
Finished episode: 500 Reward: 2345.9158 total_loss = 10.0035 = -0.0336 + 0.5 * 20.0746 + 0.01 * -0.0275
-----------------
Finished episode: 501 Reward: 2374.3481 total_loss = 10.7804 = -0.0170 + 0.5 * 21.5955 + 0.01 * -0.0267
-----------------
Finished episode: 502 Reward: 2611.4782 total_loss = 4.6562 = -0.0866 + 0.5 * 9.4861 + 0.01 * -0.0244
-----------------
Finished episode: 503 Reward: 2360.6324 total_loss = 10.0933 = -0.0134 + 0.5 * 20.2140 + 0.01 * -0.0249
-----------------
Finished episode: 504 Reward: 2181.1441 total_loss = 9.9136 = -0.0024 + 0.5 * 19.8324 + 0.01 * -0.0251
-----------------
Finished episode: 505 Reward: 2616.5981 total_loss = 10.4126 = -0.0255 + 0.5 * 20.8767 + 0.01 * -0.0254
-----------------
Finished episode: 506 Reward: 2

Finished episode: 566 Reward: 2168.1620 total_loss = 2.1575 = -0.0739 + 0.5 * 4.4633 + 0.01 * -0.0250
-----------------
Finished episode: 567 Reward: 2375.2603 total_loss = 4.7865 = 0.0147 + 0.5 * 9.5441 + 0.01 * -0.0228
-----------------
Finished episode: 568 Reward: 2799.8341 total_loss = 8.9859 = -0.0147 + 0.5 * 18.0017 + 0.01 * -0.0261
-----------------
Finished episode: 569 Reward: 2371.0737 total_loss = 10.8707 = 0.0569 + 0.5 * 21.6281 + 0.01 * -0.0238
-----------------
Finished episode: 570 Reward: 2568.8695 total_loss = 8.8368 = -0.0131 + 0.5 * 17.7001 + 0.01 * -0.0234
-----------------
Finished episode: 571 Reward: 2921.3379 total_loss = 9.1083 = -0.0663 + 0.5 * 18.3497 + 0.01 * -0.0277
-----------------
Finished episode: 572 Reward: 2771.2343 total_loss = 3.5795 = -0.0178 + 0.5 * 7.1951 + 0.01 * -0.0277
-----------------
Finished episode: 573 Reward: 2311.2158 total_loss = 5.3440 = -0.0567 + 0.5 * 10.8019 + 0.01 * -0.0267
-----------------
Finished episode: 574 Reward: 2706.5

Finished episode: 634 Reward: 2535.2816 total_loss = 10.1968 = -0.1355 + 0.5 * 20.6651 + 0.01 * -0.0261
-----------------
Finished episode: 635 Reward: 2631.0651 total_loss = 10.1992 = 0.0009 + 0.5 * 20.3972 + 0.01 * -0.0300
-----------------
Finished episode: 636 Reward: 1695.3421 total_loss = 5.5512 = -0.0665 + 0.5 * 11.2360 + 0.01 * -0.0264
-----------------
Finished episode: 637 Reward: 2276.4966 total_loss = 9.0180 = 0.0771 + 0.5 * 17.8824 + 0.01 * -0.0274
-----------------
Finished episode: 638 Reward: 2364.6277 total_loss = 9.8824 = 0.0773 + 0.5 * 19.6106 + 0.01 * -0.0279
-----------------
Finished episode: 639 Reward: 2654.2742 total_loss = 9.0573 = -0.0367 + 0.5 * 18.1885 + 0.01 * -0.0251
-----------------
Finished episode: 640 Reward: 2758.8383 total_loss = 9.7031 = -0.0069 + 0.5 * 19.4204 + 0.01 * -0.0270
-----------------
Finished episode: 641 Reward: 2654.5688 total_loss = 10.8353 = -0.0639 + 0.5 * 21.7990 + 0.01 * -0.0268
-----------------
Finished episode: 642 Reward: 24

Finished episode: 702 Reward: 2300.3548 total_loss = 4.9676 = 0.1126 + 0.5 * 9.7104 + 0.01 * -0.0241
-----------------
Finished episode: 703 Reward: 2878.5903 total_loss = 9.8370 = -0.0026 + 0.5 * 19.6796 + 0.01 * -0.0248
-----------------
Finished episode: 704 Reward: 3037.1889 total_loss = 9.7172 = -0.0637 + 0.5 * 19.5624 + 0.01 * -0.0250
-----------------
Finished episode: 705 Reward: 2353.5500 total_loss = 7.6679 = 0.0489 + 0.5 * 15.2386 + 0.01 * -0.0222
-----------------
Finished episode: 706 Reward: 2179.2358 total_loss = 2.6648 = -0.0885 + 0.5 * 5.5069 + 0.01 * -0.0232
-----------------
Finished episode: 707 Reward: 1479.9149 total_loss = 0.6416 = -0.0572 + 0.5 * 1.3981 + 0.01 * -0.0243
-----------------
Finished episode: 708 Reward: 2897.8184 total_loss = 7.6940 = -0.1681 + 0.5 * 15.7246 + 0.01 * -0.0241
-----------------
Finished episode: 709 Reward: 2596.1680 total_loss = 8.9757 = 0.0053 + 0.5 * 17.9414 + 0.01 * -0.0236
-----------------
Finished episode: 710 Reward: 2178.629

Finished episode: 770 Reward: 2647.6681 total_loss = 5.2550 = -0.0007 + 0.5 * 10.5118 + 0.01 * -0.0231
-----------------
Finished episode: 771 Reward: 2769.5257 total_loss = 4.8394 = -0.0520 + 0.5 * 9.7834 + 0.01 * -0.0212
-----------------
Finished episode: 772 Reward: 2958.2688 total_loss = 9.5226 = -0.0683 + 0.5 * 19.1822 + 0.01 * -0.0203
-----------------
Finished episode: 773 Reward: 2435.6795 total_loss = 9.1655 = 0.0533 + 0.5 * 18.2247 + 0.01 * -0.0205
-----------------
Finished episode: 774 Reward: 2378.9564 total_loss = 7.2967 = 0.0659 + 0.5 * 14.4621 + 0.01 * -0.0240
-----------------
Finished episode: 775 Reward: 2229.9973 total_loss = 5.6571 = -0.1054 + 0.5 * 11.5256 + 0.01 * -0.0205
-----------------
Finished episode: 776 Reward: 2256.4476 total_loss = 7.1771 = 0.0922 + 0.5 * 14.1702 + 0.01 * -0.0221
-----------------
Finished episode: 777 Reward: 2244.7545 total_loss = 6.6415 = -0.0718 + 0.5 * 13.4271 + 0.01 * -0.0233
-----------------
Finished episode: 778 Reward: 2489.7

Finished episode: 838 Reward: 2292.8281 total_loss = 6.5953 = 0.0192 + 0.5 * 13.1526 + 0.01 * -0.0222
-----------------
Finished episode: 839 Reward: 2950.7803 total_loss = 9.9149 = -0.0480 + 0.5 * 19.9262 + 0.01 * -0.0193
-----------------
Finished episode: 840 Reward: 2218.8450 total_loss = 1.5519 = -0.0651 + 0.5 * 3.2344 + 0.01 * -0.0205
-----------------
Finished episode: 841 Reward: 2172.1762 total_loss = 6.6212 = 0.0143 + 0.5 * 13.2142 + 0.01 * -0.0233
-----------------
Finished episode: 842 Reward: 2431.0351 total_loss = 7.7876 = 0.0179 + 0.5 * 15.5397 + 0.01 * -0.0207
-----------------
Finished episode: 843 Reward: 2046.4901 total_loss = 7.3839 = -0.0101 + 0.5 * 14.7883 + 0.01 * -0.0221
-----------------
Finished episode: 844 Reward: 3017.4276 total_loss = 10.4686 = 0.0012 + 0.5 * 20.9352 + 0.01 * -0.0211
-----------------
Finished episode: 845 Reward: 2878.2396 total_loss = 3.2689 = 0.0134 + 0.5 * 6.5114 + 0.01 * -0.0211
-----------------
Finished episode: 846 Reward: 1965.475

Finished episode: 907 Reward: 2649.7247 total_loss = 6.2330 = -0.0641 + 0.5 * 12.5946 + 0.01 * -0.0210
-----------------
Finished episode: 908 Reward: 2356.1406 total_loss = 12.8176 = 0.0239 + 0.5 * 25.5879 + 0.01 * -0.0203
-----------------
Finished episode: 909 Reward: 2400.6137 total_loss = 9.7496 = 0.0159 + 0.5 * 19.4677 + 0.01 * -0.0207
-----------------
Finished episode: 910 Reward: 2507.7556 total_loss = 10.1904 = -0.0533 + 0.5 * 20.4878 + 0.01 * -0.0185
-----------------
Finished episode: 911 Reward: 3022.5114 total_loss = 11.4645 = 0.0123 + 0.5 * 22.9048 + 0.01 * -0.0204
-----------------
Finished episode: 912 Reward: 2195.5721 total_loss = 8.4704 = 0.0191 + 0.5 * 16.9031 + 0.01 * -0.0223
-----------------
Finished episode: 913 Reward: 2432.3952 total_loss = 9.2208 = 0.0083 + 0.5 * 18.4255 + 0.01 * -0.0215
-----------------
Finished episode: 914 Reward: 1750.3782 total_loss = 4.2602 = 0.0232 + 0.5 * 8.4744 + 0.01 * -0.0177
-----------------
Finished episode: 915 Reward: 3051.4

Finished episode: 975 Reward: 2104.4208 total_loss = 1.4782 = -0.0514 + 0.5 * 3.0597 + 0.01 * -0.0156
-----------------
Finished episode: 976 Reward: 2285.0617 total_loss = 7.7357 = -0.0885 + 0.5 * 15.6487 + 0.01 * -0.0175
-----------------
Finished episode: 977 Reward: 2180.1052 total_loss = 5.8462 = -0.0241 + 0.5 * 11.7409 + 0.01 * -0.0184
-----------------
Finished episode: 978 Reward: 2628.2106 total_loss = 3.7309 = -0.0016 + 0.5 * 7.4652 + 0.01 * -0.0172
-----------------
Finished episode: 979 Reward: 1775.0858 total_loss = 1.0418 = 0.0826 + 0.5 * 1.9189 + 0.01 * -0.0180
-----------------
Finished episode: 980 Reward: 2337.3696 total_loss = 6.4877 = -0.0061 + 0.5 * 12.9879 + 0.01 * -0.0173
-----------------
Finished episode: 981 Reward: 3041.0461 total_loss = 8.9972 = 0.0135 + 0.5 * 17.9677 + 0.01 * -0.0178
-----------------
Finished episode: 982 Reward: 2877.3297 total_loss = 9.3309 = -0.0814 + 0.5 * 18.8251 + 0.01 * -0.0176
-----------------
Finished episode: 983 Reward: 1946.27

Finished episode: 1043 Reward: 2207.0670 total_loss = 1.8486 = 0.0307 + 0.5 * 3.6362 + 0.01 * -0.0178
-----------------
Finished episode: 1044 Reward: 3059.0379 total_loss = 12.3523 = -0.0301 + 0.5 * 24.7652 + 0.01 * -0.0162
-----------------
Finished episode: 1045 Reward: 2351.7696 total_loss = 10.0185 = -0.0353 + 0.5 * 20.1080 + 0.01 * -0.0151
-----------------
Finished episode: 1046 Reward: 2351.6294 total_loss = 5.4055 = -0.0744 + 0.5 * 10.9602 + 0.01 * -0.0169
-----------------
Finished episode: 1047 Reward: 3009.1791 total_loss = 11.2989 = -0.0821 + 0.5 * 22.7624 + 0.01 * -0.0158
-----------------
Finished episode: 1048 Reward: 2979.4103 total_loss = 10.9674 = 0.1093 + 0.5 * 21.7165 + 0.01 * -0.0164
-----------------
Finished episode: 1049 Reward: 2692.7829 total_loss = 6.2581 = 0.0075 + 0.5 * 12.5017 + 0.01 * -0.0165
-----------------
Finished episode: 1050 Reward: 2273.9217 total_loss = 7.2657 = -0.0241 + 0.5 * 14.5798 + 0.01 * -0.0162
-----------------
Finished episode: 1051 R

Finished episode: 1111 Reward: 3128.5538 total_loss = 11.7220 = -0.0986 + 0.5 * 23.6416 + 0.01 * -0.0171
-----------------
Finished episode: 1112 Reward: 2782.1949 total_loss = 8.1741 = -0.0792 + 0.5 * 16.5069 + 0.01 * -0.0170
-----------------
Finished episode: 1113 Reward: 2964.2590 total_loss = 12.0864 = -0.0347 + 0.5 * 24.2426 + 0.01 * -0.0150
-----------------
Finished episode: 1114 Reward: 2304.9638 total_loss = 11.0199 = -0.0104 + 0.5 * 22.0609 + 0.01 * -0.0167
-----------------
Finished episode: 1115 Reward: 3061.1882 total_loss = 9.9435 = -0.0372 + 0.5 * 19.9616 + 0.01 * -0.0187
-----------------
Finished episode: 1116 Reward: 2146.6118 total_loss = 1.9120 = -0.0139 + 0.5 * 3.8520 + 0.01 * -0.0156
-----------------
Finished episode: 1117 Reward: 2854.4455 total_loss = 9.8709 = -0.0018 + 0.5 * 19.7458 + 0.01 * -0.0171
-----------------
Finished episode: 1118 Reward: 2026.9490 total_loss = 1.7855 = -0.0378 + 0.5 * 3.6469 + 0.01 * -0.0171
-----------------
Finished episode: 1119 

Finished episode: 1179 Reward: 2248.1254 total_loss = 2.9502 = -0.0090 + 0.5 * 5.9186 + 0.01 * -0.0132
-----------------
Finished episode: 1180 Reward: 2291.2209 total_loss = 7.1591 = -0.0169 + 0.5 * 14.3523 + 0.01 * -0.0143
-----------------
Finished episode: 1181 Reward: 1765.1395 total_loss = 2.6745 = -0.0289 + 0.5 * 5.4071 + 0.01 * -0.0132
-----------------
Finished episode: 1182 Reward: 1950.0353 total_loss = 5.2196 = -0.0227 + 0.5 * 10.4848 + 0.01 * -0.0130
-----------------
Finished episode: 1183 Reward: 3019.6250 total_loss = 5.6619 = -0.0174 + 0.5 * 11.3590 + 0.01 * -0.0154
-----------------
Finished episode: 1184 Reward: 2734.8411 total_loss = 7.8731 = -0.0205 + 0.5 * 15.7874 + 0.01 * -0.0149
-----------------
Finished episode: 1185 Reward: 2030.4379 total_loss = 5.9731 = -0.0944 + 0.5 * 12.1351 + 0.01 * -0.0136
-----------------
Finished episode: 1186 Reward: 2706.6217 total_loss = 3.8131 = -0.0927 + 0.5 * 7.8120 + 0.01 * -0.0137
-----------------
Finished episode: 1187 Rewa

Finished episode: 1247 Reward: 2199.8636 total_loss = 8.6687 = -0.0549 + 0.5 * 17.4474 + 0.01 * -0.0146
-----------------
Finished episode: 1248 Reward: 1432.6960 total_loss = 1.1074 = -0.0051 + 0.5 * 2.2253 + 0.01 * -0.0130
-----------------
Finished episode: 1249 Reward: 1628.3188 total_loss = 8.2155 = 0.0212 + 0.5 * 16.3887 + 0.01 * -0.0127
-----------------
Finished episode: 1250 Reward: 2599.7396 total_loss = 5.9390 = -0.0882 + 0.5 * 12.0546 + 0.01 * -0.0122
-----------------
Finished episode: 1251 Reward: 1806.6468 total_loss = 1.6859 = 0.0050 + 0.5 * 3.3621 + 0.01 * -0.0128
-----------------
Finished episode: 1252 Reward: 2316.0740 total_loss = 9.3695 = 0.0615 + 0.5 * 18.6162 + 0.01 * -0.0122
-----------------
Finished episode: 1253 Reward: 2922.2531 total_loss = 5.3333 = -0.0247 + 0.5 * 10.7161 + 0.01 * -0.0128
-----------------
Finished episode: 1254 Reward: 2472.0395 total_loss = 10.9484 = -0.1458 + 0.5 * 22.1888 + 0.01 * -0.0128
-----------------
Finished episode: 1255 Rewar

Finished episode: 1315 Reward: 1706.1521 total_loss = 8.7210 = 0.0318 + 0.5 * 17.3787 + 0.01 * -0.0120
-----------------
Finished episode: 1316 Reward: 2551.8377 total_loss = 8.8367 = 0.0121 + 0.5 * 17.6494 + 0.01 * -0.0120
-----------------
Finished episode: 1317 Reward: 2630.3500 total_loss = 10.3643 = 0.0575 + 0.5 * 20.6138 + 0.01 * -0.0126
-----------------
Finished episode: 1318 Reward: 2016.3278 total_loss = 2.8464 = 0.0540 + 0.5 * 5.5850 + 0.01 * -0.0133
-----------------
Finished episode: 1319 Reward: 2286.6770 total_loss = 10.1195 = 0.0995 + 0.5 * 20.0403 + 0.01 * -0.0140
-----------------
Finished episode: 1320 Reward: 2878.8911 total_loss = 8.0149 = -0.1110 + 0.5 * 16.2521 + 0.01 * -0.0116
-----------------
Finished episode: 1321 Reward: 2656.2321 total_loss = 8.3355 = 0.1966 + 0.5 * 16.2780 + 0.01 * -0.0118
-----------------
Finished episode: 1322 Reward: 2328.6616 total_loss = 3.4215 = 0.0042 + 0.5 * 6.8350 + 0.01 * -0.0122
-----------------
Finished episode: 1323 Reward: 

Finished episode: 1383 Reward: 2607.8482 total_loss = 10.3960 = -0.0552 + 0.5 * 20.9025 + 0.01 * -0.0119
-----------------
Finished episode: 1384 Reward: 2540.6617 total_loss = 7.1630 = 0.0129 + 0.5 * 14.3004 + 0.01 * -0.0110
-----------------
Finished episode: 1385 Reward: 1654.6862 total_loss = 1.7486 = 0.0051 + 0.5 * 3.4872 + 0.01 * -0.0102
-----------------
Finished episode: 1386 Reward: 2614.7978 total_loss = 12.8388 = 0.1543 + 0.5 * 25.3693 + 0.01 * -0.0112
-----------------
Finished episode: 1387 Reward: 2965.9943 total_loss = 6.2236 = -0.0161 + 0.5 * 12.4797 + 0.01 * -0.0120
-----------------
Finished episode: 1388 Reward: 2633.5072 total_loss = 10.1251 = 0.0042 + 0.5 * 20.2421 + 0.01 * -0.0109
-----------------
Finished episode: 1389 Reward: 2697.8453 total_loss = 6.7015 = 0.0455 + 0.5 * 13.3121 + 0.01 * -0.0120
-----------------
Finished episode: 1390 Reward: 3123.9357 total_loss = 7.8545 = 0.0142 + 0.5 * 15.6809 + 0.01 * -0.0121
-----------------
Finished episode: 1391 Rewar

Finished episode: 1451 Reward: 2756.3553 total_loss = 5.5121 = -0.1799 + 0.5 * 11.3842 + 0.01 * -0.0104
-----------------
Finished episode: 1452 Reward: 3051.4502 total_loss = 13.0727 = 0.0691 + 0.5 * 26.0073 + 0.01 * -0.0102
-----------------
Finished episode: 1453 Reward: 2265.4130 total_loss = 10.8199 = 0.0249 + 0.5 * 21.5902 + 0.01 * -0.0113
-----------------
Finished episode: 1454 Reward: 2998.6793 total_loss = 7.5315 = -0.0638 + 0.5 * 15.1910 + 0.01 * -0.0102
-----------------
Finished episode: 1455 Reward: 2714.1929 total_loss = 8.7114 = -0.0714 + 0.5 * 17.5659 + 0.01 * -0.0092
-----------------
Finished episode: 1456 Reward: 2765.0197 total_loss = 12.6095 = 0.0618 + 0.5 * 25.0956 + 0.01 * -0.0112
-----------------
Finished episode: 1457 Reward: 3085.8997 total_loss = 12.4424 = -0.0291 + 0.5 * 24.9430 + 0.01 * -0.0096
-----------------
Finished episode: 1458 Reward: 2820.0779 total_loss = 9.6394 = -0.0392 + 0.5 * 19.3575 + 0.01 * -0.0109
-----------------
Finished episode: 1459 

Finished episode: 1519 Reward: 3203.6662 total_loss = 10.6641 = 0.0158 + 0.5 * 21.2968 + 0.01 * -0.0095
-----------------
Finished episode: 1520 Reward: 2765.3625 total_loss = 12.6737 = 0.0075 + 0.5 * 25.3325 + 0.01 * -0.0096
-----------------
Finished episode: 1521 Reward: 2473.2901 total_loss = 7.5651 = 0.0581 + 0.5 * 15.0143 + 0.01 * -0.0096
-----------------
Finished episode: 1522 Reward: 3202.1518 total_loss = 11.2319 = 0.0039 + 0.5 * 22.4561 + 0.01 * -0.0089
-----------------
Finished episode: 1523 Reward: 2812.8189 total_loss = 11.7375 = -0.0323 + 0.5 * 23.5397 + 0.01 * -0.0095
-----------------
Finished episode: 1524 Reward: 2565.3885 total_loss = 2.2035 = 0.0778 + 0.5 * 4.2516 + 0.01 * -0.0101
-----------------
Finished episode: 1525 Reward: 3228.8107 total_loss = 14.3478 = 0.0516 + 0.5 * 28.5925 + 0.01 * -0.0097
-----------------
Finished episode: 1526 Reward: 2379.8730 total_loss = 9.7150 = -0.0330 + 0.5 * 19.4961 + 0.01 * -0.0089
-----------------
Finished episode: 1527 Rew

Finished episode: 1587 Reward: 2402.1873 total_loss = 7.7544 = 0.0118 + 0.5 * 15.4855 + 0.01 * -0.0099
-----------------
Finished episode: 1588 Reward: 1884.0997 total_loss = 2.0250 = -0.0442 + 0.5 * 4.1384 + 0.01 * -0.0089
-----------------
Finished episode: 1589 Reward: 3120.1969 total_loss = 12.5695 = -0.0143 + 0.5 * 25.1677 + 0.01 * -0.0105
-----------------
Finished episode: 1590 Reward: 2948.5799 total_loss = 9.5917 = 0.0094 + 0.5 * 19.1647 + 0.01 * -0.0095
-----------------
Finished episode: 1591 Reward: 2708.2263 total_loss = 8.8099 = -0.0058 + 0.5 * 17.6317 + 0.01 * -0.0106
-----------------
Finished episode: 1592 Reward: 2870.6473 total_loss = 5.1642 = -0.0252 + 0.5 * 10.3789 + 0.01 * -0.0093
-----------------
Finished episode: 1593 Reward: 1798.1025 total_loss = 5.6079 = 0.0328 + 0.5 * 11.1505 + 0.01 * -0.0104
-----------------
Finished episode: 1594 Reward: 2771.2078 total_loss = 9.8186 = 0.0679 + 0.5 * 19.5014 + 0.01 * -0.0082
-----------------
Finished episode: 1595 Rewar

Finished episode: 1655 Reward: 2742.4162 total_loss = 5.7833 = 0.0315 + 0.5 * 11.5039 + 0.01 * -0.0087
-----------------
Finished episode: 1656 Reward: 1656.7753 total_loss = 2.2425 = -0.0899 + 0.5 * 4.6650 + 0.01 * -0.0090
-----------------
Finished episode: 1657 Reward: 2431.6273 total_loss = 0.9810 = 0.0146 + 0.5 * 1.9329 + 0.01 * -0.0101
-----------------
Finished episode: 1658 Reward: 2554.4626 total_loss = 10.1654 = 0.1108 + 0.5 * 20.1093 + 0.01 * -0.0104
-----------------
Finished episode: 1659 Reward: 1966.8366 total_loss = 3.5516 = 0.0714 + 0.5 * 6.9606 + 0.01 * -0.0094
-----------------
Finished episode: 1660 Reward: 1963.4689 total_loss = 6.8745 = -0.0834 + 0.5 * 13.9161 + 0.01 * -0.0088
-----------------
Finished episode: 1661 Reward: 3205.0499 total_loss = 12.3028 = -0.0502 + 0.5 * 24.7062 + 0.01 * -0.0090
-----------------
Finished episode: 1662 Reward: 2591.7872 total_loss = 10.8371 = -0.0265 + 0.5 * 21.7273 + 0.01 * -0.0088
-----------------
Finished episode: 1663 Rewar

Finished episode: 1723 Reward: 2227.9082 total_loss = 5.3367 = 0.0189 + 0.5 * 10.6359 + 0.01 * -0.0085
-----------------
Finished episode: 1724 Reward: 2320.5832 total_loss = 6.8442 = -0.0270 + 0.5 * 13.7426 + 0.01 * -0.0082
-----------------
Finished episode: 1725 Reward: 2921.7175 total_loss = 6.5407 = 0.0026 + 0.5 * 13.0764 + 0.01 * -0.0087
-----------------
Finished episode: 1726 Reward: 2309.9221 total_loss = 7.0071 = 0.0034 + 0.5 * 14.0077 + 0.01 * -0.0081
-----------------
Finished episode: 1727 Reward: 3207.4520 total_loss = 12.5331 = -0.0345 + 0.5 * 25.1352 + 0.01 * -0.0078
-----------------
Finished episode: 1728 Reward: 3188.2799 total_loss = 8.7349 = 0.0709 + 0.5 * 17.3281 + 0.01 * -0.0089
-----------------
Finished episode: 1729 Reward: 3031.2214 total_loss = 10.2328 = 0.0239 + 0.5 * 20.4179 + 0.01 * -0.0078
-----------------
Finished episode: 1730 Reward: 2302.9175 total_loss = 9.3927 = -0.0716 + 0.5 * 18.9288 + 0.01 * -0.0083
-----------------
Finished episode: 1731 Rewa

Finished episode: 1791 Reward: 2071.2459 total_loss = 5.0438 = -0.0539 + 0.5 * 10.1956 + 0.01 * -0.0077
-----------------
Finished episode: 1792 Reward: 3096.8392 total_loss = 11.6472 = -0.0941 + 0.5 * 23.4829 + 0.01 * -0.0080
-----------------
Finished episode: 1793 Reward: 2688.2072 total_loss = 7.6564 = -0.0680 + 0.5 * 15.4490 + 0.01 * -0.0076
-----------------
Finished episode: 1794 Reward: 3242.0047 total_loss = 10.9880 = 0.0200 + 0.5 * 21.9362 + 0.01 * -0.0084
-----------------
Finished episode: 1795 Reward: 2326.2352 total_loss = 6.8451 = 0.0314 + 0.5 * 13.6276 + 0.01 * -0.0073
-----------------
Finished episode: 1796 Reward: 2993.9632 total_loss = 8.9169 = -0.0451 + 0.5 * 17.9242 + 0.01 * -0.0087
-----------------
Finished episode: 1797 Reward: 2286.8240 total_loss = 8.2744 = -0.0745 + 0.5 * 16.6980 + 0.01 * -0.0089
-----------------
Finished episode: 1798 Reward: 2329.9405 total_loss = 6.7255 = 0.0283 + 0.5 * 13.3944 + 0.01 * -0.0083
-----------------
Finished episode: 1799 Re

Finished episode: 1859 Reward: 2905.8084 total_loss = 12.2591 = 0.2227 + 0.5 * 24.0729 + 0.01 * -0.0069
-----------------
Finished episode: 1860 Reward: 3118.8765 total_loss = 10.7138 = -0.0252 + 0.5 * 21.4783 + 0.01 * -0.0073
-----------------
Finished episode: 1861 Reward: 2567.3336 total_loss = 6.3452 = -0.0934 + 0.5 * 12.8773 + 0.01 * -0.0081
-----------------
Finished episode: 1862 Reward: 2355.5073 total_loss = 5.1802 = -0.1298 + 0.5 * 10.6203 + 0.01 * -0.0087
-----------------
Finished episode: 1863 Reward: 2561.1795 total_loss = 6.0119 = -0.0309 + 0.5 * 12.0858 + 0.01 * -0.0083
-----------------
Finished episode: 1864 Reward: 2126.7529 total_loss = 7.4041 = -0.0209 + 0.5 * 14.8500 + 0.01 * -0.0079
-----------------
Finished episode: 1865 Reward: 2283.0389 total_loss = 11.6431 = -0.0110 + 0.5 * 23.3084 + 0.01 * -0.0071
-----------------
Finished episode: 1866 Reward: 1974.8616 total_loss = 4.0066 = -0.0306 + 0.5 * 8.0746 + 0.01 * -0.0076
-----------------
Finished episode: 1867 

Finished episode: 1927 Reward: 2489.5469 total_loss = 4.7568 = -0.0421 + 0.5 * 9.5980 + 0.01 * -0.0072
-----------------
Finished episode: 1928 Reward: 2314.5764 total_loss = 8.8011 = -0.0769 + 0.5 * 17.7562 + 0.01 * -0.0075
-----------------
Finished episode: 1929 Reward: 2936.0642 total_loss = 7.9856 = 0.1047 + 0.5 * 15.7620 + 0.01 * -0.0072
-----------------
Finished episode: 1930 Reward: 2034.4081 total_loss = 5.8034 = 0.0043 + 0.5 * 11.5983 + 0.01 * -0.0070
-----------------
Finished episode: 1931 Reward: 3232.1478 total_loss = 12.8714 = 0.0110 + 0.5 * 25.7209 + 0.01 * -0.0073
-----------------
Finished episode: 1932 Reward: 2378.9702 total_loss = 3.6278 = -0.0273 + 0.5 * 7.3104 + 0.01 * -0.0076
-----------------
Finished episode: 1933 Reward: 2719.3427 total_loss = 9.2588 = -0.0224 + 0.5 * 18.5626 + 0.01 * -0.0076
-----------------
Finished episode: 1934 Reward: 1754.6274 total_loss = 9.0196 = 0.0640 + 0.5 * 17.9114 + 0.01 * -0.0078
-----------------
Finished episode: 1935 Reward

Finished episode: 1995 Reward: 3142.0890 total_loss = 4.9791 = 0.0356 + 0.5 * 9.8871 + 0.01 * -0.0064
-----------------
Finished episode: 1996 Reward: 2721.2061 total_loss = 4.7012 = -0.1000 + 0.5 * 9.6025 + 0.01 * -0.0077
-----------------
Finished episode: 1997 Reward: 2746.1304 total_loss = 8.9110 = -0.0256 + 0.5 * 17.8735 + 0.01 * -0.0077
-----------------
Finished episode: 1998 Reward: 1284.5771 total_loss = 3.0457 = 0.0693 + 0.5 * 5.9530 + 0.01 * -0.0075
-----------------
Finished episode: 1999 Reward: 3037.4073 total_loss = 10.1974 = -0.0547 + 0.5 * 20.5043 + 0.01 * -0.0077
-----------------
Finished episode: 0 Reward: -1.5993 total_loss = 0.2557 = -0.0727 + 0.5 * 0.6570 + 0.01 * -0.0036
-----------------
Finished episode: 1 Reward: 0.8483 total_loss = 0.2671 = -0.1068 + 0.5 * 0.7479 + 0.01 * -0.0038
-----------------
Finished episode: 2 Reward: 3.6594 total_loss = 0.5576 = -0.0014 + 0.5 * 1.1182 + 0.01 * -0.0035
-----------------
Finished episode: 3 Reward: 8.2185 total_loss = 

Finished episode: 65 Reward: 304.4229 total_loss = 4.0313 = -0.0791 + 0.5 * 8.2208 + 0.01 * -0.0059
-----------------
Finished episode: 66 Reward: 325.9470 total_loss = 3.6181 = -0.0613 + 0.5 * 7.3589 + 0.01 * -0.0059
-----------------
Finished episode: 67 Reward: 311.8185 total_loss = 3.4400 = 0.0067 + 0.5 * 6.8667 + 0.01 * -0.0053
-----------------
Finished episode: 68 Reward: 315.1960 total_loss = 3.3955 = -0.0020 + 0.5 * 6.7952 + 0.01 * -0.0058
-----------------
Finished episode: 69 Reward: 336.1661 total_loss = 3.3394 = 0.0023 + 0.5 * 6.6745 + 0.01 * -0.0061
-----------------
Finished episode: 70 Reward: 318.2499 total_loss = 3.2659 = -0.0141 + 0.5 * 6.5602 + 0.01 * -0.0058
-----------------
Finished episode: 71 Reward: 309.4053 total_loss = 3.1784 = -0.0590 + 0.5 * 6.4749 + 0.01 * -0.0059
-----------------
Finished episode: 72 Reward: 328.5061 total_loss = 2.9480 = -0.0555 + 0.5 * 6.0071 + 0.01 * -0.0059
-----------------
Finished episode: 73 Reward: 299.3547 total_loss = 2.6731 

Finished episode: 135 Reward: 425.5809 total_loss = 2.5404 = -0.0235 + 0.5 * 5.1278 + 0.01 * -0.0070
-----------------
Finished episode: 136 Reward: 464.8594 total_loss = 1.9513 = -0.0093 + 0.5 * 3.9212 + 0.01 * -0.0066
-----------------
Finished episode: 137 Reward: 470.8923 total_loss = 2.0549 = -0.0180 + 0.5 * 4.1459 + 0.01 * -0.0065
-----------------
Finished episode: 138 Reward: 469.6208 total_loss = 2.9897 = -0.0314 + 0.5 * 6.0423 + 0.01 * -0.0056
-----------------
Finished episode: 139 Reward: 446.5845 total_loss = 2.6962 = -0.1400 + 0.5 * 5.6725 + 0.01 * -0.0056
-----------------
Finished episode: 140 Reward: 429.1595 total_loss = 1.9212 = -0.0368 + 0.5 * 3.9161 + 0.01 * -0.0060
-----------------
Finished episode: 141 Reward: 401.9578 total_loss = 2.6926 = -0.0542 + 0.5 * 5.4937 + 0.01 * -0.0063
-----------------
Finished episode: 142 Reward: 516.4504 total_loss = 4.3868 = -0.0483 + 0.5 * 8.8704 + 0.01 * -0.0056
-----------------
Finished episode: 143 Reward: 486.2176 total_los

Finished episode: 204 Reward: 600.7400 total_loss = 4.8183 = -0.0760 + 0.5 * 9.7887 + 0.01 * -0.0085
-----------------
Finished episode: 205 Reward: 446.9480 total_loss = 4.4220 = -0.0927 + 0.5 * 9.0295 + 0.01 * -0.0095
-----------------
Finished episode: 206 Reward: 707.2024 total_loss = 4.6588 = -0.0887 + 0.5 * 9.4952 + 0.01 * -0.0088
-----------------
Finished episode: 207 Reward: 657.9806 total_loss = 8.6361 = 0.0021 + 0.5 * 17.2682 + 0.01 * -0.0089
-----------------
Finished episode: 208 Reward: 1017.4628 total_loss = 7.3889 = -0.1523 + 0.5 * 15.0825 + 0.01 * -0.0090
-----------------
Finished episode: 209 Reward: 658.6030 total_loss = 5.0380 = -0.0520 + 0.5 * 10.1801 + 0.01 * -0.0084
-----------------
Finished episode: 210 Reward: 615.5046 total_loss = 6.0693 = -0.0806 + 0.5 * 12.3000 + 0.01 * -0.0080
-----------------
Finished episode: 211 Reward: 993.5042 total_loss = 7.0075 = -0.0826 + 0.5 * 14.1803 + 0.01 * -0.0097
-----------------
Finished episode: 212 Reward: 748.0214 tota

Finished episode: 273 Reward: 878.0352 total_loss = 4.1265 = 0.0087 + 0.5 * 8.2357 + 0.01 * -0.0107
-----------------
Finished episode: 274 Reward: 1639.4185 total_loss = 8.3206 = -0.2300 + 0.5 * 17.1014 + 0.01 * -0.0095
-----------------
Finished episode: 275 Reward: 997.9571 total_loss = 6.5703 = -0.0584 + 0.5 * 13.2576 + 0.01 * -0.0105
-----------------
Finished episode: 276 Reward: 967.5107 total_loss = 5.9555 = -0.0242 + 0.5 * 11.9597 + 0.01 * -0.0100
-----------------
Finished episode: 277 Reward: 1064.6804 total_loss = 6.6269 = 0.0062 + 0.5 * 13.2416 + 0.01 * -0.0110
-----------------
Finished episode: 278 Reward: 1202.7728 total_loss = 5.3197 = -0.0612 + 0.5 * 10.7620 + 0.01 * -0.0110
-----------------
Finished episode: 279 Reward: 1183.3077 total_loss = 4.9238 = -0.0275 + 0.5 * 9.9027 + 0.01 * -0.0123
-----------------
Finished episode: 280 Reward: 1186.8406 total_loss = 4.5430 = -0.0130 + 0.5 * 9.1122 + 0.01 * -0.0123
-----------------
Finished episode: 281 Reward: 1038.4422 

Finished episode: 342 Reward: 980.9981 total_loss = 6.9951 = -0.0271 + 0.5 * 14.0446 + 0.01 * -0.0105
-----------------
Finished episode: 343 Reward: 1988.4376 total_loss = 8.5804 = -0.0405 + 0.5 * 17.2421 + 0.01 * -0.0127
-----------------
Finished episode: 344 Reward: 1516.0420 total_loss = 5.1498 = -0.0785 + 0.5 * 10.4568 + 0.01 * -0.0124
-----------------
Finished episode: 345 Reward: 1654.9597 total_loss = 2.3157 = -0.0670 + 0.5 * 4.7658 + 0.01 * -0.0147
-----------------
Finished episode: 346 Reward: 2308.2751 total_loss = 8.8661 = 0.0098 + 0.5 * 17.7128 + 0.01 * -0.0130
-----------------
Finished episode: 347 Reward: 1734.0189 total_loss = 4.1761 = -0.0154 + 0.5 * 8.3833 + 0.01 * -0.0135
-----------------
Finished episode: 348 Reward: 2102.9344 total_loss = 7.9694 = 0.0178 + 0.5 * 15.9035 + 0.01 * -0.0137
-----------------
Finished episode: 349 Reward: 997.0203 total_loss = 4.0805 = -0.0977 + 0.5 * 8.3566 + 0.01 * -0.0135
-----------------
Finished episode: 350 Reward: 1551.8774

Finished episode: 411 Reward: 1574.5707 total_loss = 6.9244 = -0.0181 + 0.5 * 13.8853 + 0.01 * -0.0145
-----------------
Finished episode: 412 Reward: 1420.4260 total_loss = 5.4316 = -0.0365 + 0.5 * 10.9365 + 0.01 * -0.0161
-----------------
Finished episode: 413 Reward: 1614.9914 total_loss = 6.5694 = -0.0171 + 0.5 * 13.1732 + 0.01 * -0.0162
-----------------
Finished episode: 414 Reward: 1755.3443 total_loss = 10.4636 = 0.0659 + 0.5 * 20.7957 + 0.01 * -0.0150
-----------------
Finished episode: 415 Reward: 1178.4560 total_loss = 4.1293 = -0.0167 + 0.5 * 8.2924 + 0.01 * -0.0152
-----------------
Finished episode: 416 Reward: 1856.2914 total_loss = 12.1517 = 0.0523 + 0.5 * 24.1991 + 0.01 * -0.0149
-----------------
Finished episode: 417 Reward: 1305.1465 total_loss = 4.6707 = -0.1173 + 0.5 * 9.5763 + 0.01 * -0.0159
-----------------
Finished episode: 418 Reward: 1734.7311 total_loss = 6.0470 = -0.0182 + 0.5 * 12.1307 + 0.01 * -0.0164
-----------------
Finished episode: 419 Reward: 1912

Finished episode: 479 Reward: 1802.6116 total_loss = 4.7762 = 0.0143 + 0.5 * 9.5241 + 0.01 * -0.0174
-----------------
Finished episode: 480 Reward: 1469.3559 total_loss = 7.1243 = 0.0258 + 0.5 * 14.1973 + 0.01 * -0.0186
-----------------
Finished episode: 481 Reward: 1537.0373 total_loss = 7.6296 = 0.0224 + 0.5 * 15.2146 + 0.01 * -0.0182
-----------------
Finished episode: 482 Reward: 2236.9614 total_loss = 9.5836 = 0.0050 + 0.5 * 19.1575 + 0.01 * -0.0179
-----------------
Finished episode: 483 Reward: 2119.1499 total_loss = 9.3340 = -0.0482 + 0.5 * 18.7647 + 0.01 * -0.0167
-----------------
Finished episode: 484 Reward: 1988.4687 total_loss = 7.4324 = -0.0279 + 0.5 * 14.9210 + 0.01 * -0.0199
-----------------
Finished episode: 485 Reward: 2229.6424 total_loss = 10.8313 = -0.0218 + 0.5 * 21.7066 + 0.01 * -0.0175
-----------------
Finished episode: 486 Reward: 2334.7490 total_loss = 7.6654 = -0.0577 + 0.5 * 15.4466 + 0.01 * -0.0169
-----------------
Finished episode: 487 Reward: 1505.3

Finished episode: 547 Reward: 2311.8688 total_loss = 8.1626 = -0.0546 + 0.5 * 16.4347 + 0.01 * -0.0168
-----------------
Finished episode: 548 Reward: 1542.2627 total_loss = 4.2974 = -0.0026 + 0.5 * 8.6003 + 0.01 * -0.0201
-----------------
Finished episode: 549 Reward: 2259.9717 total_loss = 4.5029 = -0.0200 + 0.5 * 9.0462 + 0.01 * -0.0236
-----------------
Finished episode: 550 Reward: 2190.6824 total_loss = 7.6844 = -0.0720 + 0.5 * 15.5133 + 0.01 * -0.0209
-----------------
Finished episode: 551 Reward: 2439.2204 total_loss = 10.4992 = -0.0009 + 0.5 * 21.0007 + 0.01 * -0.0209
-----------------
Finished episode: 552 Reward: 2027.9479 total_loss = 6.0213 = -0.0387 + 0.5 * 12.1205 + 0.01 * -0.0211
-----------------
Finished episode: 553 Reward: 2177.7535 total_loss = 9.2732 = 0.0351 + 0.5 * 18.4767 + 0.01 * -0.0227
-----------------
Finished episode: 554 Reward: 1598.7513 total_loss = 4.6270 = -0.0416 + 0.5 * 9.3377 + 0.01 * -0.0233
-----------------
Finished episode: 555 Reward: 2254.

Finished episode: 615 Reward: 1594.8006 total_loss = 7.1696 = -0.0927 + 0.5 * 14.5251 + 0.01 * -0.0240
-----------------
Finished episode: 616 Reward: 2171.2237 total_loss = 9.5671 = -0.0102 + 0.5 * 19.1550 + 0.01 * -0.0239
-----------------
Finished episode: 617 Reward: 2350.3448 total_loss = 7.4724 = -0.0816 + 0.5 * 15.1084 + 0.01 * -0.0220
-----------------
Finished episode: 618 Reward: 2156.5005 total_loss = 5.4831 = -0.0144 + 0.5 * 10.9955 + 0.01 * -0.0240
-----------------
Finished episode: 619 Reward: 2877.6172 total_loss = 11.6395 = -0.0460 + 0.5 * 23.3715 + 0.01 * -0.0238
-----------------
Finished episode: 620 Reward: 1636.8576 total_loss = 8.7351 = 0.0225 + 0.5 * 17.4255 + 0.01 * -0.0257
-----------------
Finished episode: 621 Reward: 1648.8213 total_loss = 7.1708 = 0.0013 + 0.5 * 14.3395 + 0.01 * -0.0223
-----------------
Finished episode: 622 Reward: 1584.3436 total_loss = 9.3448 = -0.0543 + 0.5 * 18.7985 + 0.01 * -0.0207
-----------------
Finished episode: 623 Reward: 154

Finished episode: 683 Reward: 1368.1845 total_loss = 5.9574 = 0.0825 + 0.5 * 11.7503 + 0.01 * -0.0267
-----------------
Finished episode: 684 Reward: 2680.1430 total_loss = 8.4498 = -0.0402 + 0.5 * 16.9806 + 0.01 * -0.0252
-----------------
Finished episode: 685 Reward: 2826.0416 total_loss = 10.5879 = 0.0019 + 0.5 * 21.1725 + 0.01 * -0.0265
-----------------
Finished episode: 686 Reward: 1401.2462 total_loss = 4.0617 = -0.0275 + 0.5 * 8.1789 + 0.01 * -0.0254
-----------------
Finished episode: 687 Reward: 1751.8566 total_loss = 8.3075 = -0.0482 + 0.5 * 16.7120 + 0.01 * -0.0251
-----------------
Finished episode: 688 Reward: 1397.5835 total_loss = 3.0194 = -0.0071 + 0.5 * 6.0535 + 0.01 * -0.0273
-----------------
Finished episode: 689 Reward: 1938.1856 total_loss = 9.3289 = 0.1133 + 0.5 * 18.4318 + 0.01 * -0.0268
-----------------
Finished episode: 690 Reward: 1777.8974 total_loss = 4.3626 = -0.0701 + 0.5 * 8.8659 + 0.01 * -0.0262
-----------------
Finished episode: 691 Reward: 1735.86

Finished episode: 752 Reward: 2488.2128 total_loss = 8.1142 = 0.0045 + 0.5 * 16.2198 + 0.01 * -0.0261
-----------------
Finished episode: 753 Reward: 2369.4715 total_loss = 7.9054 = -0.0619 + 0.5 * 15.9352 + 0.01 * -0.0248
-----------------
Finished episode: 754 Reward: 1280.7906 total_loss = 6.7168 = -0.0696 + 0.5 * 13.5733 + 0.01 * -0.0228
-----------------
Finished episode: 755 Reward: 1902.6288 total_loss = 7.0006 = -0.0275 + 0.5 * 14.0568 + 0.01 * -0.0282
-----------------
Finished episode: 756 Reward: 1461.2846 total_loss = 6.2613 = 0.0303 + 0.5 * 12.4624 + 0.01 * -0.0228
-----------------
Finished episode: 757 Reward: 1604.5701 total_loss = 4.3254 = -0.1019 + 0.5 * 8.8550 + 0.01 * -0.0259
-----------------
Finished episode: 758 Reward: 2386.1702 total_loss = 10.5083 = 0.0101 + 0.5 * 20.9968 + 0.01 * -0.0247
-----------------
Finished episode: 759 Reward: 2484.6538 total_loss = 4.6771 = -0.0503 + 0.5 * 9.4553 + 0.01 * -0.0253
-----------------
Finished episode: 760 Reward: 2026.7

Finished episode: 820 Reward: 3053.2438 total_loss = 11.5752 = -0.1303 + 0.5 * 23.4113 + 0.01 * -0.0232
-----------------
Finished episode: 821 Reward: 2457.7948 total_loss = 12.2086 = 0.0515 + 0.5 * 24.3147 + 0.01 * -0.0256
-----------------
Finished episode: 822 Reward: 2971.8399 total_loss = 9.7054 = -0.0662 + 0.5 * 19.5437 + 0.01 * -0.0255
-----------------
Finished episode: 823 Reward: 3056.0113 total_loss = 13.2639 = 0.0088 + 0.5 * 26.5107 + 0.01 * -0.0214
-----------------
Finished episode: 824 Reward: 3216.1150 total_loss = 13.7638 = -0.0304 + 0.5 * 27.5889 + 0.01 * -0.0237
-----------------
Finished episode: 825 Reward: 2369.3097 total_loss = 10.7611 = 0.0059 + 0.5 * 21.5110 + 0.01 * -0.0265
-----------------
Finished episode: 826 Reward: 2856.0242 total_loss = 10.0136 = -0.1395 + 0.5 * 20.3068 + 0.01 * -0.0247
-----------------
Finished episode: 827 Reward: 2945.0352 total_loss = 10.5350 = -0.0483 + 0.5 * 21.1669 + 0.01 * -0.0219
-----------------
Finished episode: 828 Reward

Finished episode: 888 Reward: 2539.5821 total_loss = 10.9824 = -0.0644 + 0.5 * 22.0940 + 0.01 * -0.0218
-----------------
Finished episode: 889 Reward: 2435.0441 total_loss = 8.0592 = -0.0630 + 0.5 * 16.2449 + 0.01 * -0.0231
-----------------
Finished episode: 890 Reward: 1915.9193 total_loss = 9.2267 = 0.0126 + 0.5 * 18.4286 + 0.01 * -0.0229
-----------------
Finished episode: 891 Reward: 2893.2343 total_loss = 10.3588 = 0.0051 + 0.5 * 20.7079 + 0.01 * -0.0250
-----------------
Finished episode: 892 Reward: 2095.0160 total_loss = 11.2058 = 0.0558 + 0.5 * 22.3006 + 0.01 * -0.0235
-----------------
Finished episode: 893 Reward: 2109.9956 total_loss = 11.2623 = -0.0121 + 0.5 * 22.5493 + 0.01 * -0.0230
-----------------
Finished episode: 894 Reward: 3185.9426 total_loss = 11.0215 = -0.1353 + 0.5 * 22.3140 + 0.01 * -0.0231
-----------------
Finished episode: 895 Reward: 2003.0622 total_loss = 9.1783 = -0.0764 + 0.5 * 18.5098 + 0.01 * -0.0231
-----------------
Finished episode: 896 Reward: 

Finished episode: 956 Reward: 2588.9134 total_loss = 3.9573 = 0.0278 + 0.5 * 7.8593 + 0.01 * -0.0220
-----------------
Finished episode: 957 Reward: 2378.2057 total_loss = 7.7494 = -0.0551 + 0.5 * 15.6093 + 0.01 * -0.0193
-----------------
Finished episode: 958 Reward: 2928.0925 total_loss = 10.5795 = -0.1250 + 0.5 * 21.4095 + 0.01 * -0.0231
-----------------
Finished episode: 959 Reward: 2305.9389 total_loss = 7.8072 = -0.0096 + 0.5 * 15.6339 + 0.01 * -0.0201
-----------------
Finished episode: 960 Reward: 2300.8712 total_loss = 7.7895 = -0.0727 + 0.5 * 15.7250 + 0.01 * -0.0193
-----------------
Finished episode: 961 Reward: 2566.1836 total_loss = 6.4963 = -0.0491 + 0.5 * 13.0911 + 0.01 * -0.0194
-----------------
Finished episode: 962 Reward: 2110.3465 total_loss = 10.0414 = -0.0504 + 0.5 * 20.1838 + 0.01 * -0.0190
-----------------
Finished episode: 963 Reward: 2536.0591 total_loss = 10.0681 = -0.0447 + 0.5 * 20.2260 + 0.01 * -0.0188
-----------------
Finished episode: 964 Reward: 2

Finished episode: 1024 Reward: 2303.9672 total_loss = 13.8888 = -0.1145 + 0.5 * 28.0070 + 0.01 * -0.0170
-----------------
Finished episode: 1025 Reward: 2769.6124 total_loss = 11.1284 = 0.0338 + 0.5 * 22.1895 + 0.01 * -0.0172
-----------------
Finished episode: 1026 Reward: 2716.1372 total_loss = 8.2984 = -0.0314 + 0.5 * 16.6600 + 0.01 * -0.0190
-----------------
Finished episode: 1027 Reward: 3075.3318 total_loss = 11.3473 = 0.0026 + 0.5 * 22.6898 + 0.01 * -0.0186
-----------------
Finished episode: 1028 Reward: 2598.8508 total_loss = 7.1759 = 0.0445 + 0.5 * 14.2631 + 0.01 * -0.0179
-----------------
Finished episode: 1029 Reward: 3089.9399 total_loss = 13.4145 = -0.0287 + 0.5 * 26.8868 + 0.01 * -0.0170
-----------------
Finished episode: 1030 Reward: 2634.7192 total_loss = 11.2943 = 0.0120 + 0.5 * 22.5650 + 0.01 * -0.0202
-----------------
Finished episode: 1031 Reward: 1799.9219 total_loss = 12.3118 = -0.0783 + 0.5 * 24.7807 + 0.01 * -0.0213
-----------------
Finished episode: 1032

Finished episode: 1092 Reward: 2030.2745 total_loss = 10.9184 = -0.0522 + 0.5 * 21.9415 + 0.01 * -0.0164
-----------------
Finished episode: 1093 Reward: 2430.2656 total_loss = 6.5715 = -0.0415 + 0.5 * 13.2262 + 0.01 * -0.0180
-----------------
Finished episode: 1094 Reward: 1881.4870 total_loss = 11.1817 = 0.0082 + 0.5 * 22.3472 + 0.01 * -0.0173
-----------------
Finished episode: 1095 Reward: 2603.1613 total_loss = 9.4472 = 0.0260 + 0.5 * 18.8428 + 0.01 * -0.0162
-----------------
Finished episode: 1096 Reward: 2981.3539 total_loss = 12.5401 = -0.0372 + 0.5 * 25.1550 + 0.01 * -0.0170
-----------------
Finished episode: 1097 Reward: 2320.9053 total_loss = 9.9794 = 0.0194 + 0.5 * 19.9205 + 0.01 * -0.0168
-----------------
Finished episode: 1098 Reward: 2147.5301 total_loss = 5.1062 = 0.0381 + 0.5 * 10.1368 + 0.01 * -0.0184
-----------------
Finished episode: 1099 Reward: 3136.3714 total_loss = 14.5011 = 0.0347 + 0.5 * 28.9333 + 0.01 * -0.0170
-----------------
Finished episode: 1100 Re

Finished episode: 1160 Reward: 2412.6400 total_loss = 8.1270 = 0.0756 + 0.5 * 16.1032 + 0.01 * -0.0153
-----------------
Finished episode: 1161 Reward: 2910.8461 total_loss = 12.2035 = 0.0519 + 0.5 * 24.3034 + 0.01 * -0.0161
-----------------
Finished episode: 1162 Reward: 2732.9248 total_loss = 12.0319 = 0.0200 + 0.5 * 24.0242 + 0.01 * -0.0160
-----------------
Finished episode: 1163 Reward: 2961.2066 total_loss = 11.0314 = -0.1221 + 0.5 * 22.3073 + 0.01 * -0.0143
-----------------
Finished episode: 1164 Reward: 3167.1430 total_loss = 13.3278 = 0.0349 + 0.5 * 26.5859 + 0.01 * -0.0121
-----------------
Finished episode: 1165 Reward: 2457.8855 total_loss = 7.0354 = 0.0078 + 0.5 * 14.0554 + 0.01 * -0.0142
-----------------
Finished episode: 1166 Reward: 2766.3255 total_loss = 5.9596 = -0.1015 + 0.5 * 12.1225 + 0.01 * -0.0142
-----------------
Finished episode: 1167 Reward: 2199.6813 total_loss = 6.7303 = -0.0156 + 0.5 * 13.4921 + 0.01 * -0.0154
-----------------
Finished episode: 1168 Re

Finished episode: 1228 Reward: 2306.7599 total_loss = 8.4967 = -0.0408 + 0.5 * 17.0753 + 0.01 * -0.0148
-----------------
Finished episode: 1229 Reward: 2916.1011 total_loss = 10.4850 = -0.0036 + 0.5 * 20.9776 + 0.01 * -0.0125
-----------------
Finished episode: 1230 Reward: 3090.2374 total_loss = 14.1216 = -0.0379 + 0.5 * 28.3192 + 0.01 * -0.0125
-----------------
Finished episode: 1231 Reward: 2901.7822 total_loss = 10.3902 = -0.0193 + 0.5 * 20.8193 + 0.01 * -0.0129
-----------------
Finished episode: 1232 Reward: 2550.7795 total_loss = 12.2956 = -0.0125 + 0.5 * 24.6164 + 0.01 * -0.0133
-----------------
Finished episode: 1233 Reward: 2889.5796 total_loss = 4.9542 = 0.0155 + 0.5 * 9.8778 + 0.01 * -0.0148
-----------------
Finished episode: 1234 Reward: 2578.6781 total_loss = 11.3119 = 0.0454 + 0.5 * 22.5332 + 0.01 * -0.0125
-----------------
Finished episode: 1235 Reward: 2693.8886 total_loss = 9.6131 = -0.0161 + 0.5 * 19.2589 + 0.01 * -0.0136
-----------------
Finished episode: 1236

Finished episode: 1296 Reward: 2717.4611 total_loss = 6.2004 = 0.0667 + 0.5 * 12.2676 + 0.01 * -0.0136
-----------------
Finished episode: 1297 Reward: 2629.6475 total_loss = 11.1307 = 0.0484 + 0.5 * 22.1650 + 0.01 * -0.0139
-----------------
Finished episode: 1298 Reward: 3015.3977 total_loss = 8.4808 = -0.0361 + 0.5 * 17.0340 + 0.01 * -0.0117
-----------------
Finished episode: 1299 Reward: 2190.3380 total_loss = 6.7238 = -0.0539 + 0.5 * 13.5557 + 0.01 * -0.0128
-----------------
Finished episode: 1300 Reward: 1453.9632 total_loss = 7.0029 = 0.0506 + 0.5 * 13.9047 + 0.01 * -0.0136
-----------------
Finished episode: 1301 Reward: 2620.1253 total_loss = 11.3342 = -0.0080 + 0.5 * 22.6847 + 0.01 * -0.0124
-----------------
Finished episode: 1302 Reward: 2423.4284 total_loss = 6.7441 = -0.0823 + 0.5 * 13.6529 + 0.01 * -0.0123
-----------------
Finished episode: 1303 Reward: 2507.7632 total_loss = 9.6572 = -0.0476 + 0.5 * 19.4099 + 0.01 * -0.0133
-----------------
Finished episode: 1304 Re

Finished episode: 1364 Reward: 3054.4798 total_loss = 11.7698 = -0.0068 + 0.5 * 23.5535 + 0.01 * -0.0137
-----------------
Finished episode: 1365 Reward: 3154.0913 total_loss = 11.4736 = -0.0292 + 0.5 * 23.0060 + 0.01 * -0.0137
-----------------
Finished episode: 1366 Reward: 2429.4338 total_loss = 9.7630 = 0.0692 + 0.5 * 19.3879 + 0.01 * -0.0139
-----------------
Finished episode: 1367 Reward: 2228.4981 total_loss = 7.0860 = 0.0111 + 0.5 * 14.1500 + 0.01 * -0.0125
-----------------
Finished episode: 1368 Reward: 2421.3255 total_loss = 9.3310 = -0.0544 + 0.5 * 18.7711 + 0.01 * -0.0116
-----------------
Finished episode: 1369 Reward: 2717.9260 total_loss = 6.2476 = -0.0576 + 0.5 * 12.6106 + 0.01 * -0.0134
-----------------
Finished episode: 1370 Reward: 2558.5997 total_loss = 10.2562 = -0.0437 + 0.5 * 20.6001 + 0.01 * -0.0132
-----------------
Finished episode: 1371 Reward: 2765.3769 total_loss = 11.7522 = 0.0184 + 0.5 * 23.4678 + 0.01 * -0.0146
-----------------
Finished episode: 1372 

Finished episode: 1432 Reward: 2158.5856 total_loss = 8.5129 = -0.0011 + 0.5 * 17.0281 + 0.01 * -0.0119
-----------------
Finished episode: 1433 Reward: 2765.4726 total_loss = 10.5373 = -0.0600 + 0.5 * 21.1947 + 0.01 * -0.0114
-----------------
Finished episode: 1434 Reward: 2607.9444 total_loss = 11.3303 = 0.0510 + 0.5 * 22.5588 + 0.01 * -0.0124
-----------------
Finished episode: 1435 Reward: 2811.5443 total_loss = 10.0538 = -0.0709 + 0.5 * 20.2497 + 0.01 * -0.0129
-----------------
Finished episode: 1436 Reward: 1740.7374 total_loss = 7.2091 = 0.0885 + 0.5 * 14.2415 + 0.01 * -0.0124
-----------------
Finished episode: 1437 Reward: 2440.1917 total_loss = 7.9524 = 0.0239 + 0.5 * 15.8572 + 0.01 * -0.0123
-----------------
Finished episode: 1438 Reward: 2760.7077 total_loss = 7.3485 = -0.0601 + 0.5 * 14.8175 + 0.01 * -0.0120
-----------------
Finished episode: 1439 Reward: 1647.5034 total_loss = 11.8625 = -0.0537 + 0.5 * 23.8327 + 0.01 * -0.0127
-----------------
Finished episode: 1440 

Finished episode: 1500 Reward: 1661.9208 total_loss = 8.9022 = 0.0690 + 0.5 * 17.6666 + 0.01 * -0.0114
-----------------
Finished episode: 1501 Reward: 2422.0811 total_loss = 10.3034 = 0.1253 + 0.5 * 20.3565 + 0.01 * -0.0120
-----------------
Finished episode: 1502 Reward: 3053.2554 total_loss = 14.2422 = 0.0259 + 0.5 * 28.4328 + 0.01 * -0.0110
-----------------
Finished episode: 1503 Reward: 2624.5769 total_loss = 13.4931 = 0.0580 + 0.5 * 26.8704 + 0.01 * -0.0114
-----------------
Finished episode: 1504 Reward: 1676.6540 total_loss = 8.2069 = -0.0492 + 0.5 * 16.5123 + 0.01 * -0.0128
-----------------
Finished episode: 1505 Reward: 2462.6040 total_loss = 11.4859 = 0.1481 + 0.5 * 22.6759 + 0.01 * -0.0115
-----------------
Finished episode: 1506 Reward: 2318.2577 total_loss = 11.0170 = -0.0061 + 0.5 * 22.0463 + 0.01 * -0.0114
-----------------
Finished episode: 1507 Reward: 3161.6056 total_loss = 9.4921 = -0.0200 + 0.5 * 19.0245 + 0.01 * -0.0118
-----------------
Finished episode: 1508 R

Finished episode: 1568 Reward: 1929.0880 total_loss = 9.2764 = 0.0184 + 0.5 * 18.5161 + 0.01 * -0.0110
-----------------
Finished episode: 1569 Reward: 2149.3467 total_loss = 10.7421 = 0.0716 + 0.5 * 21.3413 + 0.01 * -0.0117
-----------------
Finished episode: 1570 Reward: 2465.0285 total_loss = 8.4384 = -0.0356 + 0.5 * 16.9482 + 0.01 * -0.0125
-----------------
Finished episode: 1571 Reward: 2893.7912 total_loss = 5.4488 = -0.0039 + 0.5 * 10.9057 + 0.01 * -0.0126
-----------------
Finished episode: 1572 Reward: 2350.2521 total_loss = 10.6710 = -0.0024 + 0.5 * 21.3471 + 0.01 * -0.0127
-----------------
Finished episode: 1573 Reward: 2267.5858 total_loss = 9.0676 = 0.0065 + 0.5 * 18.1224 + 0.01 * -0.0105
-----------------
Finished episode: 1574 Reward: 2548.9988 total_loss = 6.2902 = -0.0368 + 0.5 * 12.6541 + 0.01 * -0.0123
-----------------
Finished episode: 1575 Reward: 3213.8215 total_loss = 12.7821 = -0.0063 + 0.5 * 25.5770 + 0.01 * -0.0120
-----------------
Finished episode: 1576 R

Finished episode: 1636 Reward: 2486.4870 total_loss = 13.1532 = -0.0336 + 0.5 * 26.3739 + 0.01 * -0.0113
-----------------
Finished episode: 1637 Reward: 3106.9214 total_loss = 10.1538 = 0.1449 + 0.5 * 20.0180 + 0.01 * -0.0110
-----------------
Finished episode: 1638 Reward: 2546.9169 total_loss = 7.2813 = -0.0284 + 0.5 * 14.6195 + 0.01 * -0.0112
-----------------
Finished episode: 1639 Reward: 2683.5929 total_loss = 11.2580 = -0.0696 + 0.5 * 22.6555 + 0.01 * -0.0109
-----------------
Finished episode: 1640 Reward: 2151.8180 total_loss = 9.0342 = -0.0729 + 0.5 * 18.2143 + 0.01 * -0.0111
-----------------
Finished episode: 1641 Reward: 2541.9267 total_loss = 8.2209 = 0.0273 + 0.5 * 16.3875 + 0.01 * -0.0102
-----------------
Finished episode: 1642 Reward: 3034.9151 total_loss = 11.9188 = 0.0305 + 0.5 * 23.7768 + 0.01 * -0.0099
-----------------
Finished episode: 1643 Reward: 2929.3377 total_loss = 7.0184 = 0.0561 + 0.5 * 13.9249 + 0.01 * -0.0111
-----------------
Finished episode: 1644 R

Finished episode: 1704 Reward: 2153.0782 total_loss = 4.8874 = -0.0602 + 0.5 * 9.8955 + 0.01 * -0.0117
-----------------
Finished episode: 1705 Reward: 2459.0262 total_loss = 9.7987 = 0.0059 + 0.5 * 19.5857 + 0.01 * -0.0099
-----------------
Finished episode: 1706 Reward: 2528.5281 total_loss = 8.9175 = 0.0291 + 0.5 * 17.7770 + 0.01 * -0.0101
-----------------
Finished episode: 1707 Reward: 2734.2681 total_loss = 11.0550 = -0.0782 + 0.5 * 22.2668 + 0.01 * -0.0112
-----------------
Finished episode: 1708 Reward: 2759.3221 total_loss = 13.7956 = 0.1727 + 0.5 * 27.2459 + 0.01 * -0.0108
-----------------
Finished episode: 1709 Reward: 2231.8142 total_loss = 10.2525 = -0.0836 + 0.5 * 20.6724 + 0.01 * -0.0098
-----------------
Finished episode: 1710 Reward: 2891.1376 total_loss = 9.8342 = -0.0514 + 0.5 * 19.7713 + 0.01 * -0.0110
-----------------
Finished episode: 1711 Reward: 2729.4628 total_loss = 8.1793 = -0.1090 + 0.5 * 16.5768 + 0.01 * -0.0103
-----------------
Finished episode: 1712 Re

Finished episode: 1772 Reward: 1780.4656 total_loss = 2.9686 = 0.0089 + 0.5 * 5.9197 + 0.01 * -0.0103
-----------------
Finished episode: 1773 Reward: 2727.6655 total_loss = 9.7525 = -0.0886 + 0.5 * 19.6825 + 0.01 * -0.0103
-----------------
Finished episode: 1774 Reward: 1762.6891 total_loss = 11.5885 = -0.0247 + 0.5 * 23.2267 + 0.01 * -0.0092
-----------------
Finished episode: 1775 Reward: 1874.0668 total_loss = 5.7672 = -0.0450 + 0.5 * 11.6246 + 0.01 * -0.0107
-----------------
Finished episode: 1776 Reward: 2740.5136 total_loss = 8.4369 = 0.0259 + 0.5 * 16.8223 + 0.01 * -0.0111
-----------------
Finished episode: 1777 Reward: 1413.2410 total_loss = 10.8605 = -0.0374 + 0.5 * 21.7961 + 0.01 * -0.0108
-----------------
Finished episode: 1778 Reward: 2295.3310 total_loss = 8.9835 = -0.0447 + 0.5 * 18.0567 + 0.01 * -0.0094
-----------------
Finished episode: 1779 Reward: 3199.4469 total_loss = 14.6648 = 0.1130 + 0.5 * 29.1038 + 0.01 * -0.0113
-----------------
Finished episode: 1780 Re

Finished episode: 1840 Reward: 2752.0321 total_loss = 5.8648 = -0.0011 + 0.5 * 11.7320 + 0.01 * -0.0095
-----------------
Finished episode: 1841 Reward: 3269.5450 total_loss = 13.7419 = 0.0565 + 0.5 * 27.3711 + 0.01 * -0.0091
-----------------
Finished episode: 1842 Reward: 2321.0934 total_loss = 5.8993 = 0.1362 + 0.5 * 11.5264 + 0.01 * -0.0099
-----------------
Finished episode: 1843 Reward: 1587.9894 total_loss = 11.4764 = 0.0583 + 0.5 * 22.8366 + 0.01 * -0.0104
-----------------
Finished episode: 1844 Reward: 2842.2141 total_loss = 8.4858 = 0.1506 + 0.5 * 16.6707 + 0.01 * -0.0097
-----------------
Finished episode: 1845 Reward: 3312.6023 total_loss = 10.5399 = 0.0329 + 0.5 * 21.0142 + 0.01 * -0.0096
-----------------
Finished episode: 1846 Reward: 2407.3235 total_loss = 12.4334 = -0.0341 + 0.5 * 24.9352 + 0.01 * -0.0099
-----------------
Finished episode: 1847 Reward: 3277.4505 total_loss = 15.5795 = -0.0761 + 0.5 * 31.3114 + 0.01 * -0.0104
-----------------
Finished episode: 1848 R

Finished episode: 1908 Reward: 1732.8668 total_loss = 11.4548 = 0.0685 + 0.5 * 22.7729 + 0.01 * -0.0086
-----------------
Finished episode: 1909 Reward: 2843.5311 total_loss = 7.5829 = -0.0152 + 0.5 * 15.1965 + 0.01 * -0.0096
-----------------
Finished episode: 1910 Reward: 2323.9467 total_loss = 9.4251 = -0.0333 + 0.5 * 18.9169 + 0.01 * -0.0098
-----------------
Finished episode: 1911 Reward: 2907.2159 total_loss = 9.3859 = 0.0053 + 0.5 * 18.7613 + 0.01 * -0.0094
-----------------
Finished episode: 1912 Reward: 2523.7623 total_loss = 12.3063 = -0.0120 + 0.5 * 24.6368 + 0.01 * -0.0100
-----------------
Finished episode: 1913 Reward: 2963.0796 total_loss = 9.0969 = 0.1214 + 0.5 * 17.9512 + 0.01 * -0.0106
-----------------
Finished episode: 1914 Reward: 2042.6548 total_loss = 6.4450 = 0.0099 + 0.5 * 12.8705 + 0.01 * -0.0093
-----------------
Finished episode: 1915 Reward: 2265.3834 total_loss = 11.0934 = 0.0376 + 0.5 * 22.1119 + 0.01 * -0.0093
-----------------
Finished episode: 1916 Rew

Finished episode: 1976 Reward: 3017.8410 total_loss = 11.6825 = -0.0547 + 0.5 * 23.4746 + 0.01 * -0.0092
-----------------
Finished episode: 1977 Reward: 2611.6502 total_loss = 10.5727 = 0.1162 + 0.5 * 20.9131 + 0.01 * -0.0087
-----------------
Finished episode: 1978 Reward: 1763.8541 total_loss = 5.7718 = 0.0025 + 0.5 * 11.5387 + 0.01 * -0.0095
-----------------
Finished episode: 1979 Reward: 3242.5107 total_loss = 13.1298 = -0.0054 + 0.5 * 26.2707 + 0.01 * -0.0093
-----------------
Finished episode: 1980 Reward: 1810.3142 total_loss = 11.7003 = 0.0056 + 0.5 * 23.3896 + 0.01 * -0.0091
-----------------
Finished episode: 1981 Reward: 2033.1087 total_loss = 2.8656 = 0.0100 + 0.5 * 5.7115 + 0.01 * -0.0088
-----------------
Finished episode: 1982 Reward: 3384.4435 total_loss = 12.7974 = -0.0112 + 0.5 * 25.6174 + 0.01 * -0.0109
-----------------
Finished episode: 1983 Reward: 3176.2411 total_loss = 14.8839 = 0.0042 + 0.5 * 29.7596 + 0.01 * -0.0088
-----------------
Finished episode: 1984 R

Finished episode: 45 Reward: 313.0733 total_loss = 6.7191 = -0.0433 + 0.5 * 13.5248 + 0.01 * -0.0046
-----------------
Finished episode: 46 Reward: 337.7833 total_loss = 7.3431 = -0.0975 + 0.5 * 14.8813 + 0.01 * -0.0044
-----------------
Finished episode: 47 Reward: 327.5873 total_loss = 5.8641 = 0.0869 + 0.5 * 11.5545 + 0.01 * -0.0037
-----------------
Finished episode: 48 Reward: 322.5419 total_loss = 6.2493 = 0.0044 + 0.5 * 12.4898 + 0.01 * -0.0042
-----------------
Finished episode: 49 Reward: 237.6587 total_loss = 3.3439 = 0.0028 + 0.5 * 6.6823 + 0.01 * -0.0044
-----------------
Finished episode: 50 Reward: 335.5020 total_loss = 6.2197 = -0.0251 + 0.5 * 12.4898 + 0.01 * -0.0043
-----------------
Finished episode: 51 Reward: 347.1681 total_loss = 7.6496 = -0.0324 + 0.5 * 15.3640 + 0.01 * -0.0045
-----------------
Finished episode: 52 Reward: 329.1440 total_loss = 6.2316 = 0.0040 + 0.5 * 12.4552 + 0.01 * -0.0050
-----------------
Finished episode: 53 Reward: 354.2928 total_loss = 7.

Finished episode: 115 Reward: 557.8114 total_loss = 4.0298 = -0.0372 + 0.5 * 8.1342 + 0.01 * -0.0050
-----------------
Finished episode: 116 Reward: 543.4437 total_loss = 3.9618 = -0.0613 + 0.5 * 8.0462 + 0.01 * -0.0052
-----------------
Finished episode: 117 Reward: 509.8407 total_loss = 3.9565 = 0.0018 + 0.5 * 7.9095 + 0.01 * -0.0050
-----------------
Finished episode: 118 Reward: 448.7543 total_loss = 2.5883 = -0.0624 + 0.5 * 5.3015 + 0.01 * -0.0051
-----------------
Finished episode: 119 Reward: 488.4513 total_loss = 4.4940 = -0.1660 + 0.5 * 9.3201 + 0.01 * -0.0049
-----------------
Finished episode: 120 Reward: 513.1241 total_loss = 3.8591 = -0.0765 + 0.5 * 7.8713 + 0.01 * -0.0049
-----------------
Finished episode: 121 Reward: 560.7059 total_loss = 5.3020 = 0.1579 + 0.5 * 10.2883 + 0.01 * -0.0050
-----------------
Finished episode: 122 Reward: 528.6444 total_loss = 4.8244 = -0.0453 + 0.5 * 9.7395 + 0.01 * -0.0045
-----------------
Finished episode: 123 Reward: 533.5472 total_loss

Finished episode: 184 Reward: 996.8133 total_loss = 7.4783 = 0.0088 + 0.5 * 14.9391 + 0.01 * -0.0079
-----------------
Finished episode: 185 Reward: 872.9068 total_loss = 4.2708 = 0.0111 + 0.5 * 8.5195 + 0.01 * -0.0078
-----------------
Finished episode: 186 Reward: 1163.9743 total_loss = 9.0951 = -0.1052 + 0.5 * 18.4006 + 0.01 * -0.0084
-----------------
Finished episode: 187 Reward: 1100.2940 total_loss = 7.4106 = 0.0173 + 0.5 * 14.7868 + 0.01 * -0.0080
-----------------
Finished episode: 188 Reward: 980.8955 total_loss = 8.5750 = -0.0802 + 0.5 * 17.3106 + 0.01 * -0.0089
-----------------
Finished episode: 189 Reward: 885.9118 total_loss = 3.3805 = -0.0393 + 0.5 * 6.8398 + 0.01 * -0.0088
-----------------
Finished episode: 190 Reward: 792.2464 total_loss = 4.2810 = -0.0103 + 0.5 * 8.5827 + 0.01 * -0.0085
-----------------
Finished episode: 191 Reward: 896.2066 total_loss = 6.7756 = -0.0285 + 0.5 * 13.6084 + 0.01 * -0.0081
-----------------
Finished episode: 192 Reward: 874.6776 total

Finished episode: 253 Reward: 1046.0753 total_loss = 6.2624 = 0.0246 + 0.5 * 12.4758 + 0.01 * -0.0109
-----------------
Finished episode: 254 Reward: 1136.6714 total_loss = 3.3573 = -0.0134 + 0.5 * 6.7416 + 0.01 * -0.0105
-----------------
Finished episode: 255 Reward: 1126.8036 total_loss = 5.0492 = 0.0224 + 0.5 * 10.0538 + 0.01 * -0.0107
-----------------
Finished episode: 256 Reward: 1477.9042 total_loss = 4.7448 = 0.0353 + 0.5 * 9.4193 + 0.01 * -0.0120
-----------------
Finished episode: 257 Reward: 1405.0145 total_loss = 5.3558 = -0.0971 + 0.5 * 10.9059 + 0.01 * -0.0096
-----------------
Finished episode: 258 Reward: 2246.9569 total_loss = 10.9825 = -0.0257 + 0.5 * 22.0168 + 0.01 * -0.0112
-----------------
Finished episode: 259 Reward: 1541.5078 total_loss = 6.8640 = -0.0847 + 0.5 * 13.8975 + 0.01 * -0.0104
-----------------
Finished episode: 260 Reward: 859.4267 total_loss = 6.4591 = 0.0228 + 0.5 * 12.8728 + 0.01 * -0.0106
-----------------
Finished episode: 261 Reward: 864.0079

Finished episode: 322 Reward: 2032.4840 total_loss = 6.6915 = -0.1251 + 0.5 * 13.6335 + 0.01 * -0.0130
-----------------
Finished episode: 323 Reward: 1927.9831 total_loss = 8.0327 = 0.0206 + 0.5 * 16.0244 + 0.01 * -0.0131
-----------------
Finished episode: 324 Reward: 2106.6151 total_loss = 7.5948 = 0.0188 + 0.5 * 15.1522 + 0.01 * -0.0119
-----------------
Finished episode: 325 Reward: 1437.8328 total_loss = 5.3160 = -0.0346 + 0.5 * 10.7015 + 0.01 * -0.0130
-----------------
Finished episode: 326 Reward: 1388.2856 total_loss = 8.2653 = -0.0370 + 0.5 * 16.6048 + 0.01 * -0.0139
-----------------
Finished episode: 327 Reward: 2801.7328 total_loss = 13.5764 = -0.0552 + 0.5 * 27.2636 + 0.01 * -0.0122
-----------------
Finished episode: 328 Reward: 1760.7176 total_loss = 8.9222 = 0.0636 + 0.5 * 17.7174 + 0.01 * -0.0125
-----------------
Finished episode: 329 Reward: 1665.5218 total_loss = 7.6741 = -0.0531 + 0.5 * 15.4548 + 0.01 * -0.0136
-----------------
Finished episode: 330 Reward: 1234

Finished episode: 390 Reward: 1862.7904 total_loss = 9.1381 = -0.0222 + 0.5 * 18.3208 + 0.01 * -0.0125
-----------------
Finished episode: 391 Reward: 2585.0430 total_loss = 11.2954 = 0.0291 + 0.5 * 22.5328 + 0.01 * -0.0111
-----------------
Finished episode: 392 Reward: 1367.8267 total_loss = 5.2552 = 0.0594 + 0.5 * 10.3918 + 0.01 * -0.0130
-----------------
Finished episode: 393 Reward: 2151.5975 total_loss = 11.4308 = -0.0387 + 0.5 * 22.9391 + 0.01 * -0.0129
-----------------
Finished episode: 394 Reward: 2469.4751 total_loss = 8.5785 = -0.0905 + 0.5 * 17.3383 + 0.01 * -0.0131
-----------------
Finished episode: 395 Reward: 2099.1122 total_loss = 6.7549 = -0.0611 + 0.5 * 13.6323 + 0.01 * -0.0131
-----------------
Finished episode: 396 Reward: 1724.9576 total_loss = 7.3653 = 0.0144 + 0.5 * 14.7019 + 0.01 * -0.0119
-----------------
Finished episode: 397 Reward: 2684.2114 total_loss = 9.7680 = -0.0200 + 0.5 * 19.5762 + 0.01 * -0.0137
-----------------
Finished episode: 398 Reward: 198

Finished episode: 458 Reward: 2679.8389 total_loss = 10.5724 = -0.0139 + 0.5 * 21.1727 + 0.01 * -0.0119
-----------------
Finished episode: 459 Reward: 2378.7371 total_loss = 8.2993 = -0.0375 + 0.5 * 16.6738 + 0.01 * -0.0117
-----------------
Finished episode: 460 Reward: 2813.3077 total_loss = 8.4323 = -0.1003 + 0.5 * 17.0653 + 0.01 * -0.0122
-----------------
Finished episode: 461 Reward: 1894.7269 total_loss = 14.3052 = -0.0072 + 0.5 * 28.6251 + 0.01 * -0.0123
-----------------
Finished episode: 462 Reward: 2574.0466 total_loss = 5.8884 = -0.1308 + 0.5 * 12.0387 + 0.01 * -0.0128
-----------------
Finished episode: 463 Reward: 2671.5204 total_loss = 8.6624 = 0.0608 + 0.5 * 17.2035 + 0.01 * -0.0128
-----------------
Finished episode: 464 Reward: 2979.5706 total_loss = 13.1720 = 0.0674 + 0.5 * 26.2095 + 0.01 * -0.0113
-----------------
Finished episode: 465 Reward: 2557.4372 total_loss = 10.2001 = -0.0224 + 0.5 * 20.4452 + 0.01 * -0.0111
-----------------
Finished episode: 466 Reward: 

Finished episode: 526 Reward: 2393.8332 total_loss = 11.8631 = -0.0211 + 0.5 * 23.7687 + 0.01 * -0.0102
-----------------
Finished episode: 527 Reward: 2358.7828 total_loss = 10.8603 = -0.0458 + 0.5 * 21.8125 + 0.01 * -0.0114
-----------------
Finished episode: 528 Reward: 1954.0270 total_loss = 5.0183 = 0.0136 + 0.5 * 10.0096 + 0.01 * -0.0114
-----------------
Finished episode: 529 Reward: 2623.9463 total_loss = 6.7843 = -0.0166 + 0.5 * 13.6021 + 0.01 * -0.0114
-----------------
Finished episode: 530 Reward: 2698.6482 total_loss = 14.9172 = 0.1020 + 0.5 * 29.6306 + 0.01 * -0.0120
-----------------
Finished episode: 531 Reward: 2578.0888 total_loss = 7.2047 = -0.0765 + 0.5 * 14.5626 + 0.01 * -0.0099
-----------------
Finished episode: 532 Reward: 1491.5355 total_loss = 11.9286 = 0.0346 + 0.5 * 23.7883 + 0.01 * -0.0111
-----------------
Finished episode: 533 Reward: 2995.3098 total_loss = 10.8878 = -0.0141 + 0.5 * 21.8039 + 0.01 * -0.0101
-----------------
Finished episode: 534 Reward: 

Finished episode: 594 Reward: 3043.4644 total_loss = 11.0990 = -0.0232 + 0.5 * 22.2446 + 0.01 * -0.0094
-----------------
Finished episode: 595 Reward: 2221.3798 total_loss = 13.6206 = -0.0886 + 0.5 * 27.4186 + 0.01 * -0.0086
-----------------
Finished episode: 596 Reward: 2477.4261 total_loss = 9.1849 = -0.0431 + 0.5 * 18.4562 + 0.01 * -0.0085
-----------------
Finished episode: 597 Reward: 2878.4847 total_loss = 8.2440 = -0.0495 + 0.5 * 16.5871 + 0.01 * -0.0090
-----------------
Finished episode: 598 Reward: 2910.1775 total_loss = 10.2571 = -0.0756 + 0.5 * 20.6657 + 0.01 * -0.0088
-----------------
Finished episode: 599 Reward: 2253.5026 total_loss = 13.5705 = 0.1293 + 0.5 * 26.8826 + 0.01 * -0.0097
-----------------
Finished episode: 600 Reward: 2380.3004 total_loss = 11.5624 = 0.0079 + 0.5 * 23.1092 + 0.01 * -0.0087
-----------------
Finished episode: 601 Reward: 1990.7296 total_loss = 6.8320 = -0.0180 + 0.5 * 13.7003 + 0.01 * -0.0089
-----------------
Finished episode: 602 Reward:

Finished episode: 662 Reward: 2288.7982 total_loss = 7.3710 = 0.0030 + 0.5 * 14.7363 + 0.01 * -0.0091
-----------------
Finished episode: 663 Reward: 2913.3335 total_loss = 13.1321 = 0.0629 + 0.5 * 26.1387 + 0.01 * -0.0082
-----------------
Finished episode: 664 Reward: 2351.8850 total_loss = 12.0795 = -0.0019 + 0.5 * 24.1629 + 0.01 * -0.0080
-----------------
Finished episode: 665 Reward: 2990.9798 total_loss = 13.2632 = 0.0123 + 0.5 * 26.5019 + 0.01 * -0.0080
-----------------
Finished episode: 666 Reward: 2367.3492 total_loss = 13.3091 = 0.0279 + 0.5 * 26.5625 + 0.01 * -0.0086
-----------------
Finished episode: 667 Reward: 2941.1304 total_loss = 11.4572 = 0.0156 + 0.5 * 22.8833 + 0.01 * -0.0086
-----------------
Finished episode: 668 Reward: 2654.1836 total_loss = 8.3574 = -0.0630 + 0.5 * 16.8410 + 0.01 * -0.0079
-----------------
Finished episode: 669 Reward: 2835.4153 total_loss = 9.8399 = -0.0235 + 0.5 * 19.7270 + 0.01 * -0.0089
-----------------
Finished episode: 670 Reward: 31

Finished episode: 730 Reward: 2882.6605 total_loss = 9.2323 = -0.0220 + 0.5 * 18.5087 + 0.01 * -0.0095
-----------------
Finished episode: 731 Reward: 2023.6023 total_loss = 8.5889 = -0.0067 + 0.5 * 17.1914 + 0.01 * -0.0092
-----------------
Finished episode: 732 Reward: 2443.0308 total_loss = 9.6857 = 0.0136 + 0.5 * 19.3444 + 0.01 * -0.0094
-----------------
Finished episode: 733 Reward: 3100.4168 total_loss = 13.0437 = 0.0143 + 0.5 * 26.0591 + 0.01 * -0.0086
-----------------
Finished episode: 734 Reward: 2814.1442 total_loss = 9.6666 = -0.0115 + 0.5 * 19.3564 + 0.01 * -0.0088
-----------------
Finished episode: 735 Reward: 3200.3195 total_loss = 13.3689 = -0.0364 + 0.5 * 26.8106 + 0.01 * -0.0086
-----------------
Finished episode: 736 Reward: 2874.6935 total_loss = 10.8647 = 0.0451 + 0.5 * 21.6394 + 0.01 * -0.0096
-----------------
Finished episode: 737 Reward: 2705.3273 total_loss = 7.6193 = -0.0952 + 0.5 * 15.4293 + 0.01 * -0.0091
-----------------
Finished episode: 738 Reward: 28

Finished episode: 798 Reward: 3096.5084 total_loss = 12.6218 = -0.0367 + 0.5 * 25.3171 + 0.01 * -0.0071
-----------------
Finished episode: 799 Reward: 2449.8825 total_loss = 5.8659 = -0.0479 + 0.5 * 11.8276 + 0.01 * -0.0074
-----------------
Finished episode: 800 Reward: 2389.9086 total_loss = 12.0690 = -0.0825 + 0.5 * 24.3033 + 0.01 * -0.0083
-----------------
Finished episode: 801 Reward: 3171.3977 total_loss = 13.3846 = 0.0339 + 0.5 * 26.7016 + 0.01 * -0.0078
-----------------
Finished episode: 802 Reward: 2495.1626 total_loss = 11.3716 = -0.0423 + 0.5 * 22.8281 + 0.01 * -0.0068
-----------------
Finished episode: 803 Reward: 3075.1151 total_loss = 12.5482 = -0.0393 + 0.5 * 25.1751 + 0.01 * -0.0080
-----------------
Finished episode: 804 Reward: 3162.9851 total_loss = 13.4011 = -0.0046 + 0.5 * 26.8115 + 0.01 * -0.0068
-----------------
Finished episode: 805 Reward: 3051.9051 total_loss = 13.0466 = -0.0147 + 0.5 * 26.1229 + 0.01 * -0.0077
-----------------
Finished episode: 806 Rewa

Finished episode: 866 Reward: 2406.0351 total_loss = 10.6823 = -0.0008 + 0.5 * 21.3663 + 0.01 * -0.0058
-----------------
Finished episode: 867 Reward: 2183.5680 total_loss = 7.7403 = 0.0018 + 0.5 * 15.4772 + 0.01 * -0.0067
-----------------
Finished episode: 868 Reward: 1939.6861 total_loss = 8.2632 = -0.0285 + 0.5 * 16.5835 + 0.01 * -0.0061
-----------------
Finished episode: 869 Reward: 3055.6793 total_loss = 14.7625 = -0.0111 + 0.5 * 29.5473 + 0.01 * -0.0059
-----------------
Finished episode: 870 Reward: 2426.4152 total_loss = 7.2779 = -0.0678 + 0.5 * 14.6914 + 0.01 * -0.0059
-----------------
Finished episode: 871 Reward: 2741.2147 total_loss = 10.9945 = -0.0613 + 0.5 * 22.1118 + 0.01 * -0.0061
-----------------
Finished episode: 872 Reward: 3022.4538 total_loss = 7.4660 = 0.0265 + 0.5 * 14.8789 + 0.01 * -0.0061
-----------------
Finished episode: 873 Reward: 2853.1633 total_loss = 9.7152 = -0.0794 + 0.5 * 19.5894 + 0.01 * -0.0057
-----------------
Finished episode: 874 Reward: 2

Finished episode: 934 Reward: 2637.8732 total_loss = 12.2090 = 0.0305 + 0.5 * 24.3572 + 0.01 * -0.0051
-----------------
Finished episode: 935 Reward: 3111.9018 total_loss = 16.5866 = 0.1165 + 0.5 * 32.9403 + 0.01 * -0.0057
-----------------
Finished episode: 936 Reward: 3143.6137 total_loss = 10.6977 = 0.0811 + 0.5 * 21.2333 + 0.01 * -0.0057
-----------------
Finished episode: 937 Reward: 2484.8478 total_loss = 11.1132 = -0.0247 + 0.5 * 22.2761 + 0.01 * -0.0057
-----------------
Finished episode: 938 Reward: 3342.3843 total_loss = 15.9314 = 0.0150 + 0.5 * 31.8329 + 0.01 * -0.0056
-----------------
Finished episode: 939 Reward: 2318.9516 total_loss = 11.3731 = -0.0733 + 0.5 * 22.8930 + 0.01 * -0.0055
-----------------
Finished episode: 940 Reward: 3170.4716 total_loss = 14.4163 = -0.0480 + 0.5 * 28.9287 + 0.01 * -0.0057
-----------------
Finished episode: 941 Reward: 3213.3200 total_loss = 13.3473 = -0.0219 + 0.5 * 26.7387 + 0.01 * -0.0050
-----------------
Finished episode: 942 Reward

Finished episode: 1002 Reward: 3223.5006 total_loss = 12.5495 = -0.0638 + 0.5 * 25.2267 + 0.01 * -0.0046
-----------------
Finished episode: 1003 Reward: 2372.1046 total_loss = 8.9671 = -0.0825 + 0.5 * 18.0994 + 0.01 * -0.0046
-----------------
Finished episode: 1004 Reward: 3341.3615 total_loss = 14.0424 = -0.0383 + 0.5 * 28.1614 + 0.01 * -0.0045
-----------------
Finished episode: 1005 Reward: 3343.0052 total_loss = 13.5764 = -0.0314 + 0.5 * 27.2157 + 0.01 * -0.0047
-----------------
Finished episode: 1006 Reward: 3083.9139 total_loss = 14.3397 = 0.0344 + 0.5 * 28.6108 + 0.01 * -0.0044
-----------------
Finished episode: 1007 Reward: 2890.0648 total_loss = 10.7540 = -0.1296 + 0.5 * 21.7673 + 0.01 * -0.0040
-----------------
Finished episode: 1008 Reward: 3043.5504 total_loss = 14.0883 = 0.0268 + 0.5 * 28.1232 + 0.01 * -0.0043
-----------------
Finished episode: 1009 Reward: 3107.4108 total_loss = 12.7844 = -0.0348 + 0.5 * 25.6384 + 0.01 * -0.0045
-----------------
Finished episode: 1

Finished episode: 1069 Reward: 3217.5506 total_loss = 12.6339 = 0.0850 + 0.5 * 25.0979 + 0.01 * -0.0039
-----------------
Finished episode: 1070 Reward: 2991.8190 total_loss = 12.0600 = -0.0682 + 0.5 * 24.2565 + 0.01 * -0.0044
-----------------
Finished episode: 1071 Reward: 2371.8029 total_loss = 8.0028 = 0.0658 + 0.5 * 15.8740 + 0.01 * -0.0048
-----------------
Finished episode: 1072 Reward: 3137.7117 total_loss = 10.1305 = -0.0083 + 0.5 * 20.2777 + 0.01 * -0.0042
-----------------
Finished episode: 1073 Reward: 2662.9268 total_loss = 8.5937 = -0.0376 + 0.5 * 17.2628 + 0.01 * -0.0046
-----------------
Finished episode: 1074 Reward: 2571.3630 total_loss = 8.1316 = -0.0401 + 0.5 * 16.3434 + 0.01 * -0.0040
-----------------
Finished episode: 1075 Reward: 2930.2072 total_loss = 11.2268 = 0.0068 + 0.5 * 22.4399 + 0.01 * -0.0047
-----------------
Finished episode: 1076 Reward: 3344.2841 total_loss = 14.9280 = -0.0030 + 0.5 * 29.8620 + 0.01 * -0.0042
-----------------
Finished episode: 1077

Finished episode: 1136 Reward: 3255.0607 total_loss = 13.7327 = 0.0652 + 0.5 * 27.3351 + 0.01 * -0.0037
-----------------
Finished episode: 1137 Reward: 3353.7160 total_loss = 13.8641 = -0.0049 + 0.5 * 27.7381 + 0.01 * -0.0038
-----------------
Finished episode: 1138 Reward: 2596.0654 total_loss = 12.0951 = -0.0345 + 0.5 * 24.2593 + 0.01 * -0.0038
-----------------
Finished episode: 1139 Reward: 2956.9917 total_loss = 11.3957 = 0.1265 + 0.5 * 22.5384 + 0.01 * -0.0035
-----------------
Finished episode: 1140 Reward: 2048.4877 total_loss = 10.2321 = 0.0524 + 0.5 * 20.3596 + 0.01 * -0.0039
-----------------
Finished episode: 1141 Reward: 3279.4722 total_loss = 14.8864 = 0.0089 + 0.5 * 29.7551 + 0.01 * -0.0040
-----------------
Finished episode: 1142 Reward: 3196.1598 total_loss = 13.3071 = -0.0311 + 0.5 * 26.6764 + 0.01 * -0.0037
-----------------
Finished episode: 1143 Reward: 2193.5130 total_loss = 9.8770 = 0.0628 + 0.5 * 19.6284 + 0.01 * -0.0037
-----------------
Finished episode: 1144

Finished episode: 1203 Reward: 3287.8881 total_loss = 14.5025 = 0.0798 + 0.5 * 28.8454 + 0.01 * -0.0032
-----------------
Finished episode: 1204 Reward: 3195.9047 total_loss = 13.2309 = -0.0792 + 0.5 * 26.6204 + 0.01 * -0.0036
-----------------
Finished episode: 1205 Reward: 2836.7239 total_loss = 11.6757 = -0.0842 + 0.5 * 23.5199 + 0.01 * -0.0035
-----------------
Finished episode: 1206 Reward: 3344.5706 total_loss = 12.9397 = -0.0223 + 0.5 * 25.9241 + 0.01 * -0.0037
-----------------
Finished episode: 1207 Reward: 2753.1963 total_loss = 7.5262 = -0.0887 + 0.5 * 15.2299 + 0.01 * -0.0034
-----------------
Finished episode: 1208 Reward: 3453.8378 total_loss = 12.8924 = -0.0329 + 0.5 * 25.8507 + 0.01 * -0.0035
-----------------
Finished episode: 1209 Reward: 2663.9307 total_loss = 11.3032 = 0.0209 + 0.5 * 22.5648 + 0.01 * -0.0034
-----------------
Finished episode: 1210 Reward: 3216.3335 total_loss = 12.9737 = -0.0731 + 0.5 * 26.0937 + 0.01 * -0.0036
-----------------
Finished episode: 1

Finished episode: 1270 Reward: 3157.1495 total_loss = 15.4091 = 0.0072 + 0.5 * 30.8040 + 0.01 * -0.0035
-----------------
Finished episode: 1271 Reward: 3260.3507 total_loss = 12.7009 = -0.1100 + 0.5 * 25.6219 + 0.01 * -0.0033
-----------------
Finished episode: 1272 Reward: 3347.3197 total_loss = 13.0757 = 0.0476 + 0.5 * 26.0563 + 0.01 * -0.0032
-----------------
Finished episode: 1273 Reward: 3253.5128 total_loss = 13.9591 = -0.0582 + 0.5 * 28.0346 + 0.01 * -0.0032
-----------------
Finished episode: 1274 Reward: 3368.5580 total_loss = 11.5797 = -0.0993 + 0.5 * 23.3582 + 0.01 * -0.0030
-----------------
Finished episode: 1275 Reward: 2637.7105 total_loss = 13.2844 = 0.0460 + 0.5 * 26.4769 + 0.01 * -0.0035
-----------------
Finished episode: 1276 Reward: 3332.9423 total_loss = 10.9167 = 0.0282 + 0.5 * 21.7769 + 0.01 * -0.0033
-----------------
Finished episode: 1277 Reward: 3165.9423 total_loss = 13.9208 = -0.0530 + 0.5 * 27.9477 + 0.01 * -0.0033
-----------------
Finished episode: 12

Finished episode: 1337 Reward: 3194.6485 total_loss = 13.4066 = -0.0326 + 0.5 * 26.8785 + 0.01 * -0.0029
-----------------
Finished episode: 1338 Reward: 3242.0435 total_loss = 13.5756 = 0.0629 + 0.5 * 27.0255 + 0.01 * -0.0030
-----------------
Finished episode: 1339 Reward: 2454.4706 total_loss = 13.1470 = -0.0341 + 0.5 * 26.3624 + 0.01 * -0.0028
-----------------
Finished episode: 1340 Reward: 3096.7514 total_loss = 11.9075 = -0.0056 + 0.5 * 23.8261 + 0.01 * -0.0027
-----------------
Finished episode: 1341 Reward: 3126.9795 total_loss = 11.9432 = -0.0715 + 0.5 * 24.0295 + 0.01 * -0.0032
-----------------
Finished episode: 1342 Reward: 3089.8329 total_loss = 12.3557 = -0.0318 + 0.5 * 24.7750 + 0.01 * -0.0026
-----------------
Finished episode: 1343 Reward: 3165.0441 total_loss = 12.9814 = -0.0754 + 0.5 * 26.1137 + 0.01 * -0.0029
-----------------
Finished episode: 1344 Reward: 3141.7383 total_loss = 14.6032 = -0.0424 + 0.5 * 29.2912 + 0.01 * -0.0031
-----------------
Finished episode:

Finished episode: 1404 Reward: 3169.5343 total_loss = 10.9383 = -0.0894 + 0.5 * 22.0555 + 0.01 * -0.0021
-----------------
Finished episode: 1405 Reward: 3080.0276 total_loss = 9.9233 = -0.0278 + 0.5 * 19.9023 + 0.01 * -0.0024
-----------------
Finished episode: 1406 Reward: 3193.0708 total_loss = 9.5525 = -0.0255 + 0.5 * 19.1559 + 0.01 * -0.0026
-----------------
Finished episode: 1407 Reward: 2429.0635 total_loss = 11.2139 = -0.0533 + 0.5 * 22.5345 + 0.01 * -0.0027
-----------------
Finished episode: 1408 Reward: 3114.4386 total_loss = 11.3511 = 0.0180 + 0.5 * 22.6663 + 0.01 * -0.0025
-----------------
Finished episode: 1409 Reward: 2464.6404 total_loss = 7.7601 = -0.0383 + 0.5 * 15.5968 + 0.01 * -0.0023
-----------------
Finished episode: 1410 Reward: 3124.0143 total_loss = 10.5270 = 0.1017 + 0.5 * 20.8507 + 0.01 * -0.0024
-----------------
Finished episode: 1411 Reward: 2852.4949 total_loss = 5.4995 = -0.0473 + 0.5 * 11.0936 + 0.01 * -0.0025
-----------------
Finished episode: 1412

Finished episode: 1471 Reward: 2956.3890 total_loss = 11.3838 = 0.0176 + 0.5 * 22.7323 + 0.01 * -0.0022
-----------------
Finished episode: 1472 Reward: 2623.2221 total_loss = 9.1822 = 0.0268 + 0.5 * 18.3108 + 0.01 * -0.0022
-----------------
Finished episode: 1473 Reward: 2457.3337 total_loss = 11.4233 = -0.0818 + 0.5 * 23.0103 + 0.01 * -0.0023
-----------------
Finished episode: 1474 Reward: 2595.6164 total_loss = 10.8847 = -0.0588 + 0.5 * 21.8870 + 0.01 * -0.0022
-----------------
Finished episode: 1475 Reward: 2704.3128 total_loss = 7.3687 = 0.0590 + 0.5 * 14.6193 + 0.01 * -0.0022
-----------------
Finished episode: 1476 Reward: 3121.0218 total_loss = 11.3922 = -0.0554 + 0.5 * 22.8954 + 0.01 * -0.0022
-----------------
Finished episode: 1477 Reward: 3204.6835 total_loss = 12.1809 = 0.0084 + 0.5 * 24.3451 + 0.01 * -0.0023
-----------------
Finished episode: 1478 Reward: 3101.6376 total_loss = 12.1365 = 0.0168 + 0.5 * 24.2395 + 0.01 * -0.0022
-----------------
Finished episode: 1479 

Finished episode: 1539 Reward: 2627.4029 total_loss = 9.9086 = -0.0416 + 0.5 * 19.9005 + 0.01 * -0.0021
-----------------
Finished episode: 1540 Reward: 2852.5210 total_loss = 9.6180 = -0.0133 + 0.5 * 19.2627 + 0.01 * -0.0022
-----------------
Finished episode: 1541 Reward: 2646.3125 total_loss = 10.8241 = 0.0075 + 0.5 * 21.6332 + 0.01 * -0.0020
-----------------
Finished episode: 1542 Reward: 2384.1028 total_loss = 10.4761 = 0.1781 + 0.5 * 20.5959 + 0.01 * -0.0022
-----------------
Finished episode: 1543 Reward: 2981.5960 total_loss = 12.4212 = 0.0481 + 0.5 * 24.7463 + 0.01 * -0.0022
-----------------
Finished episode: 1544 Reward: 3007.2997 total_loss = 12.1196 = -0.0067 + 0.5 * 24.2528 + 0.01 * -0.0022
-----------------
Finished episode: 1545 Reward: 2898.7268 total_loss = 11.3788 = -0.0553 + 0.5 * 22.8683 + 0.01 * -0.0023
-----------------
Finished episode: 1546 Reward: 2643.2528 total_loss = 11.9175 = -0.0165 + 0.5 * 23.8682 + 0.01 * -0.0023
-----------------
Finished episode: 154

Finished episode: 1606 Reward: 3096.6759 total_loss = 13.8411 = 0.0400 + 0.5 * 27.6023 + 0.01 * -0.0020
-----------------
Finished episode: 1607 Reward: 3000.1461 total_loss = 13.7805 = -0.0267 + 0.5 * 27.6145 + 0.01 * -0.0019
-----------------
Finished episode: 1608 Reward: 3002.7216 total_loss = 12.0138 = -0.0944 + 0.5 * 24.2163 + 0.01 * -0.0018
-----------------
Finished episode: 1609 Reward: 3022.7599 total_loss = 10.7129 = -0.0986 + 0.5 * 21.6231 + 0.01 * -0.0019
-----------------
Finished episode: 1610 Reward: 3154.8310 total_loss = 13.4695 = 0.0880 + 0.5 * 26.7630 + 0.01 * -0.0019
-----------------
Finished episode: 1611 Reward: 2412.1330 total_loss = 10.1499 = -0.0704 + 0.5 * 20.4406 + 0.01 * -0.0017
-----------------
Finished episode: 1612 Reward: 2272.5266 total_loss = 6.8169 = -0.0553 + 0.5 * 13.7444 + 0.01 * -0.0019
-----------------
Finished episode: 1613 Reward: 3037.1021 total_loss = 13.9150 = 0.2016 + 0.5 * 27.4268 + 0.01 * -0.0019
-----------------
Finished episode: 16

Finished episode: 1673 Reward: 3109.5425 total_loss = 12.4396 = 0.0634 + 0.5 * 24.7526 + 0.01 * -0.0017
-----------------
Finished episode: 1674 Reward: 2195.5623 total_loss = 2.4466 = 0.0240 + 0.5 * 4.8454 + 0.01 * -0.0017
-----------------
Finished episode: 1675 Reward: 2988.2609 total_loss = 12.4586 = -0.0209 + 0.5 * 24.9590 + 0.01 * -0.0015
-----------------
Finished episode: 1676 Reward: 2932.3011 total_loss = 11.9418 = -0.0722 + 0.5 * 24.0280 + 0.01 * -0.0016
-----------------
Finished episode: 1677 Reward: 3124.6381 total_loss = 13.0779 = -0.0122 + 0.5 * 26.1802 + 0.01 * -0.0017
-----------------
Finished episode: 1678 Reward: 2406.7938 total_loss = 10.9741 = -0.1173 + 0.5 * 22.1828 + 0.01 * -0.0015
-----------------
Finished episode: 1679 Reward: 2767.1081 total_loss = 8.0306 = -0.0175 + 0.5 * 16.0962 + 0.01 * -0.0017
-----------------
Finished episode: 1680 Reward: 3029.0658 total_loss = 11.0256 = -0.0941 + 0.5 * 22.2395 + 0.01 * -0.0018
-----------------
Finished episode: 168

Finished episode: 1740 Reward: 3121.3740 total_loss = 8.4601 = -0.0178 + 0.5 * 16.9557 + 0.01 * -0.0017
-----------------
Finished episode: 1741 Reward: 2876.5993 total_loss = 10.3366 = 0.0762 + 0.5 * 20.5208 + 0.01 * -0.0017
-----------------
Finished episode: 1742 Reward: 2855.0863 total_loss = 12.6051 = -0.0138 + 0.5 * 25.2379 + 0.01 * -0.0015
-----------------
Finished episode: 1743 Reward: 3193.6472 total_loss = 13.9862 = 0.0694 + 0.5 * 27.8337 + 0.01 * -0.0015
-----------------
Finished episode: 1744 Reward: 3030.9110 total_loss = 10.0128 = -0.0366 + 0.5 * 20.0988 + 0.01 * -0.0015
-----------------
Finished episode: 1745 Reward: 3042.6754 total_loss = 14.2988 = 0.0815 + 0.5 * 28.4347 + 0.01 * -0.0016
-----------------
Finished episode: 1746 Reward: 2829.8530 total_loss = 6.7476 = 0.1084 + 0.5 * 13.2785 + 0.01 * -0.0016
-----------------
Finished episode: 1747 Reward: 2126.0691 total_loss = 11.6299 = -0.0662 + 0.5 * 23.3923 + 0.01 * -0.0017
-----------------
Finished episode: 1748

Finished episode: 1808 Reward: 1958.5746 total_loss = 9.6771 = -0.0861 + 0.5 * 19.5265 + 0.01 * -0.0015
-----------------
Finished episode: 1809 Reward: 2784.2357 total_loss = 8.9431 = 0.0050 + 0.5 * 17.8762 + 0.01 * -0.0014
-----------------
Finished episode: 1810 Reward: 2176.0158 total_loss = 8.3862 = 0.0061 + 0.5 * 16.7602 + 0.01 * -0.0016
-----------------
Finished episode: 1811 Reward: 2640.8410 total_loss = 13.7936 = 0.0986 + 0.5 * 27.3901 + 0.01 * -0.0015
-----------------
Finished episode: 1812 Reward: 3193.8102 total_loss = 12.4492 = 0.0047 + 0.5 * 24.8889 + 0.01 * -0.0015
-----------------
Finished episode: 1813 Reward: 2593.0671 total_loss = 7.8502 = -0.0570 + 0.5 * 15.8143 + 0.01 * -0.0015
-----------------
Finished episode: 1814 Reward: 2566.0527 total_loss = 5.9929 = 0.0910 + 0.5 * 11.8038 + 0.01 * -0.0015
-----------------
Finished episode: 1815 Reward: 2729.3432 total_loss = 9.5092 = -0.0273 + 0.5 * 19.0729 + 0.01 * -0.0015
-----------------
Finished episode: 1816 Rewa

Finished episode: 1876 Reward: 3084.0736 total_loss = 10.8563 = 0.0313 + 0.5 * 21.6500 + 0.01 * -0.0014
-----------------
Finished episode: 1877 Reward: 2517.2006 total_loss = 11.1106 = 0.0313 + 0.5 * 22.1586 + 0.01 * -0.0013
-----------------
Finished episode: 1878 Reward: 2868.8992 total_loss = 9.3160 = -0.0221 + 0.5 * 18.6762 + 0.01 * -0.0013
-----------------
Finished episode: 1879 Reward: 2458.3595 total_loss = 11.9515 = 0.0663 + 0.5 * 23.7703 + 0.01 * -0.0014
-----------------
Finished episode: 1880 Reward: 2052.2814 total_loss = 11.7650 = 0.0019 + 0.5 * 23.5261 + 0.01 * -0.0013
-----------------
Finished episode: 1881 Reward: 3112.8834 total_loss = 12.0369 = -0.0145 + 0.5 * 24.1028 + 0.01 * -0.0013
-----------------
Finished episode: 1882 Reward: 3068.3983 total_loss = 13.3302 = -0.0010 + 0.5 * 26.6624 + 0.01 * -0.0016
-----------------
Finished episode: 1883 Reward: 2746.1045 total_loss = 13.3621 = 0.0974 + 0.5 * 26.5294 + 0.01 * -0.0015
-----------------
Finished episode: 1884

Finished episode: 1944 Reward: 3077.8160 total_loss = 10.0590 = -0.0647 + 0.5 * 20.2474 + 0.01 * -0.0012
-----------------
Finished episode: 1945 Reward: 2715.7787 total_loss = 9.7463 = 0.0340 + 0.5 * 19.4248 + 0.01 * -0.0014
-----------------
Finished episode: 1946 Reward: 3055.6050 total_loss = 7.9562 = -0.0749 + 0.5 * 16.0622 + 0.01 * -0.0014
-----------------
Finished episode: 1947 Reward: 3107.6929 total_loss = 10.3494 = -0.0107 + 0.5 * 20.7203 + 0.01 * -0.0013
-----------------
Finished episode: 1948 Reward: 2401.0284 total_loss = 13.1975 = -0.0183 + 0.5 * 26.4317 + 0.01 * -0.0014
-----------------
Finished episode: 1949 Reward: 2692.8767 total_loss = 7.7493 = 0.0237 + 0.5 * 15.4511 + 0.01 * -0.0013
-----------------
Finished episode: 1950 Reward: 2373.2199 total_loss = 13.0133 = -0.0818 + 0.5 * 26.1902 + 0.01 * -0.0013
-----------------
Finished episode: 1951 Reward: 2884.8768 total_loss = 15.4794 = 0.0597 + 0.5 * 30.8395 + 0.01 * -0.0013
-----------------
Finished episode: 1952

Finished episode: 12 Reward: 26.8608 total_loss = 0.0354 = -0.0391 + 0.5 * 0.1529 + 0.01 * -0.1934
-----------------
Finished episode: 13 Reward: 27.7190 total_loss = 0.1046 = 0.0432 + 0.5 * 0.1268 + 0.01 * -0.2042
-----------------
Finished episode: 14 Reward: 25.8401 total_loss = -0.0000 = -0.0776 + 0.5 * 0.1592 + 0.01 * -0.2016
-----------------
Finished episode: 15 Reward: 30.4395 total_loss = 0.0553 = 0.0260 + 0.5 * 0.0628 + 0.01 * -0.2089
-----------------
Finished episode: 16 Reward: 31.8611 total_loss = -0.0400 = -0.0753 + 0.5 * 0.0749 + 0.01 * -0.2115
-----------------
Finished episode: 17 Reward: 35.2887 total_loss = 0.0477 = 0.0205 + 0.5 * 0.0587 + 0.01 * -0.2128
-----------------
Finished episode: 18 Reward: 32.2302 total_loss = 0.0203 = -0.0341 + 0.5 * 0.1130 + 0.01 * -0.2180
-----------------
Finished episode: 19 Reward: 34.5933 total_loss = 0.0028 = -0.0234 + 0.5 * 0.0567 + 0.01 * -0.2168
-----------------
Finished episode: 20 Reward: 38.1231 total_loss = -0.0045 = -0.03

Finished episode: 83 Reward: 42.0352 total_loss = -0.0684 = -0.0688 + 0.5 * 0.0064 + 0.01 * -0.2840
-----------------
Finished episode: 84 Reward: 42.3981 total_loss = 0.0985 = 0.0992 + 0.5 * 0.0040 + 0.01 * -0.2721
-----------------
Finished episode: 85 Reward: 42.1578 total_loss = 0.0006 = -0.0012 + 0.5 * 0.0090 + 0.01 * -0.2757
-----------------
Finished episode: 86 Reward: 41.6547 total_loss = 0.0762 = 0.0757 + 0.5 * 0.0066 + 0.01 * -0.2808
-----------------
Finished episode: 87 Reward: 42.5066 total_loss = -0.1473 = -0.1486 + 0.5 * 0.0083 + 0.01 * -0.2841
-----------------
Finished episode: 88 Reward: 41.5666 total_loss = -0.0228 = -0.0222 + 0.5 * 0.0046 + 0.01 * -0.2936
-----------------
Finished episode: 89 Reward: 41.5536 total_loss = 0.0007 = -0.0009 + 0.5 * 0.0088 + 0.01 * -0.2820
-----------------
Finished episode: 90 Reward: 42.3825 total_loss = 0.0922 = 0.0924 + 0.5 * 0.0052 + 0.01 * -0.2859
-----------------
Finished episode: 91 Reward: 42.2886 total_loss = -0.0340 = -0.0

Finished episode: 153 Reward: 43.2460 total_loss = 0.0807 = 0.0830 + 0.5 * 0.0014 + 0.01 * -0.3001
-----------------
Finished episode: 154 Reward: 42.8726 total_loss = 0.1087 = 0.1111 + 0.5 * 0.0014 + 0.01 * -0.3036
-----------------
Finished episode: 155 Reward: 43.1855 total_loss = -0.0168 = -0.0142 + 0.5 * 0.0008 + 0.01 * -0.2995
-----------------
Finished episode: 156 Reward: 41.4961 total_loss = 0.0237 = 0.0262 + 0.5 * 0.0009 + 0.01 * -0.2917
-----------------
Finished episode: 157 Reward: 40.5825 total_loss = 0.0088 = 0.0106 + 0.5 * 0.0026 + 0.01 * -0.3035
-----------------
Finished episode: 158 Reward: 44.8056 total_loss = -0.0043 = -0.0442 + 0.5 * 0.0860 + 0.01 * -0.3071
-----------------
Finished episode: 159 Reward: 42.3333 total_loss = -0.0373 = -0.0348 + 0.5 * 0.0010 + 0.01 * -0.3067
-----------------
Finished episode: 160 Reward: 42.4428 total_loss = 0.0615 = 0.0637 + 0.5 * 0.0016 + 0.01 * -0.3043
-----------------
Finished episode: 161 Reward: 42.3869 total_loss = -0.0062

Finished episode: 223 Reward: 66.8154 total_loss = -0.0715 = -0.0916 + 0.5 * 0.0460 + 0.01 * -0.2868
-----------------
Finished episode: 224 Reward: 71.9760 total_loss = 0.0281 = 0.0106 + 0.5 * 0.0408 + 0.01 * -0.2868
-----------------
Finished episode: 225 Reward: 66.4337 total_loss = 0.1486 = 0.0680 + 0.5 * 0.1669 + 0.01 * -0.2879
-----------------
Finished episode: 226 Reward: 67.7546 total_loss = 0.0326 = 0.0130 + 0.5 * 0.0450 + 0.01 * -0.2894
-----------------
Finished episode: 227 Reward: 68.6019 total_loss = 0.0186 = 0.0114 + 0.5 * 0.0203 + 0.01 * -0.2931
-----------------
Finished episode: 228 Reward: 68.3326 total_loss = -0.0470 = -0.0652 + 0.5 * 0.0422 + 0.01 * -0.2872
-----------------
Finished episode: 229 Reward: 68.2310 total_loss = 0.0022 = -0.0256 + 0.5 * 0.0613 + 0.01 * -0.2855
-----------------
Finished episode: 230 Reward: 71.7225 total_loss = -0.0475 = -0.0634 + 0.5 * 0.0377 + 0.01 * -0.2918
-----------------
Finished episode: 231 Reward: 70.6203 total_loss = 0.0963

Finished episode: 293 Reward: 86.5992 total_loss = 0.0551 = -0.0176 + 0.5 * 0.1509 + 0.01 * -0.2813
-----------------
Finished episode: 294 Reward: 80.3882 total_loss = 0.0369 = -0.0544 + 0.5 * 0.1882 + 0.01 * -0.2828
-----------------
Finished episode: 295 Reward: 85.3185 total_loss = -0.0061 = -0.0776 + 0.5 * 0.1486 + 0.01 * -0.2778
-----------------
Finished episode: 296 Reward: 86.3373 total_loss = 0.1463 = 0.0032 + 0.5 * 0.2917 + 0.01 * -0.2828
-----------------
Finished episode: 297 Reward: 84.3317 total_loss = 0.0981 = 0.0014 + 0.5 * 0.1991 + 0.01 * -0.2793
-----------------
Finished episode: 298 Reward: 84.8545 total_loss = 0.2267 = 0.0924 + 0.5 * 0.2741 + 0.01 * -0.2840
-----------------
Finished episode: 299 Reward: 81.2664 total_loss = 0.2380 = 0.0897 + 0.5 * 0.3023 + 0.01 * -0.2857
-----------------
Finished episode: 300 Reward: 84.4133 total_loss = 0.0131 = -0.0510 + 0.5 * 0.1337 + 0.01 * -0.2696
-----------------
Finished episode: 301 Reward: 86.4169 total_loss = -0.0018 

Finished episode: 363 Reward: 91.6418 total_loss = 0.0914 = 0.0111 + 0.5 * 0.1660 + 0.01 * -0.2710
-----------------
Finished episode: 364 Reward: 90.5767 total_loss = 0.0907 = -0.0011 + 0.5 * 0.1891 + 0.01 * -0.2705
-----------------
Finished episode: 365 Reward: 89.8968 total_loss = -0.0549 = -0.1315 + 0.5 * 0.1588 + 0.01 * -0.2755
-----------------
Finished episode: 366 Reward: 89.0018 total_loss = 0.1618 = 0.0241 + 0.5 * 0.2807 + 0.01 * -0.2666
-----------------
Finished episode: 367 Reward: 86.5767 total_loss = 0.1319 = 0.0003 + 0.5 * 0.2687 + 0.01 * -0.2776
-----------------
Finished episode: 368 Reward: 85.6126 total_loss = 0.1471 = -0.0101 + 0.5 * 0.3197 + 0.01 * -0.2667
-----------------
Finished episode: 369 Reward: 87.0625 total_loss = 0.1802 = -0.0094 + 0.5 * 0.3846 + 0.01 * -0.2726
-----------------
Finished episode: 370 Reward: 88.7082 total_loss = 0.2303 = 0.0501 + 0.5 * 0.3657 + 0.01 * -0.2679
-----------------
Finished episode: 371 Reward: 88.0042 total_loss = 0.0087 =

Finished episode: 433 Reward: 98.1869 total_loss = 0.0807 = -0.0612 + 0.5 * 0.2893 + 0.01 * -0.2706
-----------------
Finished episode: 434 Reward: 93.9336 total_loss = 0.0256 = -0.0819 + 0.5 * 0.2205 + 0.01 * -0.2739
-----------------
Finished episode: 435 Reward: 97.3359 total_loss = 0.2041 = 0.0341 + 0.5 * 0.3456 + 0.01 * -0.2831
-----------------
Finished episode: 436 Reward: 96.7968 total_loss = 0.2809 = 0.0885 + 0.5 * 0.3905 + 0.01 * -0.2865
-----------------
Finished episode: 437 Reward: 97.2680 total_loss = 0.2181 = 0.0345 + 0.5 * 0.3730 + 0.01 * -0.2936
-----------------
Finished episode: 438 Reward: 95.3793 total_loss = 0.1906 = -0.0185 + 0.5 * 0.4236 + 0.01 * -0.2700
-----------------
Finished episode: 439 Reward: 93.7822 total_loss = 0.1722 = -0.0043 + 0.5 * 0.3587 + 0.01 * -0.2777
-----------------
Finished episode: 440 Reward: 96.4435 total_loss = 0.0462 = -0.0838 + 0.5 * 0.2653 + 0.01 * -0.2685
-----------------
Finished episode: 441 Reward: 95.7486 total_loss = 0.0251 =

Finished episode: 503 Reward: 94.2575 total_loss = 0.2364 = 0.0292 + 0.5 * 0.4199 + 0.01 * -0.2799
-----------------
Finished episode: 504 Reward: 97.9127 total_loss = 0.2721 = 0.0696 + 0.5 * 0.4104 + 0.01 * -0.2744
-----------------
Finished episode: 505 Reward: 96.6343 total_loss = 0.1921 = -0.0113 + 0.5 * 0.4126 + 0.01 * -0.2941
-----------------
Finished episode: 506 Reward: 98.5271 total_loss = 0.2497 = 0.0548 + 0.5 * 0.3954 + 0.01 * -0.2869
-----------------
Finished episode: 507 Reward: 96.6464 total_loss = 0.1563 = -0.0262 + 0.5 * 0.3706 + 0.01 * -0.2808
-----------------
Finished episode: 508 Reward: 99.6052 total_loss = 0.1946 = 0.0698 + 0.5 * 0.2554 + 0.01 * -0.2880
-----------------
Finished episode: 509 Reward: 99.6336 total_loss = 0.1414 = -0.0016 + 0.5 * 0.2917 + 0.01 * -0.2900
-----------------
Finished episode: 510 Reward: 98.0235 total_loss = 0.1553 = -0.0049 + 0.5 * 0.3261 + 0.01 * -0.2880
-----------------
Finished episode: 511 Reward: 98.7185 total_loss = 0.0938 = 

Finished episode: 573 Reward: 99.1325 total_loss = 0.1659 = -0.0021 + 0.5 * 0.3416 + 0.01 * -0.2785
-----------------
Finished episode: 574 Reward: 99.6813 total_loss = 0.0424 = -0.0977 + 0.5 * 0.2858 + 0.01 * -0.2808
-----------------
Finished episode: 575 Reward: 101.3860 total_loss = 0.0358 = -0.0579 + 0.5 * 0.1928 + 0.01 * -0.2708
-----------------
Finished episode: 576 Reward: 101.2876 total_loss = 0.2097 = 0.0132 + 0.5 * 0.3983 + 0.01 * -0.2740
-----------------
Finished episode: 577 Reward: 97.4348 total_loss = 0.2238 = 0.0652 + 0.5 * 0.3226 + 0.01 * -0.2637
-----------------
Finished episode: 578 Reward: 98.1388 total_loss = 0.1068 = -0.0053 + 0.5 * 0.2298 + 0.01 * -0.2759
-----------------
Finished episode: 579 Reward: 97.5156 total_loss = 0.2038 = 0.0560 + 0.5 * 0.3008 + 0.01 * -0.2651
-----------------
Finished episode: 580 Reward: 99.7162 total_loss = 0.1068 = -0.0525 + 0.5 * 0.3239 + 0.01 * -0.2704
-----------------
Finished episode: 581 Reward: 100.4194 total_loss = 0.201