In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
from IPython import display

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim

In [3]:
import numpy as np
import math
import random

In [4]:
import time

In [5]:
def show_state(env, episode,step, info=""):
    plt.figure(99999,figsize=[8,6])
    plt.clf()
    plt.imshow(env.render(mode='rgb_array'))
    plt.title("episode: {} step: {} ".format(episode,step))
    #plt.title("%s | Step: %d %s" % (env._spec.id,step, info))
    plt.axis('off')

    display.clear_output(wait=True)
    display.display(plt.gcf())

In [6]:
import gym

In [7]:
use_cuda = torch.cuda.is_available()
device=torch.device("cuda:5" if use_cuda else "cpu")

In [8]:
###### PARAMS ######
learning_rate = 0.001
num_episodes = 50000
gamma=0.999999
#gamma=0.85
egreedy = 0.9
egreedy_final = 0.01
egreedy_decay = 50000

report_interval=10

score_to_solve = 195

hidden_layer_size=64

replay_memory_size=500000

batch_size=32

update_target_frequency = 100000

clip_error=True

In [9]:
env = gym.make('Breakout-ramDeterministic-v4')

In [10]:
#env = gym.wrappers.Monitor(env, '../mp4/sandbox10',video_callable=lambda episode_id: True,force=True)
#env = gym.wrappers.Monitor(env, '../mp4/breakout_DQN_p1',video_callable=lambda episode_id: episode_id%100==0,force=True)

In [11]:
number_of_inputs=env.observation_space.shape[0]
number_of_outputs=env.action_space.n

In [12]:
def calculate_epsilon(steps_done):
    epsilon = egreedy_final + (egreedy - egreedy_final) * \
              math.exp(-1. * steps_done / egreedy_decay )
    return epsilon

In [13]:
class ExperienceReplay():
    def __init__(self, capacity):
        self.capacity=capacity
        self.memory=[]
        self.position=0
        
    
    def push(self, state,
             action, new_state,
             reward, done):
        
            transition=(state,action,new_state,reward,done)
            
            if self.position>=len(self.memory):
                self.memory.append(transition)
            else:
                self.memory[self.position]=transition
                
            self.position=(self.position+1)%self.capacity
        
    
    def sample(self,batch_size):
        return zip(*random.sample(self.memory, batch_size))
    
    def __len__(self):
        return len(self.memory)
    

In [14]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.linear1 = nn.Linear(number_of_inputs,hidden_layer_size)
        self.linear2 = nn.Linear(hidden_layer_size,number_of_outputs)
        
        self.activation=nn.Tanh()
        
    def forward(self, x):
        output1 = self.linear1(x)
        output1 = self.activation(output1)
        output2 = self.linear2(output1)
        
        return output2

In [15]:
class QNet_Agent():
    def __init__(self):
        
        self.nn = NeuralNetwork().to(device)
        self.target_nn = NeuralNetwork().to(device)
        
        self.loss_function = nn.MSELoss()
        self.optimizer = optim.Adam(params=self.nn.parameters(), lr=learning_rate)
        
        self.update_target_counter = 0
        
    def select_action(self,state,epsilon):
        
        random_for_egreedy=torch.rand(1).item()
        
        if random_for_egreedy>epsilon:
            self.nn.eval()
            with torch.no_grad():
                state=torch.Tensor(state).to(device)
                predicted_value_from_nn=self.nn(state)
                action=torch.argmax(predicted_value_from_nn).item()
        else:
            action=env.action_space.sample()
                
                
        return action
    
    def optimize(self):
        
        if len(memory)<batch_size:
            return
        
        state, action, new_state, reward, done = memory.sample(batch_size)
        
        state=torch.Tensor(state).to(device)
        new_state=torch.Tensor(new_state).to(device)
        reward=torch.Tensor(reward).to(device)
        
        #the view call below is to transform into column vector
        #so that it can be used in the gather call
        #i.e. we will use it to pick out from the computed value
        #tensor only values indexed by selected action
        action=(torch.Tensor(action).view(-1,1).long()).to(device)
        #print('action: ')
        #print(action)
        #print('contiguous?', action.is_contiguous())
        done=torch.Tensor(done).to(device)
        
        #print('shape of: state, new state, reward, action, done:')
        #print(state.shape)
        #print(new_state.shape)
        #print(reward.shape)
        #print(action.shape)
        #print(done.shape)
        
        
        self.nn.eval()
        self.target_nn.eval()
            
        new_state_values=self.target_nn(new_state).detach()
        #print('shape of: new_state_values')
        #print(new_state_values.shape)
        
        max_new_state_values=torch.max(new_state_values,dim=1)[0]
        #print('shape of: max_new_state_values')
        #print(max_new_state_values.shape)
        target_value=(reward + (1-done)*gamma*max_new_state_values).view(-1,1)
        
        #print('shape of: target_value')
        #print(target_value.shape)
        self.nn.train()
        
        #this will select only the values of the desired actions
        predicted_value=torch.gather(self.nn(state),1,action)
        #print('shape of: predicted_value')
        #print(predicted_value.shape)
        
        
        loss=self.loss_function(predicted_value,target_value)
        self.optimizer.zero_grad()
        loss.backward()
        
        if clip_error:
            for param in self.nn.parameters():
                param.grad.clamp_(-1.0,1.0)
        
        self.optimizer.step()
        
        if self.update_target_counter % update_target_frequency == 0:
            #print("***********************")
            #print("UPDATING TARGET NETWORK")
            #print("update counter: {}".format(self.update_target_counter))
            #print("***********************")
            self.target_nn.load_state_dict(self.nn.state_dict())
        
        self.update_target_counter+=1
        

In [16]:
a=torch.randn([3,3])
print(a)

tensor([[-1.6049, -1.0776,  0.2535],
        [ 1.6732, -1.1906, -0.7068],
        [ 1.3573, -0.8768, -1.5517]])


In [17]:
a.clamp_(-1.0,1.0)

tensor([[-1.0000, -1.0000,  0.2535],
        [ 1.0000, -1.0000, -0.7068],
        [ 1.0000, -0.8768, -1.0000]])

In [18]:
a

tensor([[-1.0000, -1.0000,  0.2535],
        [ 1.0000, -1.0000, -0.7068],
        [ 1.0000, -0.8768, -1.0000]])

In [19]:
memory=ExperienceReplay(replay_memory_size)

In [20]:
qnet_agent=QNet_Agent()

In [21]:
seed_value=23
env.seed(seed_value)
torch.manual_seed(seed_value)
random.seed(seed_value)

In [None]:
%%time

steps_total=np.full([num_episodes],-999,dtype=np.int32)
reward_total=np.full([num_episodes],-999,dtype=np.int32)

frames_total=0

solved_after = 0
solved = False

start_time = time.time()

for i_episode in range(num_episodes):
    
    state = env.reset()
    #for step in range(100):
    step=0
    reward_total[i_episode]=0
    
    while True:
        
        step+=1
        frames_total += 1
        
        epsilon=calculate_epsilon(frames_total)
        
        #action=env.action_space.sample()
        action=qnet_agent.select_action(state,epsilon)
        
        new_state, reward, done, info = env.step(action)
        memory.push(state, action, new_state,
                     reward, done)
        
        reward_total[i_episode]+=reward
        
        qnet_agent.optimize()
        
        state=new_state
        
        if done:
            steps_total[i_episode]=step
            
            if i_episode>100:
                mean_reward_100 = np.sum(reward_total[i_episode-100:i_episode])/100
            
                if (mean_reward_100 > score_to_solve and solved == False):
                    print("SOLVED! After %i episodes " % i_episode)
                    solved_after = i_episode
                    solved = True
            
            if (i_episode % report_interval == 0 and i_episode>1):
                print("**** Episode  {} **** ".format(i_episode))
                recent_avg_reward=np.average(reward_total[i_episode-report_interval:i_episode])
                print("Recent average reward: {}".format(recent_avg_reward))
                if i_episode>100:
                    print("Reward over last 100: {}".format(mean_reward_100))
                full_avg_so_far=np.average(reward_total[:i_episode])
                print("Average over all episodes so far: {}".format(full_avg_so_far))
                print("epsilon: {}".format(epsilon))
            
                #print("Episode {} finished after: {}".format(i_episode,step))
            break
            
if solved:
    print("Solved after %i episodes" % solved_after)
        

**** Episode  10 **** 
Recent average reward: 0.9
Average over all episodes so far: 0.9
epsilon: 0.8668487906593241
**** Episode  20 **** 
Recent average reward: 0.4
Average over all episodes so far: 0.65
epsilon: 0.8418078484171984
**** Episode  30 **** 
Recent average reward: 1.2
Average over all episodes so far: 0.8333333333333334
epsilon: 0.8134230906913473
**** Episode  40 **** 
Recent average reward: 0.9
Average over all episodes so far: 0.85
epsilon: 0.7868920917963156
**** Episode  50 **** 
Recent average reward: 1.7
Average over all episodes so far: 1.02
epsilon: 0.7535391811224643
**** Episode  60 **** 
Recent average reward: 1.3
Average over all episodes so far: 1.0666666666666667
epsilon: 0.7280372606275634
**** Episode  70 **** 
Recent average reward: 2.0
Average over all episodes so far: 1.2
epsilon: 0.6977060286799767
**** Episode  80 **** 
Recent average reward: 1.2
Average over all episodes so far: 1.2
epsilon: 0.6702123084260773
**** Episode  90 **** 
Recent average r

**** Episode  570 **** 
Recent average reward: 1.4
Reward over last 100: 1.52
Average over all episodes so far: 1.4736842105263157
epsilon: 0.10279766359769003
**** Episode  580 **** 
Recent average reward: 1.5
Reward over last 100: 1.61
Average over all episodes so far: 1.4741379310344827
epsilon: 0.09892038890404693
**** Episode  590 **** 
Recent average reward: 1.0
Reward over last 100: 1.57
Average over all episodes so far: 1.4661016949152543
epsilon: 0.09547136971458488
**** Episode  600 **** 
Recent average reward: 1.2
Reward over last 100: 1.48
Average over all episodes so far: 1.4616666666666667
epsilon: 0.09220872686980197
**** Episode  610 **** 
Recent average reward: 1.4
Reward over last 100: 1.46
Average over all episodes so far: 1.460655737704918
epsilon: 0.0888448078446117
**** Episode  620 **** 
Recent average reward: 1.9
Reward over last 100: 1.5
Average over all episodes so far: 1.467741935483871
epsilon: 0.08508354768948673
**** Episode  630 **** 
Recent average rewar

**** Episode  1090 **** 
Recent average reward: 2.0
Reward over last 100: 1.61
Average over all episodes so far: 1.5128440366972478
epsilon: 0.019984575605165533
**** Episode  1100 **** 
Recent average reward: 0.7
Reward over last 100: 1.58
Average over all episodes so far: 1.5054545454545454
epsilon: 0.01961708748620693
**** Episode  1110 **** 
Recent average reward: 1.3
Reward over last 100: 1.58
Average over all episodes so far: 1.5036036036036036
epsilon: 0.019215634673689787
**** Episode  1120 **** 
Recent average reward: 1.7
Reward over last 100: 1.57
Average over all episodes so far: 1.5053571428571428
epsilon: 0.018831999763226852
**** Episode  1130 **** 
Recent average reward: 1.5
Reward over last 100: 1.58
Average over all episodes so far: 1.5053097345132744
epsilon: 0.018459257998395418
**** Episode  1140 **** 
Recent average reward: 1.4
Reward over last 100: 1.6
Average over all episodes so far: 1.5043859649122806
epsilon: 0.01811538353824268
**** Episode  1150 **** 
Recent

**** Episode  1600 **** 
Recent average reward: 1.6
Reward over last 100: 1.44
Average over all episodes so far: 1.530625
epsilon: 0.011083548672706223
**** Episode  1610 **** 
Recent average reward: 2.8
Reward over last 100: 1.6
Average over all episodes so far: 1.5385093167701864
epsilon: 0.011030229365833586
**** Episode  1620 **** 
Recent average reward: 1.6
Reward over last 100: 1.62
Average over all episodes so far: 1.538888888888889
epsilon: 0.010983990991182812
**** Episode  1630 **** 
Recent average reward: 1.8
Reward over last 100: 1.66
Average over all episodes so far: 1.5404907975460123
epsilon: 0.010936862701583434
**** Episode  1640 **** 
Recent average reward: 2.4
Reward over last 100: 1.77
Average over all episodes so far: 1.545731707317073
epsilon: 0.010893437820915385
**** Episode  1650 **** 
Recent average reward: 1.6
Reward over last 100: 1.74
Average over all episodes so far: 1.5460606060606061
epsilon: 0.010854483114522799
**** Episode  1660 **** 
Recent average r

**** Episode  2110 **** 
Recent average reward: 1.4
Reward over last 100: 1.83
Average over all episodes so far: 1.5876777251184835
epsilon: 0.010108158391609341
**** Episode  2120 **** 
Recent average reward: 1.4
Reward over last 100: 1.75
Average over all episodes so far: 1.5867924528301887
epsilon: 0.01010377620875429
**** Episode  2130 **** 
Recent average reward: 1.7
Reward over last 100: 1.72
Average over all episodes so far: 1.587323943661972
epsilon: 0.010099309053658967
**** Episode  2140 **** 
Recent average reward: 2.3
Reward over last 100: 1.76
Average over all episodes so far: 1.5906542056074766
epsilon: 0.010094325988140477
**** Episode  2150 **** 
Recent average reward: 0.9
Reward over last 100: 1.69
Average over all episodes so far: 1.5874418604651164
epsilon: 0.010090533209727524
**** Episode  2160 **** 
Recent average reward: 1.8
Reward over last 100: 1.65
Average over all episodes so far: 1.588425925925926
epsilon: 0.010086889460489258
**** Episode  2170 **** 
Recent

**** Episode  2620 **** 
Recent average reward: 2.1
Reward over last 100: 1.86
Average over all episodes so far: 1.5950381679389314
epsilon: 0.010011361117065716
**** Episode  2630 **** 
Recent average reward: 2.2
Reward over last 100: 1.88
Average over all episodes so far: 1.5973384030418252
epsilon: 0.010010835380387988
**** Episode  2640 **** 
Recent average reward: 0.8
Reward over last 100: 1.79
Average over all episodes so far: 1.5943181818181817
epsilon: 0.01001044075336979
**** Episode  2650 **** 
Recent average reward: 2.3
Reward over last 100: 1.82
Average over all episodes so far: 1.5969811320754717
epsilon: 0.01000992857280063
**** Episode  2660 **** 
Recent average reward: 2.0
Reward over last 100: 1.86
Average over all episodes so far: 1.5984962406015037
epsilon: 0.010009479359414828
**** Episode  2670 **** 
Recent average reward: 1.8
Reward over last 100: 1.89
Average over all episodes so far: 1.599250936329588
epsilon: 0.010009078388888735
**** Episode  2680 **** 
Recent

**** Episode  3130 **** 
Recent average reward: 1.6
Reward over last 100: 1.81
Average over all episodes so far: 1.629073482428115
epsilon: 0.010001128463239072
**** Episode  3140 **** 
Recent average reward: 1.7
Reward over last 100: 1.81
Average over all episodes so far: 1.629299363057325
epsilon: 0.01000107837657798
**** Episode  3150 **** 
Recent average reward: 1.7
Reward over last 100: 1.77
Average over all episodes so far: 1.6295238095238096
epsilon: 0.01000103053361583
**** Episode  3160 **** 
Recent average reward: 1.6
Reward over last 100: 1.64
Average over all episodes so far: 1.6294303797468355
epsilon: 0.010000986469118326
**** Episode  3170 **** 
Recent average reward: 1.2
Reward over last 100: 1.59
Average over all episodes so far: 1.6280757097791798
epsilon: 0.010000946311712595
**** Episode  3180 **** 
Recent average reward: 1.5
Reward over last 100: 1.54
Average over all episodes so far: 1.6276729559748428
epsilon: 0.010000905142165843
**** Episode  3190 **** 
Recent 

**** Episode  3640 **** 
Recent average reward: 1.7
Reward over last 100: 2.01
Average over all episodes so far: 1.658791208791209
epsilon: 0.010000110162001214
**** Episode  3650 **** 
Recent average reward: 2.6
Reward over last 100: 2.04
Average over all episodes so far: 1.6613698630136986
epsilon: 0.01000010473276605
**** Episode  3660 **** 
Recent average reward: 1.7
Reward over last 100: 2.0
Average over all episodes so far: 1.6614754098360656
epsilon: 0.01000009992221374
**** Episode  3670 **** 
Recent average reward: 2.9
Reward over last 100: 2.12
Average over all episodes so far: 1.664850136239782
epsilon: 0.010000094957746663
**** Episode  3680 **** 
Recent average reward: 2.1
Reward over last 100: 2.18
Average over all episodes so far: 1.6660326086956523
epsilon: 0.010000090755767124
**** Episode  3690 **** 
Recent average reward: 2.2
Reward over last 100: 2.21
Average over all episodes so far: 1.667479674796748
epsilon: 0.010000086409015868
**** Episode  3700 **** 
Recent av

**** Episode  4150 **** 
Recent average reward: 1.6
Reward over last 100: 1.9
Average over all episodes so far: 1.676867469879518
epsilon: 0.010000010658013182
**** Episode  4160 **** 
Recent average reward: 2.3
Reward over last 100: 1.99
Average over all episodes so far: 1.6783653846153845
epsilon: 0.010000010133755913
**** Episode  4170 **** 
Recent average reward: 2.5
Reward over last 100: 2.01
Average over all episodes so far: 1.6803357314148681
epsilon: 0.010000009650329123
**** Episode  4180 **** 
Recent average reward: 1.5
Reward over last 100: 1.97
Average over all episodes so far: 1.6799043062200958
epsilon: 0.010000009222185243
**** Episode  4190 **** 
Recent average reward: 2.6
Reward over last 100: 1.99
Average over all episodes so far: 1.6821002386634845
epsilon: 0.010000008798418775
**** Episode  4200 **** 
Recent average reward: 1.0
Reward over last 100: 1.94
Average over all episodes so far: 1.6804761904761905
epsilon: 0.010000008450047145
**** Episode  4210 **** 
Recen

**** Episode  4660 **** 
Recent average reward: 1.4
Reward over last 100: 1.24
Average over all episodes so far: 1.6688841201716738
epsilon: 0.010000001096882451
**** Episode  4670 **** 
Recent average reward: 1.1
Reward over last 100: 1.23
Average over all episodes so far: 1.6676659528907922
epsilon: 0.010000001051599166
**** Episode  4680 **** 
Recent average reward: 1.7
Reward over last 100: 1.27
Average over all episodes so far: 1.6677350427350428
epsilon: 0.010000000999712062
**** Episode  4690 **** 
Recent average reward: 2.1
Reward over last 100: 1.37
Average over all episodes so far: 1.6686567164179105
epsilon: 0.010000000958727898
**** Episode  4700 **** 
Recent average reward: 2.1
Reward over last 100: 1.42
Average over all episodes so far: 1.6695744680851063
epsilon: 0.0100000009147285
**** Episode  4710 **** 
Recent average reward: 1.1
Reward over last 100: 1.47
Average over all episodes so far: 1.6683651804670914
epsilon: 0.010000000875773329
**** Episode  4720 **** 
Recen

**** Episode  5170 **** 
Recent average reward: 1.4
Reward over last 100: 1.67
Average over all episodes so far: 1.6736943907156674
epsilon: 0.01000000010794559
**** Episode  5180 **** 
Recent average reward: 1.9
Reward over last 100: 1.72
Average over all episodes so far: 1.6741312741312742
epsilon: 0.010000000102898936
**** Episode  5190 **** 
Recent average reward: 0.9
Reward over last 100: 1.61
Average over all episodes so far: 1.6726396917148363
epsilon: 0.010000000098971042
**** Episode  5200 **** 
Recent average reward: 1.9
Reward over last 100: 1.61
Average over all episodes so far: 1.6730769230769231
epsilon: 0.010000000094151699
**** Episode  5210 **** 
Recent average reward: 1.2
Reward over last 100: 1.63
Average over all episodes so far: 1.672168905950096
epsilon: 0.010000000090429208
**** Episode  5220 **** 
Recent average reward: 1.2
Reward over last 100: 1.61
Average over all episodes so far: 1.671264367816092
epsilon: 0.010000000086810476
**** Episode  5230 **** 
Recent

**** Episode  5680 **** 
Recent average reward: 1.6
Reward over last 100: 1.66
Average over all episodes so far: 1.6735915492957747
epsilon: 0.010000000011050187
**** Episode  5690 **** 
Recent average reward: 1.5
Reward over last 100: 1.68
Average over all episodes so far: 1.673286467486819
epsilon: 0.01000000001058764
**** Episode  5700 **** 
Recent average reward: 2.0
Reward over last 100: 1.69
Average over all episodes so far: 1.673859649122807
epsilon: 0.01000000001012763
**** Episode  5710 **** 
Recent average reward: 1.9
Reward over last 100: 1.67
Average over all episodes so far: 1.6742556917688267
epsilon: 0.010000000009690318
**** Episode  5720 **** 
Recent average reward: 1.2
Reward over last 100: 1.62
Average over all episodes so far: 1.6734265734265734
epsilon: 0.010000000009279869
**** Episode  5730 **** 
Recent average reward: 1.5
Reward over last 100: 1.57
Average over all episodes so far: 1.6731239092495638
epsilon: 0.010000000008894627
**** Episode  5740 **** 
Recent 

**** Episode  6190 **** 
Recent average reward: 2.3
Reward over last 100: 1.94
Average over all episodes so far: 1.6810985460420032
epsilon: 0.010000000001110742
**** Episode  6200 **** 
Recent average reward: 0.7
Reward over last 100: 1.83
Average over all episodes so far: 1.679516129032258
epsilon: 0.010000000001071294
**** Episode  6210 **** 
Recent average reward: 1.5
Reward over last 100: 1.84
Average over all episodes so far: 1.6792270531400966
epsilon: 0.010000000001024831
**** Episode  6220 **** 
Recent average reward: 1.4
Reward over last 100: 1.84
Average over all episodes so far: 1.6787781350482316
epsilon: 0.010000000000982896
**** Episode  6230 **** 
Recent average reward: 2.2
Reward over last 100: 1.83
Average over all episodes so far: 1.6796147672552166
epsilon: 0.010000000000938725
**** Episode  6240 **** 
Recent average reward: 1.5
Reward over last 100: 1.75
Average over all episodes so far: 1.679326923076923
epsilon: 0.010000000000899269
**** Episode  6250 **** 
Recen

**** Episode  6700 **** 
Recent average reward: 1.7
Reward over last 100: 1.77
Average over all episodes so far: 1.683134328358209
epsilon: 0.01000000000011343
**** Episode  6710 **** 
Recent average reward: 2.1
Reward over last 100: 1.77
Average over all episodes so far: 1.6837555886736215
epsilon: 0.010000000000107907
**** Episode  6720 **** 
Recent average reward: 2.0
Reward over last 100: 1.75
Average over all episodes so far: 1.6842261904761904
epsilon: 0.010000000000102987
**** Episode  6730 **** 
Recent average reward: 1.7
Reward over last 100: 1.77
Average over all episodes so far: 1.6842496285289748
epsilon: 0.01000000000009819
**** Episode  6740 **** 
Recent average reward: 1.8
Reward over last 100: 1.79
Average over all episodes so far: 1.6844213649851631
epsilon: 0.010000000000093878
**** Episode  6750 **** 
Recent average reward: 2.1
Reward over last 100: 1.88
Average over all episodes so far: 1.685037037037037
epsilon: 0.010000000000089345
**** Episode  6760 **** 
Recent 

**** Episode  7210 **** 
Recent average reward: 2.3
Reward over last 100: 1.73
Average over all episodes so far: 1.6915395284327324
epsilon: 0.010000000000010858
**** Episode  7220 **** 
Recent average reward: 1.3
Reward over last 100: 1.69
Average over all episodes so far: 1.6909972299168976
epsilon: 0.010000000000010346
**** Episode  7230 **** 
Recent average reward: 3.0
Reward over last 100: 1.84
Average over all episodes so far: 1.692807745504841
epsilon: 0.010000000000009841
**** Episode  7240 **** 
Recent average reward: 2.5
Reward over last 100: 1.92
Average over all episodes so far: 1.6939226519337016
epsilon: 0.010000000000009352
**** Episode  7250 **** 
Recent average reward: 2.5
Reward over last 100: 2.01
Average over all episodes so far: 1.6950344827586208
epsilon: 0.010000000000008903
**** Episode  7260 **** 
Recent average reward: 1.4
Reward over last 100: 2.02
Average over all episodes so far: 1.6946280991735536
epsilon: 0.010000000000008486
**** Episode  7270 **** 
Rece

**** Episode  7720 **** 
Recent average reward: 2.6
Reward over last 100: 1.89
Average over all episodes so far: 1.7064766839378238
epsilon: 0.010000000000001
**** Episode  7730 **** 
Recent average reward: 1.2
Reward over last 100: 1.8
Average over all episodes so far: 1.7058214747736093
epsilon: 0.010000000000000958
**** Episode  7740 **** 
Recent average reward: 1.5
Reward over last 100: 1.7
Average over all episodes so far: 1.7055555555555555
epsilon: 0.010000000000000916
**** Episode  7750 **** 
Recent average reward: 2.3
Reward over last 100: 1.76
Average over all episodes so far: 1.7063225806451612
epsilon: 0.010000000000000873
**** Episode  7760 **** 
Recent average reward: 2.3
Reward over last 100: 1.85
Average over all episodes so far: 1.7070876288659793
epsilon: 0.010000000000000831
**** Episode  7770 **** 
Recent average reward: 0.9
Reward over last 100: 1.82
Average over all episodes so far: 1.7060489060489061
epsilon: 0.010000000000000798
**** Episode  7780 **** 
Recent a

**** Episode  8230 **** 
Recent average reward: 1.7
Reward over last 100: 1.74
Average over all episodes so far: 1.711543134872418
epsilon: 0.010000000000000096
**** Episode  8240 **** 
Recent average reward: 1.4
Reward over last 100: 1.68
Average over all episodes so far: 1.7111650485436893
epsilon: 0.010000000000000092
**** Episode  8250 **** 
Recent average reward: 0.8
Reward over last 100: 1.59
Average over all episodes so far: 1.710060606060606
epsilon: 0.010000000000000089
**** Episode  8260 **** 
Recent average reward: 1.7
Reward over last 100: 1.62
Average over all episodes so far: 1.710048426150121
epsilon: 0.010000000000000085
**** Episode  8270 **** 
Recent average reward: 1.3
Reward over last 100: 1.6
Average over all episodes so far: 1.709552599758162
epsilon: 0.01000000000000008
**** Episode  8280 **** 
Recent average reward: 1.5
Reward over last 100: 1.6
Average over all episodes so far: 1.7092995169082126
epsilon: 0.010000000000000077
**** Episode  8290 **** 
Recent ave

**** Episode  8740 **** 
Recent average reward: 1.8
Reward over last 100: 1.82
Average over all episodes so far: 1.7132723112128145
epsilon: 0.010000000000000009
**** Episode  8750 **** 
Recent average reward: 1.2
Reward over last 100: 1.79
Average over all episodes so far: 1.7126857142857144
epsilon: 0.010000000000000009
**** Episode  8760 **** 
Recent average reward: 1.4
Reward over last 100: 1.68
Average over all episodes so far: 1.7123287671232876
epsilon: 0.010000000000000009
**** Episode  8770 **** 
Recent average reward: 1.9
Reward over last 100: 1.67
Average over all episodes so far: 1.7125427594070695
epsilon: 0.010000000000000009
**** Episode  8780 **** 
Recent average reward: 1.7
Reward over last 100: 1.67
Average over all episodes so far: 1.7125284738041002
epsilon: 0.010000000000000009
**** Episode  8790 **** 
Recent average reward: 1.5
Reward over last 100: 1.67
Average over all episodes so far: 1.7122866894197952
epsilon: 0.010000000000000007
**** Episode  8800 **** 
Rec

**** Episode  9250 **** 
Recent average reward: 1.7
Reward over last 100: 1.71
Average over all episodes so far: 1.714054054054054
epsilon: 0.010000000000000002
**** Episode  9260 **** 
Recent average reward: 2.3
Reward over last 100: 1.77
Average over all episodes so far: 1.7146868250539957
epsilon: 0.010000000000000002
**** Episode  9270 **** 
Recent average reward: 1.7
Reward over last 100: 1.76
Average over all episodes so far: 1.7146709816612729
epsilon: 0.01
**** Episode  9280 **** 
Recent average reward: 1.5
Reward over last 100: 1.73
Average over all episodes so far: 1.7144396551724137
epsilon: 0.01
**** Episode  9290 **** 
Recent average reward: 2.6
Reward over last 100: 1.83
Average over all episodes so far: 1.7153928955866524
epsilon: 0.01
**** Episode  9300 **** 
Recent average reward: 1.3
Reward over last 100: 1.77
Average over all episodes so far: 1.7149462365591397
epsilon: 0.01
**** Episode  9310 **** 
Recent average reward: 1.6
Reward over last 100: 1.8
Average over al

**** Episode  9820 **** 
Recent average reward: 1.8
Reward over last 100: 1.57
Average over all episodes so far: 1.7130346232179225
epsilon: 0.01
**** Episode  9830 **** 
Recent average reward: 1.9
Reward over last 100: 1.62
Average over all episodes so far: 1.7132248219735504
epsilon: 0.01
**** Episode  9840 **** 
Recent average reward: 2.1
Reward over last 100: 1.67
Average over all episodes so far: 1.7136178861788618
epsilon: 0.01
**** Episode  9850 **** 
Recent average reward: 2.4
Reward over last 100: 1.81
Average over all episodes so far: 1.7143147208121827
epsilon: 0.01
**** Episode  9860 **** 
Recent average reward: 2.4
Reward over last 100: 1.86
Average over all episodes so far: 1.7150101419878296
epsilon: 0.01
**** Episode  9870 **** 
Recent average reward: 1.6
Reward over last 100: 1.87
Average over all episodes so far: 1.7148936170212765
epsilon: 0.01
**** Episode  9880 **** 
Recent average reward: 1.5
Reward over last 100: 1.85
Average over all episodes so far: 1.714676113

**** Episode  10390 **** 
Recent average reward: 1.9
Reward over last 100: 1.73
Average over all episodes so far: 1.7193455245428297
epsilon: 0.01
**** Episode  10400 **** 
Recent average reward: 1.7
Reward over last 100: 1.73
Average over all episodes so far: 1.719326923076923
epsilon: 0.01
**** Episode  10410 **** 
Recent average reward: 1.9
Reward over last 100: 1.7
Average over all episodes so far: 1.7195004803073968
epsilon: 0.01
**** Episode  10420 **** 
Recent average reward: 2.0
Reward over last 100: 1.77
Average over all episodes so far: 1.7197696737044146
epsilon: 0.01
**** Episode  10430 **** 
Recent average reward: 1.3
Reward over last 100: 1.71
Average over all episodes so far: 1.7193672099712367
epsilon: 0.01
**** Episode  10440 **** 
Recent average reward: 2.1
Reward over last 100: 1.75
Average over all episodes so far: 1.7197318007662836
epsilon: 0.01
**** Episode  10450 **** 
Recent average reward: 1.7
Reward over last 100: 1.8
Average over all episodes so far: 1.71971

**** Episode  10950 **** 
Recent average reward: 1.5
Reward over last 100: 1.78
Average over all episodes so far: 1.72337899543379
epsilon: 0.01
**** Episode  10960 **** 
Recent average reward: 1.9
Reward over last 100: 1.82
Average over all episodes so far: 1.7235401459854014
epsilon: 0.01
**** Episode  10970 **** 
Recent average reward: 2.4
Reward over last 100: 1.95
Average over all episodes so far: 1.7241567912488605
epsilon: 0.01
**** Episode  10980 **** 
Recent average reward: 1.8
Reward over last 100: 1.93
Average over all episodes so far: 1.7242258652094717
epsilon: 0.01
**** Episode  10990 **** 
Recent average reward: 1.1
Reward over last 100: 1.87
Average over all episodes so far: 1.7236578707916288
epsilon: 0.01
**** Episode  11000 **** 
Recent average reward: 1.3
Reward over last 100: 1.76
Average over all episodes so far: 1.7232727272727273
epsilon: 0.01
**** Episode  11010 **** 
Recent average reward: 2.3
Reward over last 100: 1.8
Average over all episodes so far: 1.72379

**** Episode  11510 **** 
Recent average reward: 2.2
Reward over last 100: 1.83
Average over all episodes so far: 1.7229365768896612
epsilon: 0.01
**** Episode  11520 **** 
Recent average reward: 1.9
Reward over last 100: 1.85
Average over all episodes so far: 1.7230902777777777
epsilon: 0.01
**** Episode  11530 **** 
Recent average reward: 1.4
Reward over last 100: 1.75
Average over all episodes so far: 1.7228100607111883
epsilon: 0.01
**** Episode  11540 **** 
Recent average reward: 2.7
Reward over last 100: 1.85
Average over all episodes so far: 1.7236568457538994
epsilon: 0.01
**** Episode  11550 **** 
Recent average reward: 1.6
Reward over last 100: 1.8
Average over all episodes so far: 1.7235497835497835
epsilon: 0.01
**** Episode  11560 **** 
Recent average reward: 1.9
Reward over last 100: 1.78
Average over all episodes so far: 1.7237024221453288
epsilon: 0.01
**** Episode  11570 **** 
Recent average reward: 1.7
Reward over last 100: 1.76
Average over all episodes so far: 1.723

**** Episode  12070 **** 
Recent average reward: 2.1
Reward over last 100: 1.56
Average over all episodes so far: 1.7300745650372824
epsilon: 0.01
**** Episode  12080 **** 
Recent average reward: 1.2
Reward over last 100: 1.55
Average over all episodes so far: 1.729635761589404
epsilon: 0.01
**** Episode  12090 **** 
Recent average reward: 2.4
Reward over last 100: 1.64
Average over all episodes so far: 1.7301902398676592
epsilon: 0.01
**** Episode  12100 **** 
Recent average reward: 1.3
Reward over last 100: 1.65
Average over all episodes so far: 1.7298347107438017
epsilon: 0.01
**** Episode  12110 **** 
Recent average reward: 1.8
Reward over last 100: 1.65
Average over all episodes so far: 1.7298926507018992
epsilon: 0.01
**** Episode  12120 **** 
Recent average reward: 1.7
Reward over last 100: 1.65
Average over all episodes so far: 1.72986798679868
epsilon: 0.01
**** Episode  12130 **** 
Recent average reward: 1.3
Reward over last 100: 1.71
Average over all episodes so far: 1.72951

**** Episode  12630 **** 
Recent average reward: 1.8
Reward over last 100: 1.84
Average over all episodes so far: 1.7322248614410134
epsilon: 0.01
**** Episode  12640 **** 
Recent average reward: 1.6
Reward over last 100: 1.73
Average over all episodes so far: 1.732120253164557
epsilon: 0.01
**** Episode  12650 **** 
Recent average reward: 2.2
Reward over last 100: 1.73
Average over all episodes so far: 1.7324901185770751
epsilon: 0.01
**** Episode  12660 **** 
Recent average reward: 2.0
Reward over last 100: 1.79
Average over all episodes so far: 1.7327014218009478
epsilon: 0.01
**** Episode  12670 **** 
Recent average reward: 1.5
Reward over last 100: 1.74
Average over all episodes so far: 1.7325177584846092
epsilon: 0.01
**** Episode  12680 **** 
Recent average reward: 2.0
Reward over last 100: 1.82
Average over all episodes so far: 1.7327287066246058
epsilon: 0.01
**** Episode  12690 **** 
Recent average reward: 3.2
Reward over last 100: 1.95
Average over all episodes so far: 1.733

**** Episode  13200 **** 
Recent average reward: 1.9
Reward over last 100: 2.05
Average over all episodes so far: 1.738409090909091
epsilon: 0.01
**** Episode  13210 **** 
Recent average reward: 2.2
Reward over last 100: 1.97
Average over all episodes so far: 1.7387585162755488
epsilon: 0.01
**** Episode  13220 **** 
Recent average reward: 2.1
Reward over last 100: 1.91
Average over all episodes so far: 1.7390317700453857
epsilon: 0.01
**** Episode  13230 **** 
Recent average reward: 1.8
Reward over last 100: 1.96
Average over all episodes so far: 1.7390778533635676
epsilon: 0.01
**** Episode  13240 **** 
Recent average reward: 1.6
Reward over last 100: 1.86
Average over all episodes so far: 1.7389728096676738
epsilon: 0.01
**** Episode  13250 **** 
Recent average reward: 1.9
Reward over last 100: 1.79
Average over all episodes so far: 1.7390943396226415
epsilon: 0.01
**** Episode  13260 **** 
Recent average reward: 1.5
Reward over last 100: 1.78
Average over all episodes so far: 1.738

**** Episode  13760 **** 
Recent average reward: 2.0
Reward over last 100: 1.83
Average over all episodes so far: 1.742514534883721
epsilon: 0.01
**** Episode  13770 **** 
Recent average reward: 1.6
Reward over last 100: 1.76
Average over all episodes so far: 1.7424110384894698
epsilon: 0.01
**** Episode  13780 **** 
Recent average reward: 1.6
Reward over last 100: 1.79
Average over all episodes so far: 1.7423076923076923
epsilon: 0.01
**** Episode  13790 **** 
Recent average reward: 2.2
Reward over last 100: 1.86
Average over all episodes so far: 1.7426395939086294
epsilon: 0.01
**** Episode  13800 **** 
Recent average reward: 1.8
Reward over last 100: 1.84
Average over all episodes so far: 1.7426811594202898
epsilon: 0.01
**** Episode  13810 **** 
Recent average reward: 1.7
Reward over last 100: 1.81
Average over all episodes so far: 1.7426502534395365
epsilon: 0.01
**** Episode  13820 **** 
Recent average reward: 0.8
Reward over last 100: 1.64
Average over all episodes so far: 1.741

**** Episode  14320 **** 
Recent average reward: 1.0
Reward over last 100: 2.05
Average over all episodes so far: 1.7460195530726257
epsilon: 0.01
**** Episode  14330 **** 
Recent average reward: 1.6
Reward over last 100: 1.98
Average over all episodes so far: 1.745917655268667
epsilon: 0.01
**** Episode  14340 **** 
Recent average reward: 2.0
Reward over last 100: 1.95
Average over all episodes so far: 1.746094839609484
epsilon: 0.01
**** Episode  14350 **** 
Recent average reward: 1.8
Reward over last 100: 1.87
Average over all episodes so far: 1.7461324041811848
epsilon: 0.01
**** Episode  14360 **** 
Recent average reward: 1.1
Reward over last 100: 1.68
Average over all episodes so far: 1.7456824512534819
epsilon: 0.01
**** Episode  14370 **** 
Recent average reward: 2.7
Reward over last 100: 1.74
Average over all episodes so far: 1.7463465553235908
epsilon: 0.01
**** Episode  14380 **** 
Recent average reward: 1.8
Reward over last 100: 1.81
Average over all episodes so far: 1.7463

**** Episode  14880 **** 
Recent average reward: 1.4
Reward over last 100: 1.89
Average over all episodes so far: 1.7498655913978494
epsilon: 0.01
**** Episode  14890 **** 
Recent average reward: 2.1
Reward over last 100: 1.95
Average over all episodes so far: 1.7501007387508394
epsilon: 0.01
**** Episode  14900 **** 
Recent average reward: 2.4
Reward over last 100: 2.04
Average over all episodes so far: 1.7505369127516779
epsilon: 0.01
**** Episode  14910 **** 
Recent average reward: 1.6
Reward over last 100: 2.05
Average over all episodes so far: 1.7504359490274983
epsilon: 0.01
**** Episode  14920 **** 
Recent average reward: 1.5
Reward over last 100: 2.06
Average over all episodes so far: 1.7502680965147452
epsilon: 0.01
**** Episode  14930 **** 
Recent average reward: 1.6
Reward over last 100: 1.97
Average over all episodes so far: 1.7501674480910918
epsilon: 0.01
**** Episode  14940 **** 
Recent average reward: 1.0
Reward over last 100: 1.87
Average over all episodes so far: 1.74

**** Episode  15440 **** 
Recent average reward: 1.6
Reward over last 100: 1.82
Average over all episodes so far: 1.75
epsilon: 0.01
**** Episode  15450 **** 
Recent average reward: 1.9
Reward over last 100: 1.83
Average over all episodes so far: 1.7500970873786408
epsilon: 0.01
**** Episode  15460 **** 
Recent average reward: 2.4
Reward over last 100: 1.9
Average over all episodes so far: 1.7505174644243209
epsilon: 0.01
**** Episode  15470 **** 
Recent average reward: 1.3
Reward over last 100: 1.91
Average over all episodes so far: 1.7502262443438914
epsilon: 0.01
**** Episode  15480 **** 
Recent average reward: 2.3
Reward over last 100: 1.94
Average over all episodes so far: 1.7505813953488372
epsilon: 0.01
**** Episode  15490 **** 
Recent average reward: 1.4
Reward over last 100: 1.92
Average over all episodes so far: 1.750355067785668
epsilon: 0.01
**** Episode  15500 **** 
Recent average reward: 1.8
Reward over last 100: 1.92
Average over all episodes so far: 1.7503870967741935
e

**** Episode  16000 **** 
Recent average reward: 1.7
Reward over last 100: 1.86
Average over all episodes so far: 1.7511875
epsilon: 0.01
**** Episode  16010 **** 
Recent average reward: 2.9
Reward over last 100: 1.97
Average over all episodes so far: 1.7519050593379137
epsilon: 0.01
**** Episode  16020 **** 
Recent average reward: 2.7
Reward over last 100: 2.08
Average over all episodes so far: 1.7524968789013733
epsilon: 0.01
**** Episode  16030 **** 
Recent average reward: 1.4
Reward over last 100: 2.09
Average over all episodes so far: 1.7522769806612601
epsilon: 0.01
**** Episode  16040 **** 
Recent average reward: 1.5
Reward over last 100: 1.99
Average over all episodes so far: 1.7521197007481297
epsilon: 0.01
**** Episode  16050 **** 
Recent average reward: 1.3
Reward over last 100: 1.97
Average over all episodes so far: 1.7518380062305297
epsilon: 0.01
**** Episode  16060 **** 
Recent average reward: 1.8
Reward over last 100: 1.93
Average over all episodes so far: 1.75186799501

**** Episode  16560 **** 
Recent average reward: 1.5
Reward over last 100: 1.81
Average over all episodes so far: 1.7506642512077295
epsilon: 0.01
**** Episode  16570 **** 
Recent average reward: 1.1
Reward over last 100: 1.72
Average over all episodes so far: 1.7502715751357876
epsilon: 0.01
**** Episode  16580 **** 
Recent average reward: 1.7
Reward over last 100: 1.7
Average over all episodes so far: 1.7502412545235224
epsilon: 0.01
**** Episode  16590 **** 
Recent average reward: 1.6
Reward over last 100: 1.65
Average over all episodes so far: 1.750150693188668
epsilon: 0.01
**** Episode  16600 **** 
Recent average reward: 1.8
Reward over last 100: 1.67
Average over all episodes so far: 1.7501807228915662
epsilon: 0.01
**** Episode  16610 **** 
Recent average reward: 1.6
Reward over last 100: 1.64
Average over all episodes so far: 1.7500903070439495
epsilon: 0.01
**** Episode  16620 **** 
Recent average reward: 2.3
Reward over last 100: 1.74
Average over all episodes so far: 1.7504

**** Episode  17120 **** 
Recent average reward: 1.5
Reward over last 100: 1.91
Average over all episodes so far: 1.7505257009345794
epsilon: 0.01
**** Episode  17130 **** 
Recent average reward: 1.9
Reward over last 100: 1.91
Average over all episodes so far: 1.75061295971979
epsilon: 0.01
**** Episode  17140 **** 
Recent average reward: 1.6
Reward over last 100: 1.93
Average over all episodes so far: 1.7505250875145857
epsilon: 0.01
**** Episode  17150 **** 
Recent average reward: 1.4
Reward over last 100: 1.77
Average over all episodes so far: 1.7503206997084548
epsilon: 0.01
**** Episode  17160 **** 
Recent average reward: 1.9
Reward over last 100: 1.69
Average over all episodes so far: 1.7504079254079254
epsilon: 0.01
**** Episode  17170 **** 
Recent average reward: 2.4
Reward over last 100: 1.81
Average over all episodes so far: 1.7507862550960978
epsilon: 0.01
**** Episode  17180 **** 
Recent average reward: 1.8
Reward over last 100: 1.8
Average over all episodes so far: 1.75081

**** Episode  17690 **** 
Recent average reward: 2.4
Reward over last 100: 1.95
Average over all episodes so far: 1.7521198417184851
epsilon: 0.01
**** Episode  17700 **** 
Recent average reward: 1.8
Reward over last 100: 1.91
Average over all episodes so far: 1.7521468926553672
epsilon: 0.01
**** Episode  17710 **** 
Recent average reward: 2.0
Reward over last 100: 1.92
Average over all episodes so far: 1.7522868435911914
epsilon: 0.01
**** Episode  17720 **** 
Recent average reward: 1.6
Reward over last 100: 1.94
Average over all episodes so far: 1.7522009029345373
epsilon: 0.01
**** Episode  17730 **** 
Recent average reward: 1.6
Reward over last 100: 1.88
Average over all episodes so far: 1.752115059221658
epsilon: 0.01
**** Episode  17740 **** 
Recent average reward: 1.2
Reward over last 100: 1.79
Average over all episodes so far: 1.751803833145434
epsilon: 0.01
**** Episode  17750 **** 
Recent average reward: 2.0
Reward over last 100: 1.79
Average over all episodes so far: 1.7519

**** Episode  18250 **** 
Recent average reward: 1.3
Reward over last 100: 1.84
Average over all episodes so far: 1.7543013698630137
epsilon: 0.01
**** Episode  18260 **** 
Recent average reward: 2.4
Reward over last 100: 1.84
Average over all episodes so far: 1.7546549835706462
epsilon: 0.01
**** Episode  18270 **** 
Recent average reward: 2.6
Reward over last 100: 1.94
Average over all episodes so far: 1.7551176792556102
epsilon: 0.01
**** Episode  18280 **** 
Recent average reward: 2.1
Reward over last 100: 1.97
Average over all episodes so far: 1.7553063457330416
epsilon: 0.01
**** Episode  18290 **** 
Recent average reward: 2.4
Reward over last 100: 2.06
Average over all episodes so far: 1.7556588299617277
epsilon: 0.01
**** Episode  18300 **** 
Recent average reward: 2.0
Reward over last 100: 2.07
Average over all episodes so far: 1.755792349726776
epsilon: 0.01
**** Episode  18310 **** 
Recent average reward: 1.8
Reward over last 100: 2.03
Average over all episodes so far: 1.755

**** Episode  18810 **** 
Recent average reward: 1.5
Reward over last 100: 1.65
Average over all episodes so far: 1.7575757575757576
epsilon: 0.01
**** Episode  18820 **** 
Recent average reward: 2.1
Reward over last 100: 1.58
Average over all episodes so far: 1.757757704569607
epsilon: 0.01
**** Episode  18830 **** 
Recent average reward: 1.7
Reward over last 100: 1.62
Average over all episodes so far: 1.7577270313329794
epsilon: 0.01
**** Episode  18840 **** 
Recent average reward: 2.2
Reward over last 100: 1.67
Average over all episodes so far: 1.7579617834394905
epsilon: 0.01
**** Episode  18850 **** 
Recent average reward: 2.0
Reward over last 100: 1.62
Average over all episodes so far: 1.7580901856763926
epsilon: 0.01
**** Episode  18860 **** 
Recent average reward: 1.0
Reward over last 100: 1.56
Average over all episodes so far: 1.7576882290562037
epsilon: 0.01
**** Episode  18870 **** 
Recent average reward: 1.8
Reward over last 100: 1.57
Average over all episodes so far: 1.757

**** Episode  19370 **** 
Recent average reward: 2.6
Reward over last 100: 1.77
Average over all episodes so far: 1.7590087764584408
epsilon: 0.01
**** Episode  19380 **** 
Recent average reward: 1.4
Reward over last 100: 1.7
Average over all episodes so far: 1.7588235294117647
epsilon: 0.01
**** Episode  19390 **** 
Recent average reward: 1.9
Reward over last 100: 1.71
Average over all episodes so far: 1.758896338318721
epsilon: 0.01
**** Episode  19400 **** 
Recent average reward: 2.0
Reward over last 100: 1.75
Average over all episodes so far: 1.759020618556701
epsilon: 0.01
**** Episode  19410 **** 
Recent average reward: 1.0
Reward over last 100: 1.72
Average over all episodes so far: 1.7586295723853684
epsilon: 0.01
**** Episode  19420 **** 
Recent average reward: 2.6
Reward over last 100: 1.8
Average over all episodes so far: 1.7590628218331616
epsilon: 0.01
**** Episode  19430 **** 
Recent average reward: 1.6
Reward over last 100: 1.78
Average over all episodes so far: 1.758980

**** Episode  19930 **** 
Recent average reward: 2.2
Reward over last 100: 1.75
Average over all episodes so far: 1.7601103863522327
epsilon: 0.01
**** Episode  19940 **** 
Recent average reward: 1.4
Reward over last 100: 1.69
Average over all episodes so far: 1.7599297893681043
epsilon: 0.01
**** Episode  19950 **** 
Recent average reward: 1.3
Reward over last 100: 1.64
Average over all episodes so far: 1.7596992481203007
epsilon: 0.01
**** Episode  19960 **** 
Recent average reward: 2.4
Reward over last 100: 1.71
Average over all episodes so far: 1.7600200400801602
epsilon: 0.01
**** Episode  19970 **** 
Recent average reward: 2.4
Reward over last 100: 1.86
Average over all episodes so far: 1.7603405107661492
epsilon: 0.01
**** Episode  19980 **** 
Recent average reward: 1.2
Reward over last 100: 1.82
Average over all episodes so far: 1.76006006006006
epsilon: 0.01
**** Episode  19990 **** 
Recent average reward: 1.6
Reward over last 100: 1.81
Average over all episodes so far: 1.7599

**** Episode  20490 **** 
Recent average reward: 2.7
Reward over last 100: 2.0
Average over all episodes so far: 1.762274280136652
epsilon: 0.01
**** Episode  20500 **** 
Recent average reward: 1.0
Reward over last 100: 1.91
Average over all episodes so far: 1.7619024390243903
epsilon: 0.01
**** Episode  20510 **** 
Recent average reward: 2.7
Reward over last 100: 2.06
Average over all episodes so far: 1.7623598244758654
epsilon: 0.01
**** Episode  20520 **** 
Recent average reward: 1.7
Reward over last 100: 2.03
Average over all episodes so far: 1.7623294346978557
epsilon: 0.01
**** Episode  20530 **** 
Recent average reward: 1.7
Reward over last 100: 1.99
Average over all episodes so far: 1.7622990745250853
epsilon: 0.01
**** Episode  20540 **** 
Recent average reward: 2.1
Reward over last 100: 1.91
Average over all episodes so far: 1.7624634858812074
epsilon: 0.01
**** Episode  20550 **** 
Recent average reward: 2.0
Reward over last 100: 1.97
Average over all episodes so far: 1.7625

**** Episode  21050 **** 
Recent average reward: 1.4
Reward over last 100: 1.85
Average over all episodes so far: 1.766270783847981
epsilon: 0.01
**** Episode  21060 **** 
Recent average reward: 1.7
Reward over last 100: 1.79
Average over all episodes so far: 1.7662393162393162
epsilon: 0.01
**** Episode  21070 **** 
Recent average reward: 1.5
Reward over last 100: 1.63
Average over all episodes so far: 1.7661129568106313
epsilon: 0.01
**** Episode  21080 **** 
Recent average reward: 2.2
Reward over last 100: 1.7
Average over all episodes so far: 1.7663187855787477
epsilon: 0.01
**** Episode  21090 **** 
Recent average reward: 2.5
Reward over last 100: 1.81
Average over all episodes so far: 1.7666666666666666
epsilon: 0.01
**** Episode  21100 **** 
Recent average reward: 2.4
Reward over last 100: 1.91
Average over all episodes so far: 1.7669668246445498
epsilon: 0.01
**** Episode  21110 **** 
Recent average reward: 2.0
Reward over last 100: 1.97
Average over all episodes so far: 1.7670

**** Episode  21610 **** 
Recent average reward: 1.7
Reward over last 100: 1.67
Average over all episodes so far: 1.767561314206386
epsilon: 0.01
**** Episode  21620 **** 
Recent average reward: 2.4
Reward over last 100: 1.73
Average over all episodes so far: 1.767853839037928
epsilon: 0.01
**** Episode  21630 **** 
Recent average reward: 1.5
Reward over last 100: 1.69
Average over all episodes so far: 1.7677300046232085
epsilon: 0.01
**** Episode  21640 **** 
Recent average reward: 2.1
Reward over last 100: 1.77
Average over all episodes so far: 1.7678835489833642
epsilon: 0.01
**** Episode  21650 **** 
Recent average reward: 1.3
Reward over last 100: 1.64
Average over all episodes so far: 1.7676674364896074
epsilon: 0.01
**** Episode  21660 **** 
Recent average reward: 1.3
Reward over last 100: 1.57
Average over all episodes so far: 1.7674515235457064
epsilon: 0.01
**** Episode  21670 **** 
Recent average reward: 2.3
Reward over last 100: 1.64
Average over all episodes so far: 1.7676

**** Episode  22170 **** 
Recent average reward: 1.2
Reward over last 100: 1.68
Average over all episodes so far: 1.767749210645016
epsilon: 0.01
**** Episode  22180 **** 
Recent average reward: 3.0
Reward over last 100: 1.83
Average over all episodes so far: 1.7683047790802524
epsilon: 0.01
**** Episode  22190 **** 
Recent average reward: 1.1
Reward over last 100: 1.8
Average over all episodes so far: 1.76800360522758
epsilon: 0.01
**** Episode  22200 **** 
Recent average reward: 2.3
Reward over last 100: 1.94
Average over all episodes so far: 1.7682432432432433
epsilon: 0.01
**** Episode  22210 **** 
Recent average reward: 2.1
Reward over last 100: 1.92
Average over all episodes so far: 1.7683926159387664
epsilon: 0.01
**** Episode  22220 **** 
Recent average reward: 1.8
Reward over last 100: 1.86
Average over all episodes so far: 1.7684068406840685
epsilon: 0.01
**** Episode  22230 **** 
Recent average reward: 3.3
Reward over last 100: 2.04
Average over all episodes so far: 1.769095

**** Episode  22730 **** 
Recent average reward: 1.8
Reward over last 100: 2.14
Average over all episodes so far: 1.7715794104707434
epsilon: 0.01
**** Episode  22740 **** 
Recent average reward: 1.9
Reward over last 100: 2.16
Average over all episodes so far: 1.7716358839050133
epsilon: 0.01
**** Episode  22750 **** 
Recent average reward: 1.0
Reward over last 100: 2.07
Average over all episodes so far: 1.7712967032967033
epsilon: 0.01
**** Episode  22760 **** 
Recent average reward: 2.2
Reward over last 100: 2.12
Average over all episodes so far: 1.7714850615114235
epsilon: 0.01
**** Episode  22770 **** 
Recent average reward: 1.7
Reward over last 100: 2.04
Average over all episodes so far: 1.771453667105841
epsilon: 0.01
**** Episode  22780 **** 
Recent average reward: 2.6
Reward over last 100: 2.03
Average over all episodes so far: 1.7718173836698858
epsilon: 0.01
**** Episode  22790 **** 
Recent average reward: 1.4
Reward over last 100: 1.95
Average over all episodes so far: 1.771

**** Episode  23290 **** 
Recent average reward: 1.2
Reward over last 100: 1.8
Average over all episodes so far: 1.7709746672391584
epsilon: 0.01
**** Episode  23300 **** 
Recent average reward: 1.0
Reward over last 100: 1.79
Average over all episodes so far: 1.7706437768240344
epsilon: 0.01
**** Episode  23310 **** 
Recent average reward: 2.2
Reward over last 100: 1.74
Average over all episodes so far: 1.7708279708279708
epsilon: 0.01
**** Episode  23320 **** 
Recent average reward: 1.3
Reward over last 100: 1.68
Average over all episodes so far: 1.7706260720411664
epsilon: 0.01
**** Episode  23330 **** 
Recent average reward: 1.8
Reward over last 100: 1.66
Average over all episodes so far: 1.7706386626660953
epsilon: 0.01
**** Episode  23340 **** 
Recent average reward: 1.9
Reward over last 100: 1.72
Average over all episodes so far: 1.770694087403599
epsilon: 0.01
**** Episode  23350 **** 
Recent average reward: 1.5
Reward over last 100: 1.71
Average over all episodes so far: 1.7705

**** Episode  23950 **** 
Recent average reward: 1.9
Reward over last 100: 1.71
Average over all episodes so far: 1.7702296450939456
epsilon: 0.01
**** Episode  23960 **** 
Recent average reward: 1.7
Reward over last 100: 1.73
Average over all episodes so far: 1.7702003338898165
epsilon: 0.01
**** Episode  23970 **** 
Recent average reward: 1.7
Reward over last 100: 1.67
Average over all episodes so far: 1.7701710471422611
epsilon: 0.01
**** Episode  23980 **** 
Recent average reward: 2.0
Reward over last 100: 1.71
Average over all episodes so far: 1.7702668890742286
epsilon: 0.01
**** Episode  23990 **** 
Recent average reward: 1.6
Reward over last 100: 1.62
Average over all episodes so far: 1.7701959149645685
epsilon: 0.01
**** Episode  24000 **** 
Recent average reward: 2.1
Reward over last 100: 1.65
Average over all episodes so far: 1.7703333333333333
epsilon: 0.01
**** Episode  24010 **** 
Recent average reward: 1.5
Reward over last 100: 1.71
Average over all episodes so far: 1.77

**** Episode  24510 **** 
Recent average reward: 2.8
Reward over last 100: 1.92
Average over all episodes so far: 1.7713178294573644
epsilon: 0.01
**** Episode  24520 **** 
Recent average reward: 1.3
Reward over last 100: 1.84
Average over all episodes so far: 1.7711256117455139
epsilon: 0.01
**** Episode  24530 **** 
Recent average reward: 1.6
Reward over last 100: 1.8
Average over all episodes so far: 1.7710558499796167
epsilon: 0.01
**** Episode  24540 **** 
Recent average reward: 2.1
Reward over last 100: 1.77
Average over all episodes so far: 1.7711898940505297
epsilon: 0.01
**** Episode  24550 **** 
Recent average reward: 2.0
Reward over last 100: 1.82
Average over all episodes so far: 1.7712830957230143
epsilon: 0.01
**** Episode  24560 **** 
Recent average reward: 2.2
Reward over last 100: 1.88
Average over all episodes so far: 1.771457654723127
epsilon: 0.01
**** Episode  24570 **** 
Recent average reward: 1.7
Reward over last 100: 1.86
Average over all episodes so far: 1.7714

**** Episode  25070 **** 
Recent average reward: 1.8
Reward over last 100: 1.87
Average over all episodes so far: 1.7735141603510172
epsilon: 0.01
**** Episode  25080 **** 
Recent average reward: 1.8
Reward over last 100: 1.88
Average over all episodes so far: 1.773524720893142
epsilon: 0.01
**** Episode  25090 **** 
Recent average reward: 0.8
Reward over last 100: 1.79
Average over all episodes so far: 1.7731367078517337
epsilon: 0.01
**** Episode  25100 **** 
Recent average reward: 1.9
Reward over last 100: 1.78
Average over all episodes so far: 1.773187250996016
epsilon: 0.01
**** Episode  25110 **** 
Recent average reward: 2.5
Reward over last 100: 1.84
Average over all episodes so far: 1.7734767025089606
epsilon: 0.01
**** Episode  25120 **** 
Recent average reward: 1.2
Reward over last 100: 1.74
Average over all episodes so far: 1.7732484076433122
epsilon: 0.01
**** Episode  25130 **** 
Recent average reward: 2.5
Reward over last 100: 1.88
Average over all episodes so far: 1.7735

**** Episode  25630 **** 
Recent average reward: 1.0
Reward over last 100: 1.73
Average over all episodes so far: 1.7750292625829107
epsilon: 0.01
**** Episode  25640 **** 
Recent average reward: 2.2
Reward over last 100: 1.74
Average over all episodes so far: 1.775195007800312
epsilon: 0.01
**** Episode  25650 **** 
Recent average reward: 1.6
Reward over last 100: 1.69
Average over all episodes so far: 1.7751267056530216
epsilon: 0.01
**** Episode  25660 **** 
Recent average reward: 1.0
Reward over last 100: 1.6
Average over all episodes so far: 1.7748246297739672
epsilon: 0.01
**** Episode  25670 **** 
Recent average reward: 2.1
Reward over last 100: 1.61
Average over all episodes so far: 1.7749513050253214
epsilon: 0.01
**** Episode  25680 **** 
Recent average reward: 1.8
Reward over last 100: 1.59
Average over all episodes so far: 1.774961059190031
epsilon: 0.01
**** Episode  25690 **** 
Recent average reward: 2.0
Reward over last 100: 1.6
Average over all episodes so far: 1.775048

**** Episode  26190 **** 
Recent average reward: 1.9
Reward over last 100: 1.87
Average over all episodes so far: 1.773577701412753
epsilon: 0.01
**** Episode  26200 **** 
Recent average reward: 1.8
Reward over last 100: 1.82
Average over all episodes so far: 1.773587786259542
epsilon: 0.01
**** Episode  26210 **** 
Recent average reward: 1.8
Reward over last 100: 1.83
Average over all episodes so far: 1.773597863410912
epsilon: 0.01
**** Episode  26220 **** 
Recent average reward: 1.9
Reward over last 100: 1.81
Average over all episodes so far: 1.7736460717009916
epsilon: 0.01
**** Episode  26230 **** 
Recent average reward: 1.8
Reward over last 100: 1.75
Average over all episodes so far: 1.7736561189477698
epsilon: 0.01
**** Episode  26240 **** 
Recent average reward: 1.7
Reward over last 100: 1.78
Average over all episodes so far: 1.7736280487804879
epsilon: 0.01
**** Episode  26250 **** 
Recent average reward: 2.0
Reward over last 100: 1.82
Average over all episodes so far: 1.77371

**** Episode  26750 **** 
Recent average reward: 1.3
Reward over last 100: 1.7
Average over all episodes so far: 1.7725981308411214
epsilon: 0.01
**** Episode  26760 **** 
Recent average reward: 2.2
Reward over last 100: 1.74
Average over all episodes so far: 1.7727578475336323
epsilon: 0.01
**** Episode  26770 **** 
Recent average reward: 2.2
Reward over last 100: 1.83
Average over all episodes so far: 1.7729174449010086
epsilon: 0.01
**** Episode  26780 **** 
Recent average reward: 1.4
Reward over last 100: 1.77
Average over all episodes so far: 1.7727781926811053
epsilon: 0.01
**** Episode  26790 **** 
Recent average reward: 1.1
Reward over last 100: 1.72
Average over all episodes so far: 1.7725270623366929
epsilon: 0.01
**** Episode  26800 **** 
Recent average reward: 0.8
Reward over last 100: 1.6
Average over all episodes so far: 1.7721641791044775
epsilon: 0.01
**** Episode  26810 **** 
Recent average reward: 1.6
Reward over last 100: 1.52
Average over all episodes so far: 1.7720

**** Episode  27310 **** 
Recent average reward: 1.6
Reward over last 100: 1.62
Average over all episodes so far: 1.771731966312706
epsilon: 0.01
**** Episode  27320 **** 
Recent average reward: 2.0
Reward over last 100: 1.7
Average over all episodes so far: 1.7718155197657395
epsilon: 0.01
**** Episode  27330 **** 
Recent average reward: 1.8
Reward over last 100: 1.76
Average over all episodes so far: 1.7718258324185876
epsilon: 0.01
**** Episode  27340 **** 
Recent average reward: 2.0
Reward over last 100: 1.78
Average over all episodes so far: 1.7719092904169715
epsilon: 0.01
**** Episode  27350 **** 
Recent average reward: 1.8
Reward over last 100: 1.76
Average over all episodes so far: 1.7719195612431444
epsilon: 0.01
**** Episode  27360 **** 
Recent average reward: 1.6
Reward over last 100: 1.69
Average over all episodes so far: 1.7718567251461987
epsilon: 0.01
**** Episode  27370 **** 
Recent average reward: 1.4
Reward over last 100: 1.68
Average over all episodes so far: 1.7717

**** Episode  27870 **** 
Recent average reward: 2.0
Reward over last 100: 1.85
Average over all episodes so far: 1.7730534625044851
epsilon: 0.01
**** Episode  27880 **** 
Recent average reward: 1.5
Reward over last 100: 1.81
Average over all episodes so far: 1.7729555236728838
epsilon: 0.01
**** Episode  27890 **** 
Recent average reward: 1.7
Reward over last 100: 1.77
Average over all episodes so far: 1.7729293653639298
epsilon: 0.01
**** Episode  27900 **** 
Recent average reward: 2.0
Reward over last 100: 1.77
Average over all episodes so far: 1.773010752688172
epsilon: 0.01
**** Episode  27910 **** 
Recent average reward: 1.8
Reward over last 100: 1.79
Average over all episodes so far: 1.7730204227875312
epsilon: 0.01
**** Episode  27920 **** 
Recent average reward: 1.3
Reward over last 100: 1.69
Average over all episodes so far: 1.7728510028653295
epsilon: 0.01
**** Episode  27930 **** 
Recent average reward: 1.5
Reward over last 100: 1.68
Average over all episodes so far: 1.772

**** Episode  28430 **** 
Recent average reward: 1.5
Reward over last 100: 1.85
Average over all episodes so far: 1.7739359831164263
epsilon: 0.01
**** Episode  28440 **** 
Recent average reward: 1.4
Reward over last 100: 1.78
Average over all episodes so far: 1.7738045007032348
epsilon: 0.01
**** Episode  28450 **** 
Recent average reward: 1.7
Reward over last 100: 1.77
Average over all episodes so far: 1.7737785588752197
epsilon: 0.01
**** Episode  28460 **** 
Recent average reward: 1.7
Reward over last 100: 1.7
Average over all episodes so far: 1.7737526352775825
epsilon: 0.01
**** Episode  28470 **** 
Recent average reward: 1.8
Reward over last 100: 1.8
Average over all episodes so far: 1.7737618545837723
epsilon: 0.01
**** Episode  28480 **** 
Recent average reward: 1.7
Reward over last 100: 1.74
Average over all episodes so far: 1.7737359550561798
epsilon: 0.01
**** Episode  28490 **** 
Recent average reward: 2.6
Reward over last 100: 1.82
Average over all episodes so far: 1.7740

**** Episode  28990 **** 
Recent average reward: 1.6
Reward over last 100: 1.63
Average over all episodes so far: 1.775232838909969
epsilon: 0.01
**** Episode  29000 **** 
Recent average reward: 2.1
Reward over last 100: 1.69
Average over all episodes so far: 1.775344827586207
epsilon: 0.01
**** Episode  29010 **** 
Recent average reward: 1.9
Reward over last 100: 1.7
Average over all episodes so far: 1.775387797311272
epsilon: 0.01
**** Episode  29020 **** 
Recent average reward: 2.3
Reward over last 100: 1.81
Average over all episodes so far: 1.7755685733976567
epsilon: 0.01
**** Episode  29030 **** 
Recent average reward: 2.1
Reward over last 100: 1.82
Average over all episodes so far: 1.7756803306923872
epsilon: 0.01
**** Episode  29040 **** 
Recent average reward: 1.1
Reward over last 100: 1.78
Average over all episodes so far: 1.7754476584022039
epsilon: 0.01
**** Episode  29050 **** 
Recent average reward: 1.2
Reward over last 100: 1.72
Average over all episodes so far: 1.775249

**** Episode  29550 **** 
Recent average reward: 1.7
Reward over last 100: 1.87
Average over all episodes so far: 1.7753299492385788
epsilon: 0.01
**** Episode  29560 **** 
Recent average reward: 1.4
Reward over last 100: 1.85
Average over all episodes so far: 1.7752029769959405
epsilon: 0.01
**** Episode  29570 **** 
Recent average reward: 1.7
Reward over last 100: 1.83
Average over all episodes so far: 1.775177544808928
epsilon: 0.01
**** Episode  29580 **** 
Recent average reward: 1.5
Reward over last 100: 1.78
Average over all episodes so far: 1.7750845165652467
epsilon: 0.01
**** Episode  29590 **** 
Recent average reward: 1.4
Reward over last 100: 1.7
Average over all episodes so far: 1.7749577559986482
epsilon: 0.01
**** Episode  29600 **** 
Recent average reward: 1.9
Reward over last 100: 1.73
Average over all episodes so far: 1.775
epsilon: 0.01
**** Episode  29610 **** 
Recent average reward: 1.6
Reward over last 100: 1.75
Average over all episodes so far: 1.7749408983451536


**** Episode  30120 **** 
Recent average reward: 1.7
Reward over last 100: 1.66
Average over all episodes so far: 1.774468791500664
epsilon: 0.01
**** Episode  30130 **** 
Recent average reward: 2.8
Reward over last 100: 1.79
Average over all episodes so far: 1.7748091603053435
epsilon: 0.01
**** Episode  30140 **** 
Recent average reward: 2.0
Reward over last 100: 1.84
Average over all episodes so far: 1.7748838752488387
epsilon: 0.01
**** Episode  30150 **** 
Recent average reward: 1.0
Reward over last 100: 1.87
Average over all episodes so far: 1.7746268656716417
epsilon: 0.01
**** Episode  30160 **** 
Recent average reward: 1.5
Reward over last 100: 1.81
Average over all episodes so far: 1.7745358090185677
epsilon: 0.01
**** Episode  30170 **** 
Recent average reward: 1.2
Reward over last 100: 1.75
Average over all episodes so far: 1.7743453762015247
epsilon: 0.01
**** Episode  30180 **** 
Recent average reward: 2.2
Reward over last 100: 1.81
Average over all episodes so far: 1.774

**** Episode  30680 **** 
Recent average reward: 1.8
Reward over last 100: 1.83
Average over all episodes so far: 1.776140808344198
epsilon: 0.01
**** Episode  30690 **** 
Recent average reward: 1.5
Reward over last 100: 1.8
Average over all episodes so far: 1.7760508308895406
epsilon: 0.01
**** Episode  30700 **** 
Recent average reward: 2.1
Reward over last 100: 1.8
Average over all episodes so far: 1.7761563517915309
epsilon: 0.01
**** Episode  30710 **** 
Recent average reward: 1.9
Reward over last 100: 1.9
Average over all episodes so far: 1.7761966786063172
epsilon: 0.01
**** Episode  30720 **** 
Recent average reward: 1.7
Reward over last 100: 1.8
Average over all episodes so far: 1.776171875
epsilon: 0.01
**** Episode  30730 **** 
Recent average reward: 1.6
Reward over last 100: 1.79
Average over all episodes so far: 1.7761145460462089
epsilon: 0.01
**** Episode  30740 **** 
Recent average reward: 1.3
Reward over last 100: 1.79
Average over all episodes so far: 1.77595966167859

**** Episode  31250 **** 
Recent average reward: 1.4
Reward over last 100: 1.84
Average over all episodes so far: 1.775104
epsilon: 0.01
**** Episode  31260 **** 
Recent average reward: 2.4
Reward over last 100: 1.86
Average over all episodes so far: 1.7753039027511197
epsilon: 0.01
**** Episode  31270 **** 
Recent average reward: 2.2
Reward over last 100: 1.92
Average over all episodes so far: 1.7754397185801087
epsilon: 0.01
**** Episode  31280 **** 
Recent average reward: 1.3
Reward over last 100: 1.84
Average over all episodes so far: 1.7752877237851663
epsilon: 0.01
**** Episode  31290 **** 
Recent average reward: 2.2
Reward over last 100: 1.88
Average over all episodes so far: 1.7754234579737935
epsilon: 0.01
**** Episode  31300 **** 
Recent average reward: 1.3
Reward over last 100: 1.79
Average over all episodes so far: 1.7752715654952076
epsilon: 0.01
**** Episode  31310 **** 
Recent average reward: 1.9
Reward over last 100: 1.75
Average over all episodes so far: 1.775311402107

**** Episode  31810 **** 
Recent average reward: 1.4
Reward over last 100: 1.49
Average over all episodes so far: 1.7733102797862308
epsilon: 0.01
**** Episode  31820 **** 
Recent average reward: 2.1
Reward over last 100: 1.56
Average over all episodes so far: 1.7734129478315526
epsilon: 0.01
**** Episode  31830 **** 
Recent average reward: 1.4
Reward over last 100: 1.57
Average over all episodes so far: 1.7732956330505811
epsilon: 0.01
**** Episode  31840 **** 
Recent average reward: 1.1
Reward over last 100: 1.57
Average over all episodes so far: 1.7730841708542713
epsilon: 0.01
**** Episode  31850 **** 
Recent average reward: 1.2
Reward over last 100: 1.51
Average over all episodes so far: 1.7729042386185243
epsilon: 0.01
**** Episode  31860 **** 
Recent average reward: 1.9
Reward over last 100: 1.6
Average over all episodes so far: 1.7729441305712492
epsilon: 0.01
**** Episode  31870 **** 
Recent average reward: 1.7
Reward over last 100: 1.62
Average over all episodes so far: 1.772

**** Episode  32370 **** 
Recent average reward: 1.9
Reward over last 100: 1.89
Average over all episodes so far: 1.773586654309546
epsilon: 0.01
**** Episode  32380 **** 
Recent average reward: 1.2
Reward over last 100: 1.82
Average over all episodes so far: 1.773409512044472
epsilon: 0.01
**** Episode  32390 **** 
Recent average reward: 2.0
Reward over last 100: 1.84
Average over all episodes so far: 1.773479468971905
epsilon: 0.01
**** Episode  32400 **** 
Recent average reward: 1.8
Reward over last 100: 1.94
Average over all episodes so far: 1.7734876543209876
epsilon: 0.01
**** Episode  32410 **** 
Recent average reward: 1.6
Reward over last 100: 1.85
Average over all episodes so far: 1.7734341252699783
epsilon: 0.01
**** Episode  32420 **** 
Recent average reward: 1.5
Reward over last 100: 1.82
Average over all episodes so far: 1.7733497840838988
epsilon: 0.01
**** Episode  32430 **** 
Recent average reward: 1.5
Reward over last 100: 1.76
Average over all episodes so far: 1.77326

**** Episode  32930 **** 
Recent average reward: 2.4
Reward over last 100: 1.71
Average over all episodes so far: 1.772699665958093
epsilon: 0.01
**** Episode  32940 **** 
Recent average reward: 2.2
Reward over last 100: 1.81
Average over all episodes so far: 1.772829386763813
epsilon: 0.01
**** Episode  32950 **** 
Recent average reward: 1.6
Reward over last 100: 1.84
Average over all episodes so far: 1.7727769347496207
epsilon: 0.01
**** Episode  32960 **** 
Recent average reward: 2.2
Reward over last 100: 1.86
Average over all episodes so far: 1.7729065533980584
epsilon: 0.01
**** Episode  32970 **** 
Recent average reward: 2.2
Reward over last 100: 1.88
Average over all episodes so far: 1.773036093418259
epsilon: 0.01
**** Episode  32980 **** 
Recent average reward: 2.4
Reward over last 100: 1.96
Average over all episodes so far: 1.773226197695573
epsilon: 0.01
**** Episode  32990 **** 
Recent average reward: 1.0
Reward over last 100: 1.94
Average over all episodes so far: 1.772991

**** Episode  33490 **** 
Recent average reward: 1.4
Reward over last 100: 1.67
Average over all episodes so far: 1.772738130785309
epsilon: 0.01
**** Episode  33500 **** 
Recent average reward: 1.5
Reward over last 100: 1.66
Average over all episodes so far: 1.7726567164179103
epsilon: 0.01
**** Episode  33510 **** 
Recent average reward: 2.7
Reward over last 100: 1.79
Average over all episodes so far: 1.7729334527006864
epsilon: 0.01
**** Episode  33520 **** 
Recent average reward: 1.8
Reward over last 100: 1.81
Average over all episodes so far: 1.7729415274463007
epsilon: 0.01
**** Episode  33530 **** 
Recent average reward: 2.1
Reward over last 100: 1.84
Average over all episodes so far: 1.773039069490009
epsilon: 0.01
**** Episode  33540 **** 
Recent average reward: 1.0
Reward over last 100: 1.8
Average over all episodes so far: 1.772808586762075
epsilon: 0.01
**** Episode  33550 **** 
Recent average reward: 1.4
Reward over last 100: 1.71
Average over all episodes so far: 1.772697

**** Episode  34050 **** 
Recent average reward: 2.1
Reward over last 100: 1.82
Average over all episodes so far: 1.7736270190895742
epsilon: 0.01
**** Episode  34060 **** 
Recent average reward: 1.6
Reward over last 100: 1.81
Average over all episodes so far: 1.7735760422783324
epsilon: 0.01
**** Episode  34070 **** 
Recent average reward: 1.3
Reward over last 100: 1.74
Average over all episodes so far: 1.773437041385383
epsilon: 0.01
**** Episode  34080 **** 
Recent average reward: 1.9
Reward over last 100: 1.68
Average over all episodes so far: 1.7734741784037558
epsilon: 0.01
**** Episode  34090 **** 
Recent average reward: 2.4
Reward over last 100: 1.72
Average over all episodes so far: 1.773657964212379
epsilon: 0.01
**** Episode  34100 **** 
Recent average reward: 1.6
Reward over last 100: 1.69
Average over all episodes so far: 1.7736070381231672
epsilon: 0.01
**** Episode  34110 **** 
Recent average reward: 1.1
Reward over last 100: 1.67
Average over all episodes so far: 1.7734

**** Episode  34610 **** 
Recent average reward: 1.6
Reward over last 100: 1.77
Average over all episodes so far: 1.7719156313204276
epsilon: 0.01
**** Episode  34620 **** 
Recent average reward: 1.3
Reward over last 100: 1.65
Average over all episodes so far: 1.7717793183131139
epsilon: 0.01
**** Episode  34630 **** 
Recent average reward: 2.1
Reward over last 100: 1.63
Average over all episodes so far: 1.771874097603234
epsilon: 0.01
**** Episode  34640 **** 
Recent average reward: 1.5
Reward over last 100: 1.6
Average over all episodes so far: 1.771795612009238
epsilon: 0.01
**** Episode  34650 **** 
Recent average reward: 1.3
Reward over last 100: 1.58
Average over all episodes so far: 1.7716594516594517
epsilon: 0.01
**** Episode  34660 **** 
Recent average reward: 1.2
Reward over last 100: 1.56
Average over all episodes so far: 1.7714945181765724
epsilon: 0.01
**** Episode  34670 **** 
Recent average reward: 1.5
Reward over last 100: 1.46
Average over all episodes so far: 1.77141

**** Episode  35170 **** 
Recent average reward: 2.0
Reward over last 100: 1.74
Average over all episodes so far: 1.7700597099800968
epsilon: 0.01
**** Episode  35180 **** 
Recent average reward: 1.4
Reward over last 100: 1.75
Average over all episodes so far: 1.7699545196134168
epsilon: 0.01
**** Episode  35190 **** 
Recent average reward: 2.3
Reward over last 100: 1.83
Average over all episodes so far: 1.7701051435066781
epsilon: 0.01
**** Episode  35200 **** 
Recent average reward: 1.2
Reward over last 100: 1.74
Average over all episodes so far: 1.7699431818181819
epsilon: 0.01
**** Episode  35210 **** 
Recent average reward: 1.1
Reward over last 100: 1.68
Average over all episodes so far: 1.7697529111047998
epsilon: 0.01
**** Episode  35220 **** 
Recent average reward: 0.9
Reward over last 100: 1.62
Average over all episodes so far: 1.7695059625212948
epsilon: 0.01
**** Episode  35230 **** 
Recent average reward: 1.2
Reward over last 100: 1.58
Average over all episodes so far: 1.76

**** Episode  35730 **** 
Recent average reward: 1.8
Reward over last 100: 1.71
Average over all episodes so far: 1.770584942625245
epsilon: 0.01
**** Episode  35740 **** 
Recent average reward: 3.4
Reward over last 100: 1.93
Average over all episodes so far: 1.771040850587577
epsilon: 0.01
**** Episode  35750 **** 
Recent average reward: 1.4
Reward over last 100: 1.81
Average over all episodes so far: 1.7709370629370629
epsilon: 0.01
**** Episode  35760 **** 
Recent average reward: 1.6
Reward over last 100: 1.81
Average over all episodes so far: 1.7708892617449665
epsilon: 0.01
**** Episode  35770 **** 
Recent average reward: 1.6
Reward over last 100: 1.87
Average over all episodes so far: 1.7708414872798435
epsilon: 0.01
**** Episode  35780 **** 
Recent average reward: 2.4
Reward over last 100: 1.91
Average over all episodes so far: 1.771017328116266
epsilon: 0.01
**** Episode  35790 **** 
Recent average reward: 2.6
Reward over last 100: 1.99
Average over all episodes so far: 1.77124

**** Episode  36300 **** 
Recent average reward: 1.5
Reward over last 100: 1.64
Average over all episodes so far: 1.7699449035812673
epsilon: 0.01
**** Episode  36310 **** 
Recent average reward: 1.3
Reward over last 100: 1.6
Average over all episodes so far: 1.769815477829799
epsilon: 0.01
**** Episode  36320 **** 
Recent average reward: 2.6
Reward over last 100: 1.67
Average over all episodes so far: 1.770044052863436
epsilon: 0.01
**** Episode  36330 **** 
Recent average reward: 1.9
Reward over last 100: 1.78
Average over all episodes so far: 1.7700798238370492
epsilon: 0.01
**** Episode  36340 **** 
Recent average reward: 2.3
Reward over last 100: 1.82
Average over all episodes so far: 1.7702256466703357
epsilon: 0.01
**** Episode  36350 **** 
Recent average reward: 1.5
Reward over last 100: 1.78
Average over all episodes so far: 1.7701513067400274
epsilon: 0.01
**** Episode  36360 **** 
Recent average reward: 1.7
Reward over last 100: 1.79
Average over all episodes so far: 1.77013

**** Episode  36860 **** 
Recent average reward: 1.3
Reward over last 100: 1.69
Average over all episodes so far: 1.7678513293543137
epsilon: 0.01
**** Episode  36870 **** 
Recent average reward: 2.6
Reward over last 100: 1.73
Average over all episodes so far: 1.7680770273935449
epsilon: 0.01
**** Episode  36880 **** 
Recent average reward: 1.7
Reward over last 100: 1.71
Average over all episodes so far: 1.768058568329718
epsilon: 0.01
**** Episode  36890 **** 
Recent average reward: 0.8
Reward over last 100: 1.54
Average over all episodes so far: 1.767796150718352
epsilon: 0.01
**** Episode  36900 **** 
Recent average reward: 2.5
Reward over last 100: 1.66
Average over all episodes so far: 1.7679945799457994
epsilon: 0.01
**** Episode  36910 **** 
Recent average reward: 2.0
Reward over last 100: 1.66
Average over all episodes so far: 1.7680574370089406
epsilon: 0.01
**** Episode  36920 **** 
Recent average reward: 1.3
Reward over last 100: 1.67
Average over all episodes so far: 1.7679

**** Episode  37420 **** 
Recent average reward: 2.3
Reward over last 100: 1.75
Average over all episodes so far: 1.767103153393907
epsilon: 0.01
**** Episode  37430 **** 
Recent average reward: 1.3
Reward over last 100: 1.68
Average over all episodes so far: 1.7669783596045952
epsilon: 0.01
**** Episode  37440 **** 
Recent average reward: 1.8
Reward over last 100: 1.74
Average over all episodes so far: 1.7669871794871794
epsilon: 0.01
**** Episode  37450 **** 
Recent average reward: 2.1
Reward over last 100: 1.82
Average over all episodes so far: 1.767076101468625
epsilon: 0.01
**** Episode  37460 **** 
Recent average reward: 1.3
Reward over last 100: 1.78
Average over all episodes so far: 1.7669514148424987
epsilon: 0.01
**** Episode  37470 **** 
Recent average reward: 1.7
Reward over last 100: 1.8
Average over all episodes so far: 1.76693354683747
epsilon: 0.01
**** Episode  37480 **** 
Recent average reward: 1.8
Reward over last 100: 1.78
Average over all episodes so far: 1.7669423

**** Episode  37980 **** 
Recent average reward: 1.1
Reward over last 100: 1.59
Average over all episodes so far: 1.7652448657187993
epsilon: 0.01
**** Episode  37990 **** 
Recent average reward: 2.7
Reward over last 100: 1.69
Average over all episodes so far: 1.765490918662806
epsilon: 0.01
**** Episode  38000 **** 
Recent average reward: 1.1
Reward over last 100: 1.61
Average over all episodes so far: 1.7653157894736842
epsilon: 0.01
**** Episode  38010 **** 
Recent average reward: 1.3
Reward over last 100: 1.57
Average over all episodes so far: 1.7651933701657458
epsilon: 0.01
**** Episode  38020 **** 
Recent average reward: 1.5
Reward over last 100: 1.58
Average over all episodes so far: 1.765123619147817
epsilon: 0.01
**** Episode  38030 **** 
Recent average reward: 1.3
Reward over last 100: 1.5
Average over all episodes so far: 1.765001314751512
epsilon: 0.01
**** Episode  38040 **** 
Recent average reward: 1.8
Reward over last 100: 1.53
Average over all episodes so far: 1.765010

**** Episode  38540 **** 
Recent average reward: 1.5
Reward over last 100: 1.56
Average over all episodes so far: 1.763959522573949
epsilon: 0.01
**** Episode  38550 **** 
Recent average reward: 1.0
Reward over last 100: 1.47
Average over all episodes so far: 1.7637613488975357
epsilon: 0.01
**** Episode  38560 **** 
Recent average reward: 1.1
Reward over last 100: 1.44
Average over all episodes so far: 1.7635892116182572
epsilon: 0.01
**** Episode  38570 **** 
Recent average reward: 1.6
Reward over last 100: 1.51
Average over all episodes so far: 1.7635467980295567
epsilon: 0.01
**** Episode  38580 **** 
Recent average reward: 1.5
Reward over last 100: 1.43
Average over all episodes so far: 1.763478486262312
epsilon: 0.01
**** Episode  38590 **** 
Recent average reward: 1.4
Reward over last 100: 1.36
Average over all episodes so far: 1.7633842964498574
epsilon: 0.01
**** Episode  38600 **** 
Recent average reward: 2.2
Reward over last 100: 1.44
Average over all episodes so far: 1.7634

**** Episode  39100 **** 
Recent average reward: 2.0
Reward over last 100: 1.78
Average over all episodes so far: 1.7618670076726344
epsilon: 0.01
**** Episode  39110 **** 
Recent average reward: 1.4
Reward over last 100: 1.72
Average over all episodes so far: 1.7617744822296089
epsilon: 0.01
**** Episode  39120 **** 
Recent average reward: 1.8
Reward over last 100: 1.7
Average over all episodes so far: 1.761784253578732
epsilon: 0.01
**** Episode  39130 **** 
Recent average reward: 1.5
Reward over last 100: 1.63
Average over all episodes so far: 1.761717352415027
epsilon: 0.01
**** Episode  39140 **** 
Recent average reward: 1.5
Reward over last 100: 1.66
Average over all episodes so far: 1.7616504854368933
epsilon: 0.01
**** Episode  39150 **** 
Recent average reward: 1.7
Reward over last 100: 1.67
Average over all episodes so far: 1.7616347381864623
epsilon: 0.01
**** Episode  39160 **** 
Recent average reward: 1.9
Reward over last 100: 1.75
Average over all episodes so far: 1.76167

**** Episode  39660 **** 
Recent average reward: 1.8
Reward over last 100: 1.62
Average over all episodes so far: 1.7601613716591025
epsilon: 0.01
**** Episode  39670 **** 
Recent average reward: 1.6
Reward over last 100: 1.63
Average over all episodes so far: 1.7601209982354424
epsilon: 0.01
**** Episode  39680 **** 
Recent average reward: 2.0
Reward over last 100: 1.67
Average over all episodes so far: 1.7601814516129033
epsilon: 0.01
**** Episode  39690 **** 
Recent average reward: 1.8
Reward over last 100: 1.61
Average over all episodes so far: 1.7601914840010078
epsilon: 0.01
**** Episode  39700 **** 
Recent average reward: 2.5
Reward over last 100: 1.7
Average over all episodes so far: 1.7603778337531486
epsilon: 0.01
**** Episode  39710 **** 
Recent average reward: 1.7
Reward over last 100: 1.79
Average over all episodes so far: 1.76036262906069
epsilon: 0.01
**** Episode  39720 **** 
Recent average reward: 1.5
Reward over last 100: 1.83
Average over all episodes so far: 1.76029

**** Episode  40220 **** 
Recent average reward: 2.4
Reward over last 100: 1.9
Average over all episodes so far: 1.7596966683242168
epsilon: 0.01
**** Episode  40230 **** 
Recent average reward: 1.6
Reward over last 100: 1.86
Average over all episodes so far: 1.7596569724086502
epsilon: 0.01
**** Episode  40240 **** 
Recent average reward: 0.8
Reward over last 100: 1.78
Average over all episodes so far: 1.7594184890656064
epsilon: 0.01
**** Episode  40250 **** 
Recent average reward: 0.9
Reward over last 100: 1.65
Average over all episodes so far: 1.7592049689440994
epsilon: 0.01
**** Episode  40260 **** 
Recent average reward: 2.0
Reward over last 100: 1.64
Average over all episodes so far: 1.75926477893691
epsilon: 0.01
**** Episode  40270 **** 
Recent average reward: 1.4
Reward over last 100: 1.68
Average over all episodes so far: 1.7591755649366774
epsilon: 0.01
**** Episode  40280 **** 
Recent average reward: 1.9
Reward over last 100: 1.65
Average over all episodes so far: 1.75921

**** Episode  40780 **** 
Recent average reward: 1.5
Reward over last 100: 1.78
Average over all episodes so far: 1.7589995095635116
epsilon: 0.01
**** Episode  40790 **** 
Recent average reward: 2.0
Reward over last 100: 1.8
Average over all episodes so far: 1.759058592792351
epsilon: 0.01
**** Episode  40800 **** 
Recent average reward: 1.4
Reward over last 100: 1.79
Average over all episodes so far: 1.7589705882352942
epsilon: 0.01
**** Episode  40810 **** 
Recent average reward: 1.3
Reward over last 100: 1.71
Average over all episodes so far: 1.7588581230090663
epsilon: 0.01
**** Episode  40820 **** 
Recent average reward: 1.1
Reward over last 100: 1.63
Average over all episodes so far: 1.7586967172954433
epsilon: 0.01
**** Episode  40830 **** 
Recent average reward: 1.2
Reward over last 100: 1.55
Average over all episodes so far: 1.7585598824393829
epsilon: 0.01
**** Episode  40840 **** 
Recent average reward: 1.8
Reward over last 100: 1.58
Average over all episodes so far: 1.7585

**** Episode  41340 **** 
Recent average reward: 1.6
Reward over last 100: 1.64
Average over all episodes so far: 1.758055152394775
epsilon: 0.01
**** Episode  41350 **** 
Recent average reward: 1.6
Reward over last 100: 1.62
Average over all episodes so far: 1.7580169286577993
epsilon: 0.01
**** Episode  41360 **** 
Recent average reward: 1.3
Reward over last 100: 1.59
Average over all episodes so far: 1.7579061895551258
epsilon: 0.01
**** Episode  41370 **** 
Recent average reward: 2.0
Reward over last 100: 1.66
Average over all episodes so far: 1.75796470872613
epsilon: 0.01
**** Episode  41380 **** 
Recent average reward: 2.4
Reward over last 100: 1.75
Average over all episodes so far: 1.7581198646689222
epsilon: 0.01
**** Episode  41390 **** 
Recent average reward: 2.3
Reward over last 100: 1.75
Average over all episodes so far: 1.7582507852138198
epsilon: 0.01
**** Episode  41400 **** 
Recent average reward: 1.9
Reward over last 100: 1.78
Average over all episodes so far: 1.75828

**** Episode  41900 **** 
Recent average reward: 2.1
Reward over last 100: 1.89
Average over all episodes so far: 1.75708830548926
epsilon: 0.01
**** Episode  41910 **** 
Recent average reward: 2.0
Reward over last 100: 1.83
Average over all episodes so far: 1.757146265807683
epsilon: 0.01
**** Episode  41920 **** 
Recent average reward: 1.4
Reward over last 100: 1.8
Average over all episodes so far: 1.75706106870229
epsilon: 0.01
**** Episode  41930 **** 
Recent average reward: 2.6
Reward over last 100: 1.9
Average over all episodes so far: 1.757262103505843
epsilon: 0.01
**** Episode  41940 **** 
Recent average reward: 1.7
Reward over last 100: 1.93
Average over all episodes so far: 1.757248450166905
epsilon: 0.01
**** Episode  41950 **** 
Recent average reward: 1.8
Reward over last 100: 1.91
Average over all episodes so far: 1.757258641239571
epsilon: 0.01
**** Episode  41960 **** 
Recent average reward: 1.6
Reward over last 100: 1.93
Average over all episodes so far: 1.757221163012

**** Episode  42460 **** 
Recent average reward: 2.2
Reward over last 100: 1.62
Average over all episodes so far: 1.755016486104569
epsilon: 0.01
**** Episode  42470 **** 
Recent average reward: 2.0
Reward over last 100: 1.64
Average over all episodes so far: 1.7550741700023547
epsilon: 0.01
**** Episode  42480 **** 
Recent average reward: 1.8
Reward over last 100: 1.7
Average over all episodes so far: 1.755084745762712
epsilon: 0.01
**** Episode  42490 **** 
Recent average reward: 1.2
Reward over last 100: 1.66
Average over all episodes so far: 1.7549541068486703
epsilon: 0.01
**** Episode  42500 **** 
Recent average reward: 0.8
Reward over last 100: 1.58
Average over all episodes so far: 1.7547294117647059
epsilon: 0.01
**** Episode  42510 **** 
Recent average reward: 2.0
Reward over last 100: 1.6
Average over all episodes so far: 1.7547871089155493
epsilon: 0.01
**** Episode  42520 **** 
Recent average reward: 2.3
Reward over last 100: 1.68
Average over all episodes so far: 1.754915

**** Episode  43020 **** 
Recent average reward: 1.2
Reward over last 100: 1.42
Average over all episodes so far: 1.753765690376569
epsilon: 0.01
**** Episode  43030 **** 
Recent average reward: 2.4
Reward over last 100: 1.55
Average over all episodes so far: 1.7539158726469906
epsilon: 0.01
**** Episode  43040 **** 
Recent average reward: 1.6
Reward over last 100: 1.55
Average over all episodes so far: 1.7538801115241636
epsilon: 0.01
**** Episode  43050 **** 
Recent average reward: 1.5
Reward over last 100: 1.54
Average over all episodes so far: 1.7538211382113822
epsilon: 0.01
**** Episode  43060 **** 
Recent average reward: 1.6
Reward over last 100: 1.53
Average over all episodes so far: 1.7537854156990247
epsilon: 0.01
**** Episode  43070 **** 
Recent average reward: 1.5
Reward over last 100: 1.52
Average over all episodes so far: 1.7537264917576039
epsilon: 0.01
**** Episode  43080 **** 
Recent average reward: 2.1
Reward over last 100: 1.56
Average over all episodes so far: 1.753

**** Episode  43580 **** 
Recent average reward: 1.9
Reward over last 100: 1.52
Average over all episodes so far: 1.7519963285910969
epsilon: 0.01
**** Episode  43590 **** 
Recent average reward: 1.1
Reward over last 100: 1.5
Average over all episodes so far: 1.7518467538426246
epsilon: 0.01
**** Episode  43600 **** 
Recent average reward: 2.1
Reward over last 100: 1.56
Average over all episodes so far: 1.7519266055045872
epsilon: 0.01
**** Episode  43610 **** 
Recent average reward: 1.8
Reward over last 100: 1.62
Average over all episodes so far: 1.751937628984178
epsilon: 0.01
**** Episode  43620 **** 
Recent average reward: 2.0
Reward over last 100: 1.65
Average over all episodes so far: 1.7519944979367263
epsilon: 0.01
**** Episode  43630 **** 
Recent average reward: 2.4
Reward over last 100: 1.74
Average over all episodes so far: 1.7521430208572084
epsilon: 0.01
**** Episode  43640 **** 
Recent average reward: 1.5
Reward over last 100: 1.71
Average over all episodes so far: 1.7520

**** Episode  44140 **** 
Recent average reward: 1.0
Reward over last 100: 1.7
Average over all episodes so far: 1.751155414589941
epsilon: 0.01
**** Episode  44150 **** 
Recent average reward: 1.5
Reward over last 100: 1.67
Average over all episodes so far: 1.7510985277463194
epsilon: 0.01
**** Episode  44160 **** 
Recent average reward: 1.9
Reward over last 100: 1.74
Average over all episodes so far: 1.7511322463768115
epsilon: 0.01
**** Episode  44170 **** 
Recent average reward: 2.4
Reward over last 100: 1.74
Average over all episodes so far: 1.751279148743491
epsilon: 0.01
**** Episode  44180 **** 
Recent average reward: 1.5
Reward over last 100: 1.79
Average over all episodes so far: 1.751222272521503
epsilon: 0.01
**** Episode  44190 **** 
Recent average reward: 2.4
Reward over last 100: 1.9
Average over all episodes so far: 1.7513690880289658
epsilon: 0.01
**** Episode  44200 **** 
Recent average reward: 2.2
Reward over last 100: 1.85
Average over all episodes so far: 1.7514705

**** Episode  44700 **** 
Recent average reward: 1.3
Reward over last 100: 1.56
Average over all episodes so far: 1.7509395973154362
epsilon: 0.01
**** Episode  44710 **** 
Recent average reward: 1.1
Reward over last 100: 1.47
Average over all episodes so far: 1.7507940058152538
epsilon: 0.01
**** Episode  44720 **** 
Recent average reward: 1.3
Reward over last 100: 1.43
Average over all episodes so far: 1.7506932021466906
epsilon: 0.01
**** Episode  44730 **** 
Recent average reward: 1.7
Reward over last 100: 1.45
Average over all episodes so far: 1.750681868991728
epsilon: 0.01
**** Episode  44740 **** 
Recent average reward: 1.9
Reward over last 100: 1.41
Average over all episodes so far: 1.7507152436298614
epsilon: 0.01
**** Episode  44750 **** 
Recent average reward: 1.5
Reward over last 100: 1.41
Average over all episodes so far: 1.750659217877095
epsilon: 0.01
**** Episode  44760 **** 
Recent average reward: 2.2
Reward over last 100: 1.45
Average over all episodes so far: 1.7507

**** Episode  45260 **** 
Recent average reward: 1.7
Reward over last 100: 1.44
Average over all episodes so far: 1.7502430402121079
epsilon: 0.01
**** Episode  45270 **** 
Recent average reward: 1.5
Reward over last 100: 1.48
Average over all episodes so far: 1.7501877623149988
epsilon: 0.01
**** Episode  45280 **** 
Recent average reward: 1.1
Reward over last 100: 1.46
Average over all episodes so far: 1.7500441696113074
epsilon: 0.01
**** Episode  45290 **** 
Recent average reward: 1.1
Reward over last 100: 1.48
Average over all episodes so far: 1.749900640317951
epsilon: 0.01
**** Episode  45300 **** 
Recent average reward: 1.7
Reward over last 100: 1.45
Average over all episodes so far: 1.7498896247240618
epsilon: 0.01
**** Episode  45310 **** 
Recent average reward: 0.8
Reward over last 100: 1.37
Average over all episodes so far: 1.7496799823438534
epsilon: 0.01
**** Episode  45320 **** 
Recent average reward: 1.5
Reward over last 100: 1.34
Average over all episodes so far: 1.749

**** Episode  45820 **** 
Recent average reward: 2.0
Reward over last 100: 1.81
Average over all episodes so far: 1.7484941073766913
epsilon: 0.01
**** Episode  45830 **** 
Recent average reward: 1.7
Reward over last 100: 1.81
Average over all episodes so far: 1.7484835260746237
epsilon: 0.01
**** Episode  45840 **** 
Recent average reward: 1.4
Reward over last 100: 1.85
Average over all episodes so far: 1.7484075043630019
epsilon: 0.01
**** Episode  45850 **** 
Recent average reward: 3.1
Reward over last 100: 1.99
Average over all episodes so far: 1.7487022900763358
epsilon: 0.01
**** Episode  45860 **** 
Recent average reward: 2.9
Reward over last 100: 2.1
Average over all episodes so far: 1.7489533362407326
epsilon: 0.01
**** Episode  45870 **** 
Recent average reward: 1.7
Reward over last 100: 2.06
Average over all episodes so far: 1.7489426640505776
epsilon: 0.01
**** Episode  45880 **** 
Recent average reward: 1.2
Reward over last 100: 2.0
Average over all episodes so far: 1.7488

**** Episode  46380 **** 
Recent average reward: 2.4
Reward over last 100: 1.84
Average over all episodes so far: 1.748684777921518
epsilon: 0.01
**** Episode  46390 **** 
Recent average reward: 1.4
Reward over last 100: 1.8
Average over all episodes so far: 1.7486096141409786
epsilon: 0.01
**** Episode  46400 **** 
Recent average reward: 1.7
Reward over last 100: 1.83
Average over all episodes so far: 1.7485991379310344
epsilon: 0.01
**** Episode  46410 **** 
Recent average reward: 2.2
Reward over last 100: 1.85
Average over all episodes so far: 1.7486964016375781
epsilon: 0.01
**** Episode  46420 **** 
Recent average reward: 2.0
Reward over last 100: 1.91
Average over all episodes so far: 1.7487505385609652
epsilon: 0.01
**** Episode  46430 **** 
Recent average reward: 1.3
Reward over last 100: 1.83
Average over all episodes so far: 1.74865388757269
epsilon: 0.01
**** Episode  46440 **** 
Recent average reward: 2.6
Reward over last 100: 1.92
Average over all episodes so far: 1.748837

**** Episode  46940 **** 
Recent average reward: 2.2
Reward over last 100: 1.72
Average over all episodes so far: 1.7484661269706008
epsilon: 0.01
**** Episode  46950 **** 
Recent average reward: 1.4
Reward over last 100: 1.68
Average over all episodes so far: 1.74839190628328
epsilon: 0.01
**** Episode  46960 **** 
Recent average reward: 1.4
Reward over last 100: 1.65
Average over all episodes so far: 1.7483177172061328
epsilon: 0.01
**** Episode  46970 **** 
Recent average reward: 1.4
Reward over last 100: 1.68
Average over all episodes so far: 1.7482435597189696
epsilon: 0.01
**** Episode  46980 **** 
Recent average reward: 1.8
Reward over last 100: 1.7
Average over all episodes so far: 1.748254576415496
epsilon: 0.01
**** Episode  46990 **** 
Recent average reward: 1.9
Reward over last 100: 1.67
Average over all episodes so far: 1.748286869546712
epsilon: 0.01
**** Episode  47000 **** 
Recent average reward: 2.3
Reward over last 100: 1.74
Average over all episodes so far: 1.7484042

**** Episode  47500 **** 
Recent average reward: 1.6
Reward over last 100: 1.53
Average over all episodes so far: 1.7477473684210527
epsilon: 0.01
**** Episode  47510 **** 
Recent average reward: 1.7
Reward over last 100: 1.62
Average over all episodes so far: 1.7477373184592717
epsilon: 0.01
**** Episode  47520 **** 
Recent average reward: 1.9
Reward over last 100: 1.68
Average over all episodes so far: 1.7477693602693603
epsilon: 0.01
**** Episode  47530 **** 
Recent average reward: 1.2
Reward over last 100: 1.66
Average over all episodes so far: 1.7476541131916685
epsilon: 0.01
**** Episode  47540 **** 
Recent average reward: 1.6
Reward over last 100: 1.7
Average over all episodes so far: 1.7476230542700883
epsilon: 0.01
**** Episode  47550 **** 
Recent average reward: 1.8
Reward over last 100: 1.69
Average over all episodes so far: 1.747634069400631
epsilon: 0.01
**** Episode  47560 **** 
Recent average reward: 1.7
Reward over last 100: 1.71
Average over all episodes so far: 1.7476

**** Episode  48060 **** 
Recent average reward: 1.1
Reward over last 100: 1.5
Average over all episodes so far: 1.7465875988347899
epsilon: 0.01
**** Episode  48070 **** 
Recent average reward: 2.2
Reward over last 100: 1.61
Average over all episodes so far: 1.7466819221967964
epsilon: 0.01
**** Episode  48080 **** 
Recent average reward: 1.1
Reward over last 100: 1.61
Average over all episodes so far: 1.7465474209650582
epsilon: 0.01
**** Episode  48090 **** 
Recent average reward: 2.2
Reward over last 100: 1.66
Average over all episodes so far: 1.7466417134539405
epsilon: 0.01
**** Episode  48100 **** 
Recent average reward: 2.8
Reward over last 100: 1.77
Average over all episodes so far: 1.746860706860707
epsilon: 0.01
**** Episode  48110 **** 
Recent average reward: 2.2
Reward over last 100: 1.79
Average over all episodes so far: 1.7469548950322178
epsilon: 0.01
**** Episode  48120 **** 
Recent average reward: 1.2
Reward over last 100: 1.74
Average over all episodes so far: 1.7468

**** Episode  48620 **** 
Recent average reward: 1.3
Reward over last 100: 1.84
Average over all episodes so far: 1.745392842451666
epsilon: 0.01
**** Episode  48630 **** 
Recent average reward: 1.4
Reward over last 100: 1.8
Average over all episodes so far: 1.7453218178079375
epsilon: 0.01
**** Episode  48640 **** 
Recent average reward: 0.8
Reward over last 100: 1.74
Average over all episodes so far: 1.7451274671052632
epsilon: 0.01
**** Episode  48650 **** 
Recent average reward: 2.1
Reward over last 100: 1.7
Average over all episodes so far: 1.7452004110996917
epsilon: 0.01
**** Episode  48660 **** 
Recent average reward: 1.6
Reward over last 100: 1.7
Average over all episodes so far: 1.7451705713111385
epsilon: 0.01
**** Episode  48670 **** 
Recent average reward: 2.1
Reward over last 100: 1.68
Average over all episodes so far: 1.7452434764742142
epsilon: 0.01
**** Episode  48680 **** 
Recent average reward: 1.8
Reward over last 100: 1.71
Average over all episodes so far: 1.745254

**** Episode  49180 **** 
Recent average reward: 1.5
Reward over last 100: 1.84
Average over all episodes so far: 1.7456689711264741
epsilon: 0.01
**** Episode  49190 **** 
Recent average reward: 1.6
Reward over last 100: 1.8
Average over all episodes so far: 1.7456393575930067
epsilon: 0.01
**** Episode  49200 **** 
Recent average reward: 1.7
Reward over last 100: 1.76
Average over all episodes so far: 1.745630081300813
epsilon: 0.01
**** Episode  49210 **** 
Recent average reward: 1.7
Reward over last 100: 1.69
Average over all episodes so far: 1.7456208087787035
epsilon: 0.01
**** Episode  49220 **** 
Recent average reward: 2.5
Reward over last 100: 1.81
Average over all episodes so far: 1.745774075579033
epsilon: 0.01
**** Episode  49230 **** 
Recent average reward: 1.3
Reward over last 100: 1.79
Average over all episodes so far: 1.7456835263050985
epsilon: 0.01
**** Episode  49240 **** 
Recent average reward: 2.4
Reward over last 100: 1.85
Average over all episodes so far: 1.74581

In [None]:
plt.figure(2,figsize=[12,5])
plt.title("Score at end of episode")
plt.plot(reward_total[:frames_total])
plt.show()

In [None]:
print("Average number of steps: {}". format(np.average(steps_total[:frames_total])))

In [None]:
print("Average number of steps=reward: {}". format(np.average(steps_total)))
print("Average reward in last 100 episodes: {}". format(np.average(reward_total[frames_total-100:frames_total])))



plt.figure(1,figsize=[12,5])
plt.title("Rewards")
plt.bar(torch.arange(len(rewards_total)), rewards_total,alpha=0.6, color='green')
#plt.plot(rewards_total)
plt.show()

In [None]:
plt.figure(2,figsize=[12,5])
plt.title("Steps to finish episode")
plt.plot(steps_total[:frames_total])
plt.show()