In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
from IPython import display

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim

In [3]:
import numpy as np
import math
import random

In [4]:
import time

In [5]:
def show_state(env, episode,step, info=""):
    plt.figure(99999,figsize=[8,6])
    plt.clf()
    plt.imshow(env.render(mode='rgb_array'))
    plt.title("episode: {} step: {} ".format(episode,step))
    #plt.title("%s | Step: %d %s" % (env._spec.id,step, info))
    plt.axis('off')

    display.clear_output(wait=True)
    display.display(plt.gcf())

In [6]:
import gym

In [7]:
use_cuda = torch.cuda.is_available()
device=torch.device("cuda:4" if use_cuda else "cpu")

In [8]:
###### PARAMS ######
learning_rate = 0.01
num_episodes = 50000
gamma=0.999999
#gamma=0.85
egreedy = 0.9
egreedy_final = 0.01
egreedy_decay = 500

report_interval=10

score_to_solve = 195

hidden_layer_size=64

replay_memory_size=50000

batch_size=32

update_target_frequency = 100

clip_error=False

In [9]:
env = gym.make('Breakout-ramDeterministic-v4')

In [10]:
#env = gym.wrappers.Monitor(env, '../mp4/sandbox10',video_callable=lambda episode_id: True,force=True)
env = gym.wrappers.Monitor(env, '../mp4/breakout_DQN',video_callable=lambda episode_id: episode_id%100==0,force=True)

In [11]:
number_of_inputs=env.observation_space.shape[0]
number_of_outputs=env.action_space.n

In [12]:
def calculate_epsilon(steps_done):
    epsilon = egreedy_final + (egreedy - egreedy_final) * \
              math.exp(-1. * steps_done / egreedy_decay )
    return epsilon

In [13]:
class ExperienceReplay():
    def __init__(self, capacity):
        self.capacity=capacity
        self.memory=[]
        self.position=0
        
    
    def push(self, state,
             action, new_state,
             reward, done):
        
            transition=(state,action,new_state,reward,done)
            
            if self.position>=len(self.memory):
                self.memory.append(transition)
            else:
                self.memory[self.position]=transition
                
            self.position=(self.position+1)%self.capacity
        
    
    def sample(self,batch_size):
        return zip(*random.sample(self.memory, batch_size))
    
    def __len__(self):
        return len(self.memory)
    

In [14]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.linear1 = nn.Linear(number_of_inputs,hidden_layer_size)
        self.linear2 = nn.Linear(hidden_layer_size,number_of_outputs)
        
        self.activation=nn.Tanh()
        
    def forward(self, x):
        output1 = self.linear1(x)
        output1 = self.activation(output1)
        output2 = self.linear2(output1)
        
        return output2

In [15]:
class QNet_Agent():
    def __init__(self):
        
        self.nn = NeuralNetwork().to(device)
        self.target_nn = NeuralNetwork().to(device)
        
        self.loss_function = nn.MSELoss()
        self.optimizer = optim.Adam(params=self.nn.parameters(), lr=learning_rate)
        
        self.update_target_counter = 0
        
    def select_action(self,state,epsilon):
        
        random_for_egreedy=torch.rand(1).item()
        
        if random_for_egreedy>epsilon:
            self.nn.eval()
            with torch.no_grad():
                state=torch.Tensor(state).to(device)
                predicted_value_from_nn=self.nn(state)
                action=torch.argmax(predicted_value_from_nn).item()
        else:
            action=env.action_space.sample()
                
                
        return action
    
    def optimize(self):
        
        if len(memory)<batch_size:
            return
        
        state, action, new_state, reward, done = memory.sample(batch_size)
        
        state=torch.Tensor(state).to(device)
        new_state=torch.Tensor(new_state).to(device)
        reward=torch.Tensor(reward).to(device)
        
        #the view call below is to transform into column vector
        #so that it can be used in the gather call
        #i.e. we will use it to pick out from the computed value
        #tensor only values indexed by selected action
        action=(torch.Tensor(action).view(-1,1).long()).to(device)
        #print('action: ')
        #print(action)
        #print('contiguous?', action.is_contiguous())
        done=torch.Tensor(done).to(device)
        
        #print('shape of: state, new state, reward, action, done:')
        #print(state.shape)
        #print(new_state.shape)
        #print(reward.shape)
        #print(action.shape)
        #print(done.shape)
        
        
        self.nn.eval()
        self.target_nn.eval()
            
        new_state_values=self.target_nn(new_state).detach()
        #print('shape of: new_state_values')
        #print(new_state_values.shape)
        
        max_new_state_values=torch.max(new_state_values,dim=1)[0]
        #print('shape of: max_new_state_values')
        #print(max_new_state_values.shape)
        target_value=(reward + (1-done)*gamma*max_new_state_values).view(-1,1)
        
        #print('shape of: target_value')
        #print(target_value.shape)
        self.nn.train()
        
        #this will select only the values of the desired actions
        predicted_value=torch.gather(self.nn(state),1,action)
        #print('shape of: predicted_value')
        #print(predicted_value.shape)
        
        
        loss=self.loss_function(predicted_value,target_value)
        self.optimizer.zero_grad()
        loss.backward()
        
        if clip_error:
            for param in self.nn.parameters():
                param.grad.clamp_(-1.0,1.0)
        
        self.optimizer.step()
        
        if self.update_target_counter % update_target_frequency == 0:
            #print("***********************")
            #print("UPDATING TARGET NETWORK")
            #print("update counter: {}".format(self.update_target_counter))
            #print("***********************")
            self.target_nn.load_state_dict(self.nn.state_dict())
        
        self.update_target_counter+=1
        

In [16]:
a=torch.randn([3,3])
print(a)

tensor([[ 1.5225,  1.1023,  1.4868],
        [-2.0261, -0.6412,  0.4389],
        [ 1.6641,  0.0969,  0.9602]])


In [17]:
a.clamp_(-1.0,1.0)

tensor([[ 1.0000,  1.0000,  1.0000],
        [-1.0000, -0.6412,  0.4389],
        [ 1.0000,  0.0969,  0.9602]])

In [18]:
a

tensor([[ 1.0000,  1.0000,  1.0000],
        [-1.0000, -0.6412,  0.4389],
        [ 1.0000,  0.0969,  0.9602]])

In [19]:
memory=ExperienceReplay(replay_memory_size)

In [20]:
qnet_agent=QNet_Agent()

In [21]:
seed_value=23
env.seed(seed_value)
torch.manual_seed(seed_value)
random.seed(seed_value)

In [None]:
%%time

steps_total=np.full([num_episodes],-999,dtype=np.int32)
reward_total=np.full([num_episodes],-999,dtype=np.int32)

frames_total=0

solved_after = 0
solved = False

start_time = time.time()

for i_episode in range(num_episodes):
    
    state = env.reset()
    #for step in range(100):
    step=0
    reward_total[i_episode]=0
    
    while True:
        
        step+=1
        frames_total += 1
        
        epsilon=calculate_epsilon(frames_total)
        
        #action=env.action_space.sample()
        action=qnet_agent.select_action(state,epsilon)
        
        new_state, reward, done, info = env.step(action)
        memory.push(state, action, new_state,
                     reward, done)
        
        reward_total[i_episode]+=reward
        
        qnet_agent.optimize()
        
        state=new_state
        
        if done:
            steps_total[i_episode]=step
            
            if i_episode>100:
                mean_reward_100 = np.sum(reward_total[i_episode-100:i_episode])/100
            
                if (mean_reward_100 > score_to_solve and solved == False):
                    print("SOLVED! After %i episodes " % i_episode)
                    solved_after = i_episode
                    solved = True
            
            if (i_episode % report_interval == 0 and i_episode>1):
                print("**** Episode  {} **** ".format(i_episode))
                recent_avg_reward=np.average(reward_total[i_episode-report_interval:i_episode])
                print("Recent average reward: {}".format(recent_avg_reward))
                if i_episode>100:
                    print("Reward over last 100: {}".format(mean_reward_100))
                full_avg_so_far=np.average(reward_total[:i_episode])
                print("Average over all episodes so far: {}".format(full_avg_so_far))
                print("epsilon: {}".format(epsilon))
            
                #print("Episode {} finished after: {}".format(i_episode,step))
            break
            
if solved:
    print("Solved after %i episodes" % solved_after)
        

**** Episode  10 **** 
Recent average reward: 1.9
Average over all episodes so far: 1.9
epsilon: 0.016405912952911657
**** Episode  20 **** 
Recent average reward: 2.1
Average over all episodes so far: 2.0
epsilon: 0.010057683821353419
**** Episode  30 **** 
Recent average reward: 1.4
Average over all episodes so far: 1.8
epsilon: 0.010001167630803692
**** Episode  40 **** 
Recent average reward: 1.1
Average over all episodes so far: 1.625
epsilon: 0.0100000226176827
**** Episode  50 **** 
Recent average reward: 2.0
Average over all episodes so far: 1.7
epsilon: 0.010000000258394452
**** Episode  60 **** 
Recent average reward: 1.2
Average over all episodes so far: 1.6166666666666667
epsilon: 0.010000000004248166
**** Episode  70 **** 
Recent average reward: 2.3
Average over all episodes so far: 1.7142857142857142
epsilon: 0.010000000000030334
**** Episode  80 **** 
Recent average reward: 1.7
Average over all episodes so far: 1.7125
epsilon: 0.010000000000000387
**** Episode  90 **** 


**** Episode  600 **** 
Recent average reward: 1.7
Reward over last 100: 2.27
Average over all episodes so far: 1.955
epsilon: 0.01
**** Episode  610 **** 
Recent average reward: 1.4
Reward over last 100: 2.11
Average over all episodes so far: 1.9459016393442623
epsilon: 0.01
**** Episode  620 **** 
Recent average reward: 2.1
Reward over last 100: 2.14
Average over all episodes so far: 1.9483870967741936
epsilon: 0.01
**** Episode  630 **** 
Recent average reward: 1.3
Reward over last 100: 1.98
Average over all episodes so far: 1.938095238095238
epsilon: 0.01
**** Episode  640 **** 
Recent average reward: 1.5
Reward over last 100: 1.9
Average over all episodes so far: 1.93125
epsilon: 0.01
**** Episode  650 **** 
Recent average reward: 1.5
Reward over last 100: 1.76
Average over all episodes so far: 1.9246153846153846
epsilon: 0.01
**** Episode  660 **** 
Recent average reward: 2.3
Reward over last 100: 1.76
Average over all episodes so far: 1.9303030303030304
epsilon: 0.01
**** Episod

**** Episode  1180 **** 
Recent average reward: 1.2
Reward over last 100: 2.27
Average over all episodes so far: 2.1652542372881354
epsilon: 0.01
**** Episode  1190 **** 
Recent average reward: 2.5
Reward over last 100: 2.19
Average over all episodes so far: 2.168067226890756
epsilon: 0.01
**** Episode  1200 **** 
Recent average reward: 3.0
Reward over last 100: 2.16
Average over all episodes so far: 2.175
epsilon: 0.01
**** Episode  1210 **** 
Recent average reward: 2.3
Reward over last 100: 2.16
Average over all episodes so far: 2.1760330578512397
epsilon: 0.01
**** Episode  1220 **** 
Recent average reward: 3.3
Reward over last 100: 2.26
Average over all episodes so far: 2.185245901639344
epsilon: 0.01
**** Episode  1230 **** 
Recent average reward: 3.0
Reward over last 100: 2.37
Average over all episodes so far: 2.191869918699187
epsilon: 0.01
**** Episode  1240 **** 
Recent average reward: 2.7
Reward over last 100: 2.44
Average over all episodes so far: 2.1959677419354837
epsilon:

**** Episode  1750 **** 
Recent average reward: 3.2
Reward over last 100: 2.91
Average over all episodes so far: 2.352
epsilon: 0.01
**** Episode  1760 **** 
Recent average reward: 2.5
Reward over last 100: 2.75
Average over all episodes so far: 2.3528409090909093
epsilon: 0.01
**** Episode  1770 **** 
Recent average reward: 2.4
Reward over last 100: 2.77
Average over all episodes so far: 2.353107344632768
epsilon: 0.01
**** Episode  1780 **** 
Recent average reward: 2.3
Reward over last 100: 2.54
Average over all episodes so far: 2.352808988764045
epsilon: 0.01
**** Episode  1790 **** 
Recent average reward: 4.9
Reward over last 100: 2.75
Average over all episodes so far: 2.3670391061452514
epsilon: 0.01
**** Episode  1800 **** 
Recent average reward: 2.8
Reward over last 100: 2.88
Average over all episodes so far: 2.3694444444444445
epsilon: 0.01
**** Episode  1810 **** 
Recent average reward: 3.5
Reward over last 100: 2.98
Average over all episodes so far: 2.3756906077348066
epsilon

**** Episode  2320 **** 
Recent average reward: 4.4
Reward over last 100: 5.51
Average over all episodes so far: 2.5853448275862068
epsilon: 0.01
**** Episode  2330 **** 
Recent average reward: 2.5
Reward over last 100: 5.39
Average over all episodes so far: 2.584978540772532
epsilon: 0.01
**** Episode  2340 **** 
Recent average reward: 2.7
Reward over last 100: 5.42
Average over all episodes so far: 2.5854700854700856
epsilon: 0.01
**** Episode  2350 **** 
Recent average reward: 3.2
Reward over last 100: 5.24
Average over all episodes so far: 2.588085106382979
epsilon: 0.01
**** Episode  2360 **** 
Recent average reward: 3.3
Reward over last 100: 4.78
Average over all episodes so far: 2.5911016949152543
epsilon: 0.01
**** Episode  2370 **** 
Recent average reward: 3.5
Reward over last 100: 4.52
Average over all episodes so far: 2.5949367088607596
epsilon: 0.01
**** Episode  2380 **** 
Recent average reward: 2.9
Reward over last 100: 4.06
Average over all episodes so far: 2.59621848739

**** Episode  2890 **** 
Recent average reward: 3.2
Reward over last 100: 4.31
Average over all episodes so far: 2.75121107266436
epsilon: 0.01
**** Episode  2900 **** 
Recent average reward: 2.8
Reward over last 100: 4.16
Average over all episodes so far: 2.7513793103448276
epsilon: 0.01
**** Episode  2910 **** 
Recent average reward: 3.0
Reward over last 100: 3.94
Average over all episodes so far: 2.752233676975945
epsilon: 0.01
**** Episode  2920 **** 
Recent average reward: 3.2
Reward over last 100: 3.88
Average over all episodes so far: 2.753767123287671
epsilon: 0.01
**** Episode  2930 **** 
Recent average reward: 2.9
Reward over last 100: 3.68
Average over all episodes so far: 2.7542662116040955
epsilon: 0.01
**** Episode  2940 **** 
Recent average reward: 2.4
Reward over last 100: 3.35
Average over all episodes so far: 2.753061224489796
epsilon: 0.01
**** Episode  2950 **** 
Recent average reward: 2.2
Reward over last 100: 3.1
Average over all episodes so far: 2.751186440677966

**** Episode  3460 **** 
Recent average reward: 3.7
Reward over last 100: 2.74
Average over all episodes so far: 2.9196531791907514
epsilon: 0.01
**** Episode  3470 **** 
Recent average reward: 5.8
Reward over last 100: 3.05
Average over all episodes so far: 2.9279538904899134
epsilon: 0.01
**** Episode  3480 **** 
Recent average reward: 6.6
Reward over last 100: 3.42
Average over all episodes so far: 2.938505747126437
epsilon: 0.01
**** Episode  3490 **** 
Recent average reward: 4.5
Reward over last 100: 3.62
Average over all episodes so far: 2.94297994269341
epsilon: 0.01
**** Episode  3500 **** 
Recent average reward: 4.7
Reward over last 100: 3.87
Average over all episodes so far: 2.948
epsilon: 0.01
**** Episode  3510 **** 
Recent average reward: 3.4
Reward over last 100: 3.92
Average over all episodes so far: 2.949287749287749
epsilon: 0.01
**** Episode  3520 **** 
Recent average reward: 7.0
Reward over last 100: 4.35
Average over all episodes so far: 2.9607954545454547
epsilon: 

**** Episode  4030 **** 
Recent average reward: 4.9
Reward over last 100: 4.67
Average over all episodes so far: 3.0707196029776673
epsilon: 0.01
**** Episode  4040 **** 
Recent average reward: 7.0
Reward over last 100: 4.99
Average over all episodes so far: 3.0804455445544554
epsilon: 0.01
**** Episode  4050 **** 
Recent average reward: 6.2
Reward over last 100: 5.17
Average over all episodes so far: 3.0881481481481483
epsilon: 0.01
**** Episode  4060 **** 
Recent average reward: 6.1
Reward over last 100: 5.13
Average over all episodes so far: 3.095566502463054
epsilon: 0.01
**** Episode  4070 **** 
Recent average reward: 8.2
Reward over last 100: 5.5
Average over all episodes so far: 3.108108108108108
epsilon: 0.01
**** Episode  4080 **** 
Recent average reward: 6.0
Reward over last 100: 5.6
Average over all episodes so far: 3.1151960784313726
epsilon: 0.01
**** Episode  4090 **** 
Recent average reward: 4.6
Reward over last 100: 5.62
Average over all episodes so far: 3.1188264058679

**** Episode  4600 **** 
Recent average reward: 3.2
Reward over last 100: 3.68
Average over all episodes so far: 3.1767391304347825
epsilon: 0.01
**** Episode  4610 **** 
Recent average reward: 4.5
Reward over last 100: 3.88
Average over all episodes so far: 3.179609544468547
epsilon: 0.01
**** Episode  4620 **** 
Recent average reward: 4.0
Reward over last 100: 3.9
Average over all episodes so far: 3.1813852813852814
epsilon: 0.01
**** Episode  4630 **** 
Recent average reward: 7.5
Reward over last 100: 4.26
Average over all episodes so far: 3.1907127429805615
epsilon: 0.01
**** Episode  4640 **** 
Recent average reward: 4.7
Reward over last 100: 4.47
Average over all episodes so far: 3.1939655172413794
epsilon: 0.01
**** Episode  4650 **** 
Recent average reward: 3.4
Reward over last 100: 4.46
Average over all episodes so far: 3.1944086021505376
epsilon: 0.01
**** Episode  4660 **** 
Recent average reward: 6.5
Reward over last 100: 4.72
Average over all episodes so far: 3.20150214592

**** Episode  5170 **** 
Recent average reward: 3.4
Reward over last 100: 3.18
Average over all episodes so far: 3.2295938104448743
epsilon: 0.01
**** Episode  5180 **** 
Recent average reward: 4.1
Reward over last 100: 3.06
Average over all episodes so far: 3.231274131274131
epsilon: 0.01
**** Episode  5190 **** 
Recent average reward: 2.9
Reward over last 100: 2.91
Average over all episodes so far: 3.230635838150289
epsilon: 0.01
**** Episode  5200 **** 
Recent average reward: 3.3
Reward over last 100: 2.78
Average over all episodes so far: 3.230769230769231
epsilon: 0.01
**** Episode  5210 **** 
Recent average reward: 4.0
Reward over last 100: 3.03
Average over all episodes so far: 3.232245681381958
epsilon: 0.01
**** Episode  5220 **** 
Recent average reward: 3.6
Reward over last 100: 3.17
Average over all episodes so far: 3.232950191570881
epsilon: 0.01
**** Episode  5230 **** 
Recent average reward: 3.2
Reward over last 100: 3.16
Average over all episodes so far: 3.23288718929254

**** Episode  5750 **** 
Recent average reward: 3.1
Reward over last 100: 3.71
Average over all episodes so far: 3.2926956521739132
epsilon: 0.01
**** Episode  5760 **** 
Recent average reward: 2.6
Reward over last 100: 3.41
Average over all episodes so far: 3.2914930555555557
epsilon: 0.01
**** Episode  5770 **** 
Recent average reward: 5.4
Reward over last 100: 3.13
Average over all episodes so far: 3.295147313691508
epsilon: 0.01
**** Episode  5780 **** 
Recent average reward: 3.6
Reward over last 100: 2.98
Average over all episodes so far: 3.295674740484429
epsilon: 0.01
**** Episode  5790 **** 
Recent average reward: 6.2
Reward over last 100: 3.22
Average over all episodes so far: 3.300690846286701
epsilon: 0.01
**** Episode  5800 **** 
Recent average reward: 2.6
Reward over last 100: 3.09
Average over all episodes so far: 3.2994827586206896
epsilon: 0.01
**** Episode  5810 **** 
Recent average reward: 3.6
Reward over last 100: 3.2
Average over all episodes so far: 3.3
epsilon: 0.

**** Episode  6320 **** 
Recent average reward: 3.9
Reward over last 100: 2.77
Average over all episodes so far: 3.3599683544303796
epsilon: 0.01
**** Episode  6330 **** 
Recent average reward: 4.3
Reward over last 100: 3.0
Average over all episodes so far: 3.3614533965244866
epsilon: 0.01
**** Episode  6340 **** 
Recent average reward: 8.2
Reward over last 100: 3.4
Average over all episodes so far: 3.369085173501577
epsilon: 0.01
**** Episode  6350 **** 
Recent average reward: 4.3
Reward over last 100: 3.54
Average over all episodes so far: 3.3705511811023623
epsilon: 0.01
**** Episode  6360 **** 
Recent average reward: 5.3
Reward over last 100: 3.8
Average over all episodes so far: 3.3735849056603775
epsilon: 0.01
**** Episode  6370 **** 
Recent average reward: 3.5
Reward over last 100: 3.91
Average over all episodes so far: 3.3737833594976454
epsilon: 0.01
**** Episode  6380 **** 
Recent average reward: 2.8
Reward over last 100: 3.88
Average over all episodes so far: 3.3728840125391

**** Episode  6890 **** 
Recent average reward: 2.6
Reward over last 100: 3.32
Average over all episodes so far: 3.4339622641509435
epsilon: 0.01
**** Episode  6900 **** 
Recent average reward: 3.7
Reward over last 100: 3.15
Average over all episodes so far: 3.4343478260869564
epsilon: 0.01
**** Episode  6910 **** 
Recent average reward: 2.3
Reward over last 100: 2.99
Average over all episodes so far: 3.4327062228654124
epsilon: 0.01
**** Episode  6920 **** 
Recent average reward: 3.8
Reward over last 100: 2.89
Average over all episodes so far: 3.4332369942196532
epsilon: 0.01
**** Episode  6930 **** 
Recent average reward: 3.8
Reward over last 100: 2.85
Average over all episodes so far: 3.433766233766234
epsilon: 0.01
**** Episode  6940 **** 
Recent average reward: 3.4
Reward over last 100: 2.85
Average over all episodes so far: 3.4337175792507204
epsilon: 0.01
**** Episode  6950 **** 
Recent average reward: 2.9
Reward over last 100: 2.83
Average over all episodes so far: 3.4329496402

**** Episode  7460 **** 
Recent average reward: 6.2
Reward over last 100: 3.26
Average over all episodes so far: 3.443029490616622
epsilon: 0.01
**** Episode  7470 **** 
Recent average reward: 4.6
Reward over last 100: 3.31
Average over all episodes so far: 3.444578313253012
epsilon: 0.01
**** Episode  7480 **** 
Recent average reward: 3.5
Reward over last 100: 3.46
Average over all episodes so far: 3.4446524064171125
epsilon: 0.01
**** Episode  7490 **** 
Recent average reward: 6.0
Reward over last 100: 3.85
Average over all episodes so far: 3.448064085447263
epsilon: 0.01
**** Episode  7500 **** 
Recent average reward: 6.0
Reward over last 100: 4.29
Average over all episodes so far: 3.4514666666666667
epsilon: 0.01
**** Episode  7510 **** 
Recent average reward: 5.9
Reward over last 100: 4.6
Average over all episodes so far: 3.454727030625832
epsilon: 0.01
**** Episode  7520 **** 
Recent average reward: 4.5
Reward over last 100: 4.82
Average over all episodes so far: 3.45611702127659

**** Episode  8030 **** 
Recent average reward: 2.1
Reward over last 100: 3.59
Average over all episodes so far: 3.488293897882939
epsilon: 0.01
**** Episode  8040 **** 
Recent average reward: 1.3
Reward over last 100: 3.45
Average over all episodes so far: 3.4855721393034824
epsilon: 0.01
**** Episode  8050 **** 
Recent average reward: 4.4
Reward over last 100: 3.6
Average over all episodes so far: 3.4867080745341616
epsilon: 0.01
**** Episode  8060 **** 
Recent average reward: 3.9
Reward over last 100: 3.6
Average over all episodes so far: 3.4872208436724565
epsilon: 0.01
**** Episode  8070 **** 
Recent average reward: 3.0
Reward over last 100: 3.42
Average over all episodes so far: 3.486617100371747
epsilon: 0.01
**** Episode  8080 **** 
Recent average reward: 3.1
Reward over last 100: 3.37
Average over all episodes so far: 3.486138613861386
epsilon: 0.01
**** Episode  8090 **** 
Recent average reward: 3.1
Reward over last 100: 3.3
Average over all episodes so far: 3.485661310259579

**** Episode  8600 **** 
Recent average reward: 3.3
Reward over last 100: 4.95
Average over all episodes so far: 3.5262790697674418
epsilon: 0.01
**** Episode  8610 **** 
Recent average reward: 3.1
Reward over last 100: 4.8
Average over all episodes so far: 3.5257839721254354
epsilon: 0.01
**** Episode  8620 **** 
Recent average reward: 3.2
Reward over last 100: 4.56
Average over all episodes so far: 3.5254060324825987
epsilon: 0.01
**** Episode  8640 **** 
Recent average reward: 6.2
Reward over last 100: 4.82
Average over all episodes so far: 3.528703703703704
epsilon: 0.01
**** Episode  8650 **** 
Recent average reward: 4.5
Reward over last 100: 4.52
Average over all episodes so far: 3.5298265895953755
epsilon: 0.01
**** Episode  8660 **** 
Recent average reward: 4.3
Reward over last 100: 4.28
Average over all episodes so far: 3.530715935334873
epsilon: 0.01
**** Episode  8670 **** 
Recent average reward: 4.8
Reward over last 100: 4.13
Average over all episodes so far: 3.532179930795

**** Episode  9210 **** 
Recent average reward: 4.5
Reward over last 100: 4.86
Average over all episodes so far: 3.5715526601520087
epsilon: 0.01
**** Episode  9220 **** 
Recent average reward: 3.3
Reward over last 100: 4.81
Average over all episodes so far: 3.5712581344902388
epsilon: 0.01
**** Episode  9230 **** 
Recent average reward: 3.4
Reward over last 100: 4.78
Average over all episodes so far: 3.5710725893824486
epsilon: 0.01
**** Episode  9240 **** 
Recent average reward: 3.4
Reward over last 100: 4.63
Average over all episodes so far: 3.570887445887446
epsilon: 0.01
**** Episode  9250 **** 
Recent average reward: 2.7
Reward over last 100: 4.57
Average over all episodes so far: 3.569945945945946
epsilon: 0.01
**** Episode  9260 **** 
Recent average reward: 3.2
Reward over last 100: 4.39
Average over all episodes so far: 3.5695464362850973
epsilon: 0.01
**** Episode  9270 **** 
Recent average reward: 3.0
Reward over last 100: 4.0
Average over all episodes so far: 3.568932038834

**** Episode  9790 **** 
Recent average reward: 2.9
Reward over last 100: 3.52
Average over all episodes so far: 3.5651685393258425
epsilon: 0.01
**** Episode  9800 **** 
Recent average reward: 3.4
Reward over last 100: 3.6
Average over all episodes so far: 3.565
epsilon: 0.01
**** Episode  9810 **** 
Recent average reward: 3.1
Reward over last 100: 3.46
Average over all episodes so far: 3.564525993883792
epsilon: 0.01
**** Episode  9820 **** 
Recent average reward: 6.3
Reward over last 100: 3.77
Average over all episodes so far: 3.5673116089613033
epsilon: 0.01
**** Episode  9830 **** 
Recent average reward: 5.5
Reward over last 100: 4.05
Average over all episodes so far: 3.5692777212614444
epsilon: 0.01
**** Episode  9840 **** 
Recent average reward: 5.2
Reward over last 100: 4.28
Average over all episodes so far: 3.5709349593495934
epsilon: 0.01
**** Episode  9850 **** 
Recent average reward: 7.4
Reward over last 100: 4.78
Average over all episodes so far: 3.5748223350253805
epsilon

**** Episode  10360 **** 
Recent average reward: 4.0
Reward over last 100: 4.41
Average over all episodes so far: 3.588706563706564
epsilon: 0.01
**** Episode  10370 **** 
Recent average reward: 4.3
Reward over last 100: 4.18
Average over all episodes so far: 3.5893924783027966
epsilon: 0.01
**** Episode  10380 **** 
Recent average reward: 3.4
Reward over last 100: 3.87
Average over all episodes so far: 3.589210019267823
epsilon: 0.01
**** Episode  10390 **** 
Recent average reward: 5.2
Reward over last 100: 3.77
Average over all episodes so far: 3.5907603464870066
epsilon: 0.01
**** Episode  10400 **** 
Recent average reward: 6.5
Reward over last 100: 4.04
Average over all episodes so far: 3.593557692307692
epsilon: 0.01
**** Episode  10410 **** 
Recent average reward: 5.0
Reward over last 100: 4.15
Average over all episodes so far: 3.5949087415946206
epsilon: 0.01
**** Episode  10420 **** 
Recent average reward: 3.5
Reward over last 100: 4.29
Average over all episodes so far: 3.59481

**** Episode  10920 **** 
Recent average reward: 7.1
Reward over last 100: 4.94
Average over all episodes so far: 3.641117216117216
epsilon: 0.01
**** Episode  10930 **** 
Recent average reward: 5.0
Reward over last 100: 5.13
Average over all episodes so far: 3.642360475754803
epsilon: 0.01
**** Episode  10940 **** 
Recent average reward: 3.3
Reward over last 100: 4.99
Average over all episodes so far: 3.6420475319926875
epsilon: 0.01
**** Episode  10950 **** 
Recent average reward: 5.2
Reward over last 100: 4.97
Average over all episodes so far: 3.6434703196347034
epsilon: 0.01
**** Episode  10960 **** 
Recent average reward: 2.7
Reward over last 100: 4.89
Average over all episodes so far: 3.642609489051095
epsilon: 0.01
**** Episode  10970 **** 
Recent average reward: 3.2
Reward over last 100: 4.9
Average over all episodes so far: 3.6422060164083865
epsilon: 0.01
**** Episode  10980 **** 
Recent average reward: 1.5
Reward over last 100: 4.71
Average over all episodes so far: 3.640255

**** Episode  11480 **** 
Recent average reward: 4.3
Reward over last 100: 3.27
Average over all episodes so far: 3.677264808362369
epsilon: 0.01
**** Episode  11490 **** 
Recent average reward: 3.1
Reward over last 100: 3.29
Average over all episodes so far: 3.676762402088773
epsilon: 0.01
**** Episode  11500 **** 
Recent average reward: 4.2
Reward over last 100: 3.33
Average over all episodes so far: 3.6772173913043478
epsilon: 0.01
**** Episode  11510 **** 
Recent average reward: 3.0
Reward over last 100: 3.12
Average over all episodes so far: 3.6766290182450043
epsilon: 0.01
**** Episode  11520 **** 
Recent average reward: 3.9
Reward over last 100: 3.19
Average over all episodes so far: 3.6768229166666666
epsilon: 0.01
**** Episode  11530 **** 
Recent average reward: 4.5
Reward over last 100: 3.42
Average over all episodes so far: 3.6775368603642673
epsilon: 0.01
**** Episode  11540 **** 
Recent average reward: 6.6
Reward over last 100: 3.72
Average over all episodes so far: 3.6800

**** Episode  12040 **** 
Recent average reward: 2.8
Reward over last 100: 3.32
Average over all episodes so far: 3.697674418604651
epsilon: 0.01
**** Episode  12050 **** 
Recent average reward: 4.9
Reward over last 100: 3.23
Average over all episodes so far: 3.6986721991701246
epsilon: 0.01
**** Episode  12060 **** 
Recent average reward: 3.9
Reward over last 100: 3.24
Average over all episodes so far: 3.6988391376451077
epsilon: 0.01
**** Episode  12070 **** 
Recent average reward: 2.5
Reward over last 100: 3.15
Average over all episodes so far: 3.6978458989229495
epsilon: 0.01
**** Episode  12080 **** 
Recent average reward: 2.7
Reward over last 100: 3.12
Average over all episodes so far: 3.697019867549669
epsilon: 0.01
**** Episode  12090 **** 
Recent average reward: 3.5
Reward over last 100: 3.16
Average over all episodes so far: 3.6968569065343257
epsilon: 0.01
**** Episode  12100 **** 
Recent average reward: 3.1
Reward over last 100: 3.15
Average over all episodes so far: 3.6963

**** Episode  12600 **** 
Recent average reward: 3.4
Reward over last 100: 5.75
Average over all episodes so far: 3.7225396825396824
epsilon: 0.01
**** Episode  12610 **** 
Recent average reward: 3.5
Reward over last 100: 5.52
Average over all episodes so far: 3.722363203806503
epsilon: 0.01
**** Episode  12620 **** 
Recent average reward: 3.8
Reward over last 100: 5.43
Average over all episodes so far: 3.7224247226624407
epsilon: 0.01
**** Episode  12630 **** 
Recent average reward: 3.6
Reward over last 100: 5.25
Average over all episodes so far: 3.7223277909738717
epsilon: 0.01
**** Episode  12640 **** 
Recent average reward: 2.1
Reward over last 100: 5.04
Average over all episodes so far: 3.7210443037974685
epsilon: 0.01
**** Episode  12650 **** 
Recent average reward: 3.8
Reward over last 100: 4.96
Average over all episodes so far: 3.721106719367589
epsilon: 0.01
**** Episode  12660 **** 
Recent average reward: 4.7
Reward over last 100: 4.58
Average over all episodes so far: 3.7218

**** Episode  13170 **** 
Recent average reward: 4.3
Reward over last 100: 4.21
Average over all episodes so far: 3.734851936218679
epsilon: 0.01
**** Episode  13180 **** 
Recent average reward: 5.3
Reward over last 100: 4.24
Average over all episodes so far: 3.7360394537177544
epsilon: 0.01
**** Episode  13190 **** 
Recent average reward: 5.1
Reward over last 100: 4.14
Average over all episodes so far: 3.737073540561031
epsilon: 0.01
**** Episode  13200 **** 
Recent average reward: 4.1
Reward over last 100: 3.98
Average over all episodes so far: 3.737348484848485
epsilon: 0.01
**** Episode  13210 **** 
Recent average reward: 4.6
Reward over last 100: 4.08
Average over all episodes so far: 3.738001514004542
epsilon: 0.01
**** Episode  13220 **** 
Recent average reward: 4.5
Reward over last 100: 4.31
Average over all episodes so far: 3.7385779122541605
epsilon: 0.01
**** Episode  13230 **** 
Recent average reward: 1.8
Reward over last 100: 4.17
Average over all episodes so far: 3.737112

**** Episode  13730 **** 
Recent average reward: 3.3
Reward over last 100: 5.34
Average over all episodes so far: 3.759650400582666
epsilon: 0.01
**** Episode  13740 **** 
Recent average reward: 2.3
Reward over last 100: 4.96
Average over all episodes so far: 3.758588064046579
epsilon: 0.01
**** Episode  13750 **** 
Recent average reward: 1.9
Reward over last 100: 4.29
Average over all episodes so far: 3.7572363636363635
epsilon: 0.01
**** Episode  13760 **** 
Recent average reward: 2.0
Reward over last 100: 3.79
Average over all episodes so far: 3.7559593023255813
epsilon: 0.01
**** Episode  13770 **** 
Recent average reward: 3.2
Reward over last 100: 3.67
Average over all episodes so far: 3.7555555555555555
epsilon: 0.01
**** Episode  13780 **** 
Recent average reward: 6.1
Reward over last 100: 3.63
Average over all episodes so far: 3.757256894049347
epsilon: 0.01
**** Episode  13790 **** 
Recent average reward: 5.7
Reward over last 100: 3.58
Average over all episodes so far: 3.75866

**** Episode  14290 **** 
Recent average reward: 5.4
Reward over last 100: 4.86
Average over all episodes so far: 3.775367389783065
epsilon: 0.01
**** Episode  14300 **** 
Recent average reward: 4.3
Reward over last 100: 4.84
Average over all episodes so far: 3.7757342657342656
epsilon: 0.01
**** Episode  14310 **** 
Recent average reward: 5.2
Reward over last 100: 5.11
Average over all episodes so far: 3.7767295597484276
epsilon: 0.01
**** Episode  14320 **** 
Recent average reward: 3.5
Reward over last 100: 4.88
Average over all episodes so far: 3.776536312849162
epsilon: 0.01
**** Episode  14330 **** 
Recent average reward: 5.0
Reward over last 100: 4.88
Average over all episodes so far: 3.7773900907187716
epsilon: 0.01
**** Episode  14340 **** 
Recent average reward: 5.2
Reward over last 100: 4.93
Average over all episodes so far: 3.7783821478382147
epsilon: 0.01
**** Episode  14350 **** 
Recent average reward: 7.0
Reward over last 100: 5.02
Average over all episodes so far: 3.7806

**** Episode  14850 **** 
Recent average reward: 4.1
Reward over last 100: 3.45
Average over all episodes so far: 3.7938047138047137
epsilon: 0.01
**** Episode  14860 **** 
Recent average reward: 4.0
Reward over last 100: 3.54
Average over all episodes so far: 3.7939434724091523
epsilon: 0.01
**** Episode  14870 **** 
Recent average reward: 3.5
Reward over last 100: 3.52
Average over all episodes so far: 3.7937457969065234
epsilon: 0.01
**** Episode  14880 **** 
Recent average reward: 2.4
Reward over last 100: 3.55
Average over all episodes so far: 3.7928091397849464
epsilon: 0.01
**** Episode  14890 **** 
Recent average reward: 1.8
Reward over last 100: 3.32
Average over all episodes so far: 3.7914707857622565
epsilon: 0.01
**** Episode  14900 **** 
Recent average reward: 3.2
Reward over last 100: 3.26
Average over all episodes so far: 3.791073825503356
epsilon: 0.01
**** Episode  14910 **** 
Recent average reward: 1.7
Reward over last 100: 2.96
Average over all episodes so far: 3.789

**** Episode  15420 **** 
Recent average reward: 4.9
Reward over last 100: 3.83
Average over all episodes so far: 3.8001945525291827
epsilon: 0.01
**** Episode  15430 **** 
Recent average reward: 6.6
Reward over last 100: 4.02
Average over all episodes so far: 3.8020090732339598
epsilon: 0.01
**** Episode  15440 **** 
Recent average reward: 6.8
Reward over last 100: 4.52
Average over all episodes so far: 3.8039507772020724
epsilon: 0.01
**** Episode  15450 **** 
Recent average reward: 6.2
Reward over last 100: 4.93
Average over all episodes so far: 3.8055016181229773
epsilon: 0.01
**** Episode  15460 **** 
Recent average reward: 7.9
Reward over last 100: 5.41
Average over all episodes so far: 3.808150064683053
epsilon: 0.01
**** Episode  15470 **** 
Recent average reward: 2.9
Reward over last 100: 5.2
Average over all episodes so far: 3.807563025210084
epsilon: 0.01
**** Episode  15480 **** 
Recent average reward: 2.4
Reward over last 100: 5.05
Average over all episodes so far: 3.80665

**** Episode  15990 **** 
Recent average reward: 5.3
Reward over last 100: 4.68
Average over all episodes so far: 3.829455909943715
epsilon: 0.01
**** Episode  16000 **** 
Recent average reward: 7.0
Reward over last 100: 4.92
Average over all episodes so far: 3.8314375
epsilon: 0.01
**** Episode  16010 **** 
Recent average reward: 6.5
Reward over last 100: 5.23
Average over all episodes so far: 3.833104309806371
epsilon: 0.01
**** Episode  16020 **** 
Recent average reward: 2.4
Reward over last 100: 5.16
Average over all episodes so far: 3.8322097378277156
epsilon: 0.01
**** Episode  16030 **** 
Recent average reward: 4.8
Reward over last 100: 5.23
Average over all episodes so far: 3.832813474734872
epsilon: 0.01
**** Episode  16040 **** 
Recent average reward: 2.3
Reward over last 100: 5.07
Average over all episodes so far: 3.831857855361596
epsilon: 0.01
**** Episode  16050 **** 
Recent average reward: 2.6
Reward over last 100: 4.82
Average over all episodes so far: 3.831090342679128

**** Episode  16560 **** 
Recent average reward: 3.9
Reward over last 100: 4.36
Average over all episodes so far: 3.8382246376811593
epsilon: 0.01
**** Episode  16570 **** 
Recent average reward: 5.7
Reward over last 100: 4.58
Average over all episodes so far: 3.8393482196741098
epsilon: 0.01
**** Episode  16580 **** 
Recent average reward: 4.5
Reward over last 100: 4.76
Average over all episodes so far: 3.8397466827503015
epsilon: 0.01
**** Episode  16590 **** 
Recent average reward: 4.1
Reward over last 100: 4.89
Average over all episodes so far: 3.8399035563592525
epsilon: 0.01
**** Episode  16600 **** 
Recent average reward: 4.4
Reward over last 100: 4.87
Average over all episodes so far: 3.840240963855422
epsilon: 0.01
**** Episode  16610 **** 
Recent average reward: 3.2
Reward over last 100: 4.58
Average over all episodes so far: 3.839855508729681
epsilon: 0.01
**** Episode  16620 **** 
Recent average reward: 4.8
Reward over last 100: 4.38
Average over all episodes so far: 3.8404

**** Episode  17120 **** 
Recent average reward: 3.5
Reward over last 100: 4.71
Average over all episodes so far: 3.8417056074766354
epsilon: 0.01
**** Episode  17130 **** 
Recent average reward: 4.5
Reward over last 100: 4.56
Average over all episodes so far: 3.8420899007589027
epsilon: 0.01
**** Episode  17140 **** 
Recent average reward: 5.9
Reward over last 100: 4.89
Average over all episodes so far: 3.8432905484247373
epsilon: 0.01
**** Episode  17150 **** 
Recent average reward: 3.3
Reward over last 100: 4.6
Average over all episodes so far: 3.8429737609329444
epsilon: 0.01
**** Episode  17160 **** 
Recent average reward: 7.8
Reward over last 100: 4.84
Average over all episodes so far: 3.8452797202797204
epsilon: 0.01
**** Episode  17170 **** 
Recent average reward: 6.7
Reward over last 100: 5.19
Average over all episodes so far: 3.846942341292953
epsilon: 0.01
**** Episode  17180 **** 
Recent average reward: 7.2
Reward over last 100: 5.14
Average over all episodes so far: 3.8488

**** Episode  17690 **** 
Recent average reward: 5.6
Reward over last 100: 6.87
Average over all episodes so far: 3.8695873374788015
epsilon: 0.01
**** Episode  17700 **** 
Recent average reward: 7.1
Reward over last 100: 6.76
Average over all episodes so far: 3.8714124293785313
epsilon: 0.01
**** Episode  17710 **** 
Recent average reward: 7.4
Reward over last 100: 6.81
Average over all episodes so far: 3.8734048560135514
epsilon: 0.01
**** Episode  17720 **** 
Recent average reward: 2.0
Reward over last 100: 6.45
Average over all episodes so far: 3.8723476297968396
epsilon: 0.01
**** Episode  17730 **** 
Recent average reward: 2.3
Reward over last 100: 5.66
Average over all episodes so far: 3.871460800902425
epsilon: 0.01
**** Episode  17740 **** 
Recent average reward: 3.4
Reward over last 100: 5.34
Average over all episodes so far: 3.87119503945885
epsilon: 0.01
**** Episode  17750 **** 
Recent average reward: 2.9
Reward over last 100: 5.04
Average over all episodes so far: 3.87064

**** Episode  18260 **** 
Recent average reward: 8.1
Reward over last 100: 5.18
Average over all episodes so far: 3.876725082146769
epsilon: 0.01
**** Episode  18270 **** 
Recent average reward: 6.1
Reward over last 100: 5.59
Average over all episodes so far: 3.8779419813902574
epsilon: 0.01
**** Episode  18280 **** 
Recent average reward: 6.0
Reward over last 100: 5.86
Average over all episodes so far: 3.8791028446389495
epsilon: 0.01
**** Episode  18290 **** 
Recent average reward: 3.6
Reward over last 100: 5.74
Average over all episodes so far: 3.8789502460360854
epsilon: 0.01
**** Episode  18300 **** 
Recent average reward: 3.4
Reward over last 100: 5.46
Average over all episodes so far: 3.878688524590164
epsilon: 0.01
**** Episode  18310 **** 
Recent average reward: 4.6
Reward over last 100: 5.5
Average over all episodes so far: 3.8790824685963954
epsilon: 0.01
**** Episode  18320 **** 
Recent average reward: 5.9
Reward over last 100: 5.76
Average over all episodes so far: 3.88018

**** Episode  18830 **** 
Recent average reward: 2.1
Reward over last 100: 4.95
Average over all episodes so far: 3.8852363250132766
epsilon: 0.01
**** Episode  18840 **** 
Recent average reward: 2.3
Reward over last 100: 4.79
Average over all episodes so far: 3.8843949044585986
epsilon: 0.01
**** Episode  18850 **** 
Recent average reward: 2.3
Reward over last 100: 4.56
Average over all episodes so far: 3.883554376657825
epsilon: 0.01
**** Episode  18860 **** 
Recent average reward: 3.1
Reward over last 100: 4.25
Average over all episodes so far: 3.883138918345705
epsilon: 0.01
**** Episode  18870 **** 
Recent average reward: 1.7
Reward over last 100: 3.64
Average over all episodes so far: 3.881981981981982
epsilon: 0.01
**** Episode  18880 **** 
Recent average reward: 4.2
Reward over last 100: 3.27
Average over all episodes so far: 3.8821504237288136
epsilon: 0.01
**** Episode  18890 **** 
Recent average reward: 4.3
Reward over last 100: 3.12
Average over all episodes so far: 3.88237

**** Episode  19390 **** 
Recent average reward: 3.6
Reward over last 100: 3.28
Average over all episodes so far: 3.8813821557503867
epsilon: 0.01
**** Episode  19400 **** 
Recent average reward: 2.7
Reward over last 100: 3.23
Average over all episodes so far: 3.8807731958762886
epsilon: 0.01
**** Episode  19410 **** 
Recent average reward: 3.2
Reward over last 100: 3.14
Average over all episodes so far: 3.8804224626481196
epsilon: 0.01
**** Episode  19420 **** 
Recent average reward: 2.3
Reward over last 100: 3.1
Average over all episodes so far: 3.879608650875386
epsilon: 0.01
**** Episode  19430 **** 
Recent average reward: 3.6
Reward over last 100: 3.17
Average over all episodes so far: 3.8794647452393205
epsilon: 0.01
**** Episode  19440 **** 
Recent average reward: 4.1
Reward over last 100: 3.23
Average over all episodes so far: 3.8795781893004113
epsilon: 0.01
**** Episode  19450 **** 
Recent average reward: 2.4
Reward over last 100: 3.08
Average over all episodes so far: 3.8788

**** Episode  19950 **** 
Recent average reward: 5.3
Reward over last 100: 4.81
Average over all episodes so far: 3.880451127819549
epsilon: 0.01
**** Episode  19960 **** 
Recent average reward: 3.9
Reward over last 100: 4.64
Average over all episodes so far: 3.8804609218436874
epsilon: 0.01
**** Episode  19970 **** 
Recent average reward: 5.6
Reward over last 100: 4.85
Average over all episodes so far: 3.8813219829744616
epsilon: 0.01
**** Episode  19980 **** 
Recent average reward: 5.2
Reward over last 100: 4.66
Average over all episodes so far: 3.881981981981982
epsilon: 0.01
**** Episode  19990 **** 
Recent average reward: 3.2
Reward over last 100: 4.39
Average over all episodes so far: 3.881640820410205
epsilon: 0.01
**** Episode  20000 **** 
Recent average reward: 2.8
Reward over last 100: 4.13
Average over all episodes so far: 3.8811
epsilon: 0.01
**** Episode  20010 **** 
Recent average reward: 3.9
Reward over last 100: 4.23
Average over all episodes so far: 3.881109445277361
e

**** Episode  20520 **** 
Recent average reward: 4.0
Reward over last 100: 3.24
Average over all episodes so far: 3.8878654970760236
epsilon: 0.01
**** Episode  20530 **** 
Recent average reward: 4.4
Reward over last 100: 3.46
Average over all episodes so far: 3.8881149537262543
epsilon: 0.01
**** Episode  20540 **** 
Recent average reward: 5.8
Reward over last 100: 3.59
Average over all episodes so far: 3.88904576436222
epsilon: 0.01
**** Episode  20550 **** 
Recent average reward: 4.6
Reward over last 100: 3.69
Average over all episodes so far: 3.8893917274939174
epsilon: 0.01
**** Episode  20560 **** 
Recent average reward: 4.8
Reward over last 100: 3.81
Average over all episodes so far: 3.8898346303501947
epsilon: 0.01
**** Episode  20570 **** 
Recent average reward: 5.2
Reward over last 100: 4.02
Average over all episodes so far: 3.8904715605250364
epsilon: 0.01
**** Episode  20580 **** 
Recent average reward: 6.5
Reward over last 100: 4.28
Average over all episodes so far: 3.8917

**** Episode  21100 **** 
Recent average reward: 4.9
Reward over last 100: 5.15
Average over all episodes so far: 3.903175355450237
epsilon: 0.01
**** Episode  21110 **** 
Recent average reward: 7.2
Reward over last 100: 5.22
Average over all episodes so far: 3.9047370914258646
epsilon: 0.01
**** Episode  21120 **** 
Recent average reward: 4.6
Reward over last 100: 4.84
Average over all episodes so far: 3.905066287878788
epsilon: 0.01
**** Episode  21130 **** 
Recent average reward: 3.1
Reward over last 100: 4.57
Average over all episodes so far: 3.9046852815901563
epsilon: 0.01
**** Episode  21140 **** 
Recent average reward: 4.1
Reward over last 100: 4.46
Average over all episodes so far: 3.9047776726584673
epsilon: 0.01
**** Episode  21150 **** 
Recent average reward: 3.3
Reward over last 100: 4.33
Average over all episodes so far: 3.9044917257683216
epsilon: 0.01
**** Episode  21160 **** 
Recent average reward: 3.4
Reward over last 100: 4.13
Average over all episodes so far: 3.9042

**** Episode  21660 **** 
Recent average reward: 2.9
Reward over last 100: 4.4
Average over all episodes so far: 3.9157894736842107
epsilon: 0.01
**** Episode  21670 **** 
Recent average reward: 2.6
Reward over last 100: 3.97
Average over all episodes so far: 3.915182279649285
epsilon: 0.01
**** Episode  21680 **** 
Recent average reward: 3.1
Reward over last 100: 3.68
Average over all episodes so far: 3.9148062730627307
epsilon: 0.01
**** Episode  21690 **** 
Recent average reward: 3.7
Reward over last 100: 3.6
Average over all episodes so far: 3.9147072383586905
epsilon: 0.01
**** Episode  21700 **** 
Recent average reward: 2.9
Reward over last 100: 3.56
Average over all episodes so far: 3.9142396313364056
epsilon: 0.01
**** Episode  21710 **** 
Recent average reward: 4.3
Reward over last 100: 3.4
Average over all episodes so far: 3.9144173192077383
epsilon: 0.01
**** Episode  21720 **** 
Recent average reward: 3.6
Reward over last 100: 3.44
Average over all episodes so far: 3.914272

**** Episode  22220 **** 
Recent average reward: 1.6
Reward over last 100: 3.04
Average over all episodes so far: 3.918181818181818
epsilon: 0.01
**** Episode  22230 **** 
Recent average reward: 1.5
Reward over last 100: 2.85
Average over all episodes so far: 3.917094017094017
epsilon: 0.01
**** Episode  22240 **** 
Recent average reward: 2.4
Reward over last 100: 2.55
Average over all episodes so far: 3.9164118705035973
epsilon: 0.01
**** Episode  22250 **** 
Recent average reward: 2.9
Reward over last 100: 2.47
Average over all episodes so far: 3.9159550561797754
epsilon: 0.01
**** Episode  22260 **** 
Recent average reward: 2.4
Reward over last 100: 2.4
Average over all episodes so far: 3.9152740341419587
epsilon: 0.01
**** Episode  22270 **** 
Recent average reward: 2.3
Reward over last 100: 2.2
Average over all episodes so far: 3.9145487202514593
epsilon: 0.01
**** Episode  22280 **** 
Recent average reward: 4.7
Reward over last 100: 2.4
Average over all episodes so far: 3.9149012

**** Episode  22780 **** 
Recent average reward: 4.5
Reward over last 100: 4.11
Average over all episodes so far: 3.916198419666374
epsilon: 0.01
**** Episode  22790 **** 
Recent average reward: 4.2
Reward over last 100: 4.22
Average over all episodes so far: 3.9163229486616937
epsilon: 0.01
**** Episode  22800 **** 
Recent average reward: 1.9
Reward over last 100: 4.11
Average over all episodes so far: 3.915438596491228
epsilon: 0.01
**** Episode  22810 **** 
Recent average reward: 2.5
Reward over last 100: 3.74
Average over all episodes so far: 3.9148180622533975
epsilon: 0.01
**** Episode  22820 **** 
Recent average reward: 1.7
Reward over last 100: 3.34
Average over all episodes so far: 3.9138475021910604
epsilon: 0.01
**** Episode  24610 **** 
Recent average reward: 2.2
Reward over last 100: 3.05
Average over all episodes so far: 3.922592442096709
epsilon: 0.01
**** Episode  24620 **** 
Recent average reward: 3.4
Reward over last 100: 2.96
Average over all episodes so far: 3.92238

**** Episode  25140 **** 
Recent average reward: 3.4
Reward over last 100: 4.95
Average over all episodes so far: 3.916467780429594
epsilon: 0.01
**** Episode  25150 **** 
Recent average reward: 3.6
Reward over last 100: 4.81
Average over all episodes so far: 3.9163419483101394
epsilon: 0.01
**** Episode  25160 **** 
Recent average reward: 3.6
Reward over last 100: 4.68
Average over all episodes so far: 3.9162162162162164
epsilon: 0.01
**** Episode  25170 **** 
Recent average reward: 4.9
Reward over last 100: 4.44
Average over all episodes so far: 3.916607071911005
epsilon: 0.01
**** Episode  25180 **** 
Recent average reward: 3.4
Reward over last 100: 4.1
Average over all episodes so far: 3.9164019062748214
epsilon: 0.01
**** Episode  25190 **** 
Recent average reward: 5.9
Reward over last 100: 4.19
Average over all episodes so far: 3.917189360857483
epsilon: 0.01
**** Episode  25200 **** 
Recent average reward: 5.7
Reward over last 100: 4.18
Average over all episodes so far: 3.917896

**** Episode  25710 **** 
Recent average reward: 5.7
Reward over last 100: 5.17
Average over all episodes so far: 3.915208090237262
epsilon: 0.01
**** Episode  25720 **** 
Recent average reward: 2.4
Reward over last 100: 4.87
Average over all episodes so far: 3.914618973561431
epsilon: 0.01
**** Episode  25730 **** 
Recent average reward: 3.4
Reward over last 100: 4.74
Average over all episodes so far: 3.91441896618733
epsilon: 0.01
**** Episode  25740 **** 
Recent average reward: 2.6
Reward over last 100: 4.56
Average over all episodes so far: 3.9139083139083137
epsilon: 0.01
**** Episode  25750 **** 
Recent average reward: 2.4
Reward over last 100: 4.44
Average over all episodes so far: 3.9133203883495145
epsilon: 0.01
**** Episode  25760 **** 
Recent average reward: 4.6
Reward over last 100: 4.26
Average over all episodes so far: 3.913586956521739
epsilon: 0.01
**** Episode  25770 **** 
Recent average reward: 4.1
Reward over last 100: 4.15
Average over all episodes so far: 3.9136592

**** Episode  26280 **** 
Recent average reward: 3.2
Reward over last 100: 4.03
Average over all episodes so far: 3.9116057838660576
epsilon: 0.01
**** Episode  26290 **** 
Recent average reward: 3.0
Reward over last 100: 3.78
Average over all episodes so far: 3.911259033853176
epsilon: 0.01
**** Episode  26300 **** 
Recent average reward: 3.3
Reward over last 100: 3.69
Average over all episodes so far: 3.9110266159695817
epsilon: 0.01
**** Episode  26310 **** 
Recent average reward: 4.7
Reward over last 100: 3.69
Average over all episodes so far: 3.911326491828202
epsilon: 0.01
**** Episode  26320 **** 
Recent average reward: 2.5
Reward over last 100: 3.64
Average over all episodes so far: 3.910790273556231
epsilon: 0.01
**** Episode  26330 **** 
Recent average reward: 3.2
Reward over last 100: 3.54
Average over all episodes so far: 3.910520319027725
epsilon: 0.01
**** Episode  26340 **** 
Recent average reward: 3.9
Reward over last 100: 3.56
Average over all episodes so far: 3.910516

**** Episode  26840 **** 
Recent average reward: 3.8
Reward over last 100: 3.91
Average over all episodes so far: 3.9097988077496275
epsilon: 0.01
**** Episode  26850 **** 
Recent average reward: 5.1
Reward over last 100: 4.15
Average over all episodes so far: 3.9102420856610802
epsilon: 0.01
**** Episode  26860 **** 
Recent average reward: 2.7
Reward over last 100: 3.9
Average over all episodes so far: 3.9097915115413255
epsilon: 0.01
**** Episode  26870 **** 
Recent average reward: 4.0
Reward over last 100: 3.93
Average over all episodes so far: 3.909825083736509
epsilon: 0.01
**** Episode  26880 **** 
Recent average reward: 5.9
Reward over last 100: 3.91
Average over all episodes so far: 3.910565476190476
epsilon: 0.01
**** Episode  26890 **** 
Recent average reward: 1.9
Reward over last 100: 3.72
Average over all episodes so far: 3.9098177761249535
epsilon: 0.01
**** Episode  26900 **** 
Recent average reward: 3.6
Reward over last 100: 3.6
Average over all episodes so far: 3.909702

**** Episode  27410 **** 
Recent average reward: 5.0
Reward over last 100: 3.92
Average over all episodes so far: 3.914666180226195
epsilon: 0.01
**** Episode  27420 **** 
Recent average reward: 7.3
Reward over last 100: 4.1
Average over all episodes so far: 3.915900802334063
epsilon: 0.01
**** Episode  27430 **** 
Recent average reward: 8.2
Reward over last 100: 4.32
Average over all episodes so far: 3.9174626321545754
epsilon: 0.01
**** Episode  27440 **** 
Recent average reward: 5.5
Reward over last 100: 4.5
Average over all episodes so far: 3.9180393586005833
epsilon: 0.01
**** Episode  27450 **** 
Recent average reward: 4.3
Reward over last 100: 4.72
Average over all episodes so far: 3.9181785063752277
epsilon: 0.01
**** Episode  27460 **** 
Recent average reward: 3.7
Reward over last 100: 4.69
Average over all episodes so far: 3.9180990531682447
epsilon: 0.01
**** Episode  27470 **** 
Recent average reward: 4.0
Reward over last 100: 4.82
Average over all episodes so far: 3.918128

**** Episode  27970 **** 
Recent average reward: 3.3
Reward over last 100: 3.66
Average over all episodes so far: 3.9226313907758312
epsilon: 0.01
**** Episode  27980 **** 
Recent average reward: 2.1
Reward over last 100: 3.43
Average over all episodes so far: 3.9219799857040742
epsilon: 0.01
**** Episode  27990 **** 
Recent average reward: 5.6
Reward over last 100: 3.61
Average over all episodes so far: 3.922579492675956
epsilon: 0.01
**** Episode  28000 **** 
Recent average reward: 4.0
Reward over last 100: 3.59
Average over all episodes so far: 3.9226071428571427
epsilon: 0.01
**** Episode  28010 **** 
Recent average reward: 2.7
Reward over last 100: 3.51
Average over all episodes so far: 3.9221706533380933
epsilon: 0.01
**** Episode  28020 **** 
Recent average reward: 3.2
Reward over last 100: 3.55
Average over all episodes so far: 3.921912919343326
epsilon: 0.01
**** Episode  28030 **** 
Recent average reward: 5.9
Reward over last 100: 3.58
Average over all episodes so far: 3.9226

In [None]:
plt.figure(2,figsize=[12,5])
plt.title("Score at end of episode")
plt.plot(reward_total[:frames_total])
plt.show()

In [None]:
print("Average number of steps: {}". format(np.average(steps_total[:frames_total])))

In [None]:
print("Average number of steps=reward: {}". format(np.average(steps_total)))
print("Average reward in last 100 episodes: {}". format(np.average(reward_total[frames_total-100:frames_total])))



plt.figure(1,figsize=[12,5])
plt.title("Rewards")
plt.bar(torch.arange(len(rewards_total)), rewards_total,alpha=0.6, color='green')
#plt.plot(rewards_total)
plt.show()

In [None]:
plt.figure(2,figsize=[12,5])
plt.title("Steps to finish episode")
plt.plot(steps_total[:frames_total])
plt.show()