In [1]:
import time

import tensorflow as tf
import numpy as np
import vizdoom as vd

from skimage.transform import rescale
from tqdm import trange
from IPython.display import HTML


In [2]:
#Specify the game scenario and the screen format/resolution

game = vd.DoomGame()
game.set_screen_format(vd.ScreenFormat.BGR24)
game.set_screen_resolution(vd.ScreenResolution.RES_320X240)
game.set_depth_buffer_enabled(False)
game.load_config('take_cover.cfg')

down_sample_ratio = 0.25
width = int(game.get_screen_width()*down_sample_ratio)
height = int(game.get_screen_height()*down_sample_ratio)
channels = game.get_screen_channels() + int(game.is_depth_buffer_enabled())

#Specify the available actions in the scenario

available_actions = game.get_available_buttons()
actions = [list(ohe) for ohe in list(np.identity(len(available_actions)))]
num_actions = len(available_actions)

#Specify the Q-network learning parameters

frame_delay = 12
buffer_size = 25000
epochs = 500
steps_per_epoch = 2000
learning_rate = 0.0025
gamma = 0
start_epsilon = 1.0
end_epsilon = 0.1
batch_size = 100
load_model = False
save_model = True
model_dir = './checkpoints/take_cover.ckpt'
num_ckpts = 40


In [3]:
#Create a buffer object that holds a set of training experiences (state-action-reward tuples)

class Buffer():
    def __init__(self, size=1000):
        self.buffer = list()
        self.length = len(self.buffer)
        self.size = size
        
#Add a new experience to the buffer (remove the oldest experience if the buffer is already full)
        
    def add_experience(self, experience):
        if self.length + 1 >= self.size:
            self.buffer[0:(self.length + 1) - self.size] = []
        
        self.buffer.append(experience)
        self.length = len(self.buffer)
            
#Return a batch of experience arrays randomly sampled from the buffer
            
    def sample_buffer(self, sample_size):
        sample = np.random.randint(self.length, size=sample_size)
        s1 = np.concatenate([self.buffer[idx][0] for idx in sample], axis=0)
        a = np.array([self.buffer[idx][1] for idx in sample])
        r = np.array([self.buffer[idx][2] for idx in sample])
        s2 = np.concatenate([self.buffer[idx][3] for idx in sample], axis=0)
        terminal = np.array([self.buffer[idx][4] for idx in sample], dtype=np.int32)
        
        return s1, a, r, s2, terminal

#Downsample and normalize an image array representing the game state at a given time stamp

def preprocess(image, down_sample_ratio=1):
    if float(down_sample_ratio) != 1.0:
        image = rescale(image=image,
                        scale=(down_sample_ratio,
                               down_sample_ratio),
                        mode='reflect')
    image = image.astype(np.float32)
    image = np.expand_dims(image, axis=0)

    return image

#Test the agent using a currently training or previously trained model

def test_agent(model, num_episodes, load_model, depth, training=True, session=None, model_dir=None):
    if load_model == True:
        sess = tf.Session()
        print('Loading model from', model_dir)
        tf.train.Saver().restore(sess, model_dir)
        
#Require an existing session if a pretrained model isn't provided
        
    elif load_model == False:
        sess = session

    game.set_sound_enabled(False)
    episode_rewards = list()
    
#Avoid reinitializing the game if this was already done by the training process
    
    if training == False:
        game.init()

    for i in range(num_episodes):
        game.new_episode()
    
        while not game.is_episode_finished():
            state = game.get_state()
            
            if depth == False:
                state_buffer = np.moveaxis(state.screen_buffer, 0, 2)
            
            elif depth == True:
                depth_buffer = state.depth_buffer
                state_buffer = np.stack((state.screen_buffer,
                                         depth_buffer), axis=-1)
                
            state1 = preprocess(state_buffer, down_sample_ratio)
            action = model.choose_action(sess, state1)[0]
            reward = game.make_action(actions[action])
            
#Add a delay between each time step so that the episodes occur at normal speed

            time.sleep(0.02)
        
        episode_rewards.append(game.get_total_reward())
        print('Test Episode {} Reward: {}'.format(i + 1, game.get_total_reward()))
        time.sleep(1)
    
#Avoid ending the game so that the training process can continue
    
    if training == False:
        game.close()
    
    return np.mean(episode_rewards)


In [4]:
#Create a Q-network to estimate values and choose actions for a given state

class Q_network():
    def __init__(self, network_name, height, width, channels, learning_rate=0.001):
        self.learning_rate = learning_rate
        self.s_t = tf.placeholder(tf.float32,
                                  shape=[None, height, width, channels],
                                  name=network_name + '_state'
                                 )
        self.a_t = tf.placeholder(tf.int32,
                                  shape=[None],
                                  name=network_name + '_action'
                                 )
        self.Q_target = tf.placeholder(tf.float32,
                                       shape=[None, num_actions],
                                       name=network_name + '_Q_target'
                                      )

        self.input_layer = tf.reshape(self.s_t,
                                      [-1, height, width, channels],
                                      name=network_name + '_input_layer'
                                     )
        self.conv1 = tf.layers.conv2d(inputs=self.input_layer,
                                      filters=32,
                                      kernel_size=[8, 8],
                                      strides=[4, 4],
                                      padding='valid',
                                      activation=tf.nn.relu,
                                      name=network_name + '_conv1_layer'
                                     )
        self.conv2 = tf.layers.conv2d(inputs=self.conv1,
                                      filters=64,
                                      kernel_size=[4, 4],
                                      strides=[2, 2],
                                      padding='valid',
                                      activation=tf.nn.relu,
                                      name=network_name + '_conv2_layer'
                                     )
        self.flatten = tf.layers.flatten(self.conv2,
                                         name=network_name + '_flatten'
                                        )
        self.dense = tf.layers.dense(inputs=self.flatten,
                                      units=512,
                                      activation=tf.nn.relu,
                                      name=network_name + '_dense1_layer'
                                    )
        self.Q_values = tf.layers.dense(inputs=self.dense,
                                        units=len(actions),
                                        activation=None,
                                        name=network_name + '_output_layer'
                                       )        
    
        self.best_action = tf.argmax(self.Q_values, 1)
        self.loss = tf.losses.mean_squared_error(self.Q_values,
                                                 self.Q_target)
        self.adam = tf.train.AdamOptimizer(learning_rate=self.learning_rate,
                                           name=network_name + '_adam'
                                          )
        self.train = self.adam.minimize(self.loss)
        
    def update_lr(self):
        self.learning_rate = 0.98*self.learning_rate
        
        return self.learning_rate

    def calculate_loss(self, session, s, q):
        L, _ = session.run([self.loss, self.train],
                           feed_dict={self.s_t: s,
                                      self.Q_target: q})
    
        return L

#Return the array of Q-values and the best action associated with a given state

    def get_Q_values(self, session, s):
        Q = session.run(self.Q_values,
                        feed_dict={self.s_t: s})

        return Q
    
    def choose_action(self, session, s):
        a = session.run(self.best_action,
                        feed_dict={self.s_t: s})
    
        return a
    
#Create a list of variable update operations

def update_graph(variables):
    update_ops = list()
    
#Assign weight values from the network created first to the one created second
    
    for idx, variable in enumerate(variables[:len(variables)//2]):
        op = variable.assign(variables[idx + len(variables)//2].value())
        update_ops.append(op)
    
    return update_ops

#Update the target network parameters to match those of the online network

def update_target(ops, session):
    for op in update_ops:
        session.run(op)


In [5]:
#For each time step, collect the following data:
#The current game state
#The action that was taken taken
#The reward obtained from the chosen action
#The next game state (store the first game state if the previous action ends the episode)
#A variable indicating whether the episode is over yet


tf.reset_default_graph()

#Instantiate the target network before the online network so that it's updated correctly

target_net = Q_network(network_name='target',
                       learning_rate=learning_rate,
                       height=height,
                       width=width,
                       channels=channels)
DQN = Q_network(network_name='online',
                learning_rate=learning_rate,
                height=height,
                width=width,
                channels=channels)

exp_buffer = Buffer(size=buffer_size)
session = tf.Session()
saver = tf.train.Saver(max_to_keep=num_ckpts, reshape=True)
weights = tf.trainable_variables()

update_ops = update_graph(weights)

if load_model == True:
    print('Loading model from', model_dir)
    tf.train.Saver().restore(session, model_dir)
    
elif load_model == False:
    session.run(tf.global_variables_initializer())

game.set_sound_enabled(False)
game.init()

t = 0
epoch_rank = list()


In [6]:
#Accumulate experiences in the buffer using an epsilon-greedy strategy with three training phases

for epoch in range(epochs):
    epoch_rewards = list()
    
    for step in trange(steps_per_epoch, leave=True):
        experience = list()
        game.new_episode()
        
        while not game.is_episode_finished():
            state = game.get_state()
            
#Substitute an array of zeros for the depth buffer if that setting is disabled

            if game.is_depth_buffer_enabled() == False:
                state1_buffer = np.moveaxis(state.screen_buffer, 0, 2)
            else:
                depth_buffer = state.depth_buffer
                state1_buffer = np.stack((state.screen_buffer,
                                          depth_buffer), axis=-1)
                
            state1 = preprocess(state1_buffer, down_sample_ratio)
            
#Explore the environment by choosing random actions with 100% probability for the first phase of training

            if epoch < 0.1*epochs:
                action = np.random.randint(num_actions)
            
#Increase the probability of greedily choosing an action by a constant amount at each epoch in the second phase
            
            elif epoch < 0.9*epochs:
                epsilon = start_epsilon - (epoch + 1 - 0.2*epochs)*(start_epsilon-end_epsilon)/(0.7*epochs)
            
                if np.random.uniform(0, 1) <= epsilon:
                    action = np.random.randint(num_actions)
                else:
                    action = DQN.choose_action(session, state1)[0]

#Select a random action with 10% probability in the final phase of training
                
            else:
                if np.random.uniform(0, 1) <= end_epsilon:
                    action = np.random.randint(num_actions)
                else:
                    action = DQN.choose_action(session, state1)[0]

            reward = game.make_action(actions[action], frame_delay)
            done = game.is_episode_finished()
            
            if done == False:
                state = game.get_state()
                
                if game.is_depth_buffer_enabled() == False:
                    state2_buffer = np.moveaxis(state.screen_buffer, 0, 2)
                else:
                    depth_buffer = state.depth_buffer
                    state2_buffer = np.stack((state.screen_buffer,
                                              depth_buffer), axis=-1)
                
                state2 = preprocess(state2_buffer, down_sample_ratio)
                
            elif done == True:
                state2 = state1
        
#Add the experience obtained from each time step to the buffer

            t += 1
            exp_buffer.add_experience((state1, action, reward, state2, done))
        
#Sample a minibatch from the buffer if there are enough experiences in the buffer

        if exp_buffer.length > batch_size:
            s1, a, r, s2, terminal = exp_buffer.sample_buffer(batch_size)
            
#Get the target values from the target Q-network
            
            target_Q = np.max(target_net.get_Q_values(session, s2), axis=1)
            
#Train the online Q-network by using a minibatch to update the action-value function
            
            Q2 = DQN.get_Q_values(session, s1)
            Q2[np.arange(batch_size), a] = r + gamma*(1 - terminal)*target_Q
            DQN.calculate_loss(session, s1, Q2)
            
        epoch_rewards.append(game.get_total_reward())
        
#Increase the discount factor at each epoch until it reaches 0.99
    
    if gamma < 0.99:
        gamma = 1-.98*(1-gamma)
    elif gamma >= 0.99:
        gamma = 0.99
        
#Decrease the learning rate at each epoch

    DQN.update_lr()
    target_net.update_lr()
    
    print('Epoch {} Mean Reward: {}'.format(epoch + 1, np.mean(epoch_rewards)))
    
#Update the target network every 10 epochs
    
    if (epoch + 1) % 10 == 0 and epoch > 0:
        update_target(update_ops, session)
        
#Save the model and test the agent for 10 episodes every 20 epochs
    
    if (epoch + 1) % 10 == 0 and epoch > 0:
        if save_model == True:
            checkpoint = model_dir + '-' + str(epoch + 1)
            print('Epoch {} Model saved to {}'.format(epoch + 1, model_dir))
            saver.save(session, model_dir, global_step=epoch + 1)
            
        update_target(update_ops, session)
        
        print('Epoch {} test:'.format(epoch + 1))
        test_reward = test_agent(DQN, num_episodes=20,
                                 training=True,
                                 load_model=False,
                                 depth=False,
                                 session=session,
                                 model_dir=model_dir)
        print('Epoch {} Average Test Reward: {}'.format(epoch + 1, test_reward))
        
        epoch_rank.append((test_reward, epoch + 1))
        
#Return a sorted list of epoch checkpoints based on average test episode reward
        
print(sorted(epoch_rank, reverse=True))
print('{} time steps experienced during training'.format(t))
game.close()
    

100%|██████████| 2000/2000 [15:29<00:00,  2.15it/s]


Epoch 1 Mean Reward: 393.456


100%|██████████| 2000/2000 [15:22<00:00,  2.17it/s]


Epoch 2 Mean Reward: 395.3495


100%|██████████| 2000/2000 [13:15<00:00,  2.52it/s]


Epoch 3 Mean Reward: 396.365


100%|██████████| 2000/2000 [12:58<00:00,  2.57it/s]


Epoch 4 Mean Reward: 392.6765


100%|██████████| 2000/2000 [15:31<00:00,  2.15it/s]


Epoch 5 Mean Reward: 393.8585


100%|██████████| 2000/2000 [14:13<00:00,  2.34it/s]


Epoch 6 Mean Reward: 390.7815


100%|██████████| 2000/2000 [13:52<00:00,  2.40it/s]


Epoch 7 Mean Reward: 384.8125


100%|██████████| 2000/2000 [12:40<00:00,  2.63it/s]


Epoch 8 Mean Reward: 393.3695


100%|██████████| 2000/2000 [11:03<00:00,  3.02it/s]


Epoch 9 Mean Reward: 393.6035


100%|██████████| 2000/2000 [10:38<00:00,  3.13it/s]


Epoch 10 Mean Reward: 387.751
Epoch 10 Model saved to ./checkpoints/take_cover.ckpt
Epoch 10 test:
Test Episode 1 Reward: 199.0
Test Episode 2 Reward: 232.0
Test Episode 3 Reward: 232.0
Test Episode 4 Reward: 181.0
Test Episode 5 Reward: 157.0
Test Episode 6 Reward: 232.0
Test Episode 7 Reward: 200.0
Test Episode 8 Reward: 232.0
Test Episode 9 Reward: 232.0
Test Episode 10 Reward: 232.0
Test Episode 11 Reward: 192.0
Test Episode 12 Reward: 232.0
Test Episode 13 Reward: 282.0
Test Episode 14 Reward: 298.0
Test Episode 15 Reward: 232.0
Test Episode 16 Reward: 186.0
Test Episode 17 Reward: 232.0
Test Episode 18 Reward: 258.0
Test Episode 19 Reward: 232.0
Test Episode 20 Reward: 232.0
Epoch 10 Average Test Reward: 225.25


100%|██████████| 2000/2000 [10:41<00:00,  3.12it/s]


Epoch 11 Mean Reward: 390.0305


100%|██████████| 2000/2000 [10:55<00:00,  3.05it/s]


Epoch 12 Mean Reward: 399.3755


100%|██████████| 2000/2000 [12:52<00:00,  2.59it/s]


Epoch 13 Mean Reward: 394.8005


100%|██████████| 2000/2000 [14:02<00:00,  2.37it/s]


Epoch 14 Mean Reward: 396.446


100%|██████████| 2000/2000 [12:24<00:00,  2.69it/s]


Epoch 15 Mean Reward: 392.2795


100%|██████████| 2000/2000 [11:52<00:00,  2.81it/s]


Epoch 16 Mean Reward: 391.8265


100%|██████████| 2000/2000 [11:42<00:00,  2.85it/s]


Epoch 17 Mean Reward: 394.9615


100%|██████████| 2000/2000 [11:31<00:00,  2.89it/s]


Epoch 18 Mean Reward: 385.1215


100%|██████████| 2000/2000 [11:23<00:00,  2.92it/s]


Epoch 19 Mean Reward: 387.38


100%|██████████| 2000/2000 [11:43<00:00,  2.84it/s]


Epoch 20 Mean Reward: 399.5855
Epoch 20 Model saved to ./checkpoints/take_cover.ckpt
Epoch 20 test:
Test Episode 1 Reward: 153.0
Test Episode 2 Reward: 283.0
Test Episode 3 Reward: 282.0
Test Episode 4 Reward: 269.0
Test Episode 5 Reward: 236.0
Test Episode 6 Reward: 283.0
Test Episode 7 Reward: 151.0
Test Episode 8 Reward: 258.0
Test Episode 9 Reward: 208.0
Test Episode 10 Reward: 239.0
Test Episode 11 Reward: 151.0
Test Episode 12 Reward: 119.0
Test Episode 13 Reward: 140.0
Test Episode 14 Reward: 283.0
Test Episode 15 Reward: 252.0
Test Episode 16 Reward: 283.0
Test Episode 17 Reward: 141.0
Test Episode 18 Reward: 283.0
Test Episode 19 Reward: 267.0
Test Episode 20 Reward: 142.0
Epoch 20 Average Test Reward: 221.15


100%|██████████| 2000/2000 [11:40<00:00,  2.85it/s]


Epoch 21 Mean Reward: 396.6985


100%|██████████| 2000/2000 [11:41<00:00,  2.85it/s]


Epoch 22 Mean Reward: 399.322


100%|██████████| 2000/2000 [11:24<00:00,  2.92it/s]


Epoch 23 Mean Reward: 388.5325


100%|██████████| 2000/2000 [11:23<00:00,  2.93it/s]


Epoch 24 Mean Reward: 388.0705


100%|██████████| 2000/2000 [11:30<00:00,  2.90it/s]


Epoch 25 Mean Reward: 392.356


100%|██████████| 2000/2000 [11:29<00:00,  2.90it/s]


Epoch 26 Mean Reward: 392.001


100%|██████████| 2000/2000 [11:31<00:00,  2.89it/s]


Epoch 27 Mean Reward: 393.0005


100%|██████████| 2000/2000 [11:44<00:00,  2.84it/s]


Epoch 28 Mean Reward: 400.6835


100%|██████████| 2000/2000 [11:28<00:00,  2.91it/s]


Epoch 29 Mean Reward: 391.177


100%|██████████| 2000/2000 [11:35<00:00,  2.87it/s]


Epoch 30 Mean Reward: 390.7915
Epoch 30 Model saved to ./checkpoints/take_cover.ckpt
Epoch 30 test:
Test Episode 1 Reward: 188.0
Test Episode 2 Reward: 232.0
Test Episode 3 Reward: 232.0
Test Episode 4 Reward: 113.0
Test Episode 5 Reward: 232.0
Test Episode 6 Reward: 232.0
Test Episode 7 Reward: 232.0
Test Episode 8 Reward: 232.0
Test Episode 9 Reward: 232.0
Test Episode 10 Reward: 232.0
Test Episode 11 Reward: 232.0
Test Episode 12 Reward: 232.0
Test Episode 13 Reward: 232.0
Test Episode 14 Reward: 284.0
Test Episode 15 Reward: 231.0
Test Episode 16 Reward: 125.0
Test Episode 17 Reward: 232.0
Test Episode 18 Reward: 116.0
Test Episode 19 Reward: 232.0
Test Episode 20 Reward: 232.0
Epoch 30 Average Test Reward: 215.25


100%|██████████| 2000/2000 [11:35<00:00,  2.88it/s]


Epoch 31 Mean Reward: 386.4335


100%|██████████| 2000/2000 [11:32<00:00,  2.89it/s]


Epoch 32 Mean Reward: 392.1005


100%|██████████| 2000/2000 [11:28<00:00,  2.90it/s]


Epoch 33 Mean Reward: 391.026


100%|██████████| 2000/2000 [11:25<00:00,  2.92it/s]


Epoch 34 Mean Reward: 388.1725


100%|██████████| 2000/2000 [11:37<00:00,  2.87it/s]


Epoch 35 Mean Reward: 395.379


100%|██████████| 2000/2000 [11:34<00:00,  2.88it/s]


Epoch 36 Mean Reward: 393.8925


100%|██████████| 2000/2000 [11:26<00:00,  2.91it/s]


Epoch 37 Mean Reward: 389.5345


100%|██████████| 2000/2000 [11:22<00:00,  2.93it/s]


Epoch 38 Mean Reward: 383.2935


100%|██████████| 2000/2000 [11:31<00:00,  2.89it/s]


Epoch 39 Mean Reward: 387.194


100%|██████████| 2000/2000 [11:08<00:00,  2.99it/s]


Epoch 40 Mean Reward: 378.646
Epoch 40 Model saved to ./checkpoints/take_cover.ckpt
Epoch 40 test:
Test Episode 1 Reward: 219.0
Test Episode 2 Reward: 252.0
Test Episode 3 Reward: 174.0
Test Episode 4 Reward: 184.0
Test Episode 5 Reward: 168.0
Test Episode 6 Reward: 200.0
Test Episode 7 Reward: 219.0
Test Episode 8 Reward: 138.0
Test Episode 9 Reward: 219.0
Test Episode 10 Reward: 148.0
Test Episode 11 Reward: 219.0
Test Episode 12 Reward: 168.0
Test Episode 13 Reward: 112.0
Test Episode 14 Reward: 219.0
Test Episode 15 Reward: 187.0
Test Episode 16 Reward: 134.0
Test Episode 17 Reward: 112.0
Test Episode 18 Reward: 136.0
Test Episode 19 Reward: 219.0
Test Episode 20 Reward: 219.0
Epoch 40 Average Test Reward: 182.3


100%|██████████| 2000/2000 [11:36<00:00,  2.87it/s]


Epoch 41 Mean Reward: 395.0215


100%|██████████| 2000/2000 [11:37<00:00,  2.87it/s]


Epoch 42 Mean Reward: 394.643


100%|██████████| 2000/2000 [11:24<00:00,  2.92it/s]


Epoch 43 Mean Reward: 387.8695


100%|██████████| 2000/2000 [11:39<00:00,  2.86it/s]


Epoch 44 Mean Reward: 394.287


100%|██████████| 2000/2000 [11:27<00:00,  2.91it/s]


Epoch 45 Mean Reward: 389.887


100%|██████████| 2000/2000 [11:38<00:00,  2.86it/s]


Epoch 46 Mean Reward: 396.0455


100%|██████████| 2000/2000 [11:30<00:00,  2.90it/s]


Epoch 47 Mean Reward: 386.8825


100%|██████████| 2000/2000 [11:32<00:00,  2.89it/s]


Epoch 48 Mean Reward: 390.496


100%|██████████| 2000/2000 [11:34<00:00,  2.88it/s]


Epoch 49 Mean Reward: 393.89


100%|██████████| 2000/2000 [11:33<00:00,  2.89it/s]


Epoch 50 Mean Reward: 393.1875
Epoch 50 Model saved to ./checkpoints/take_cover.ckpt
Epoch 50 test:
Test Episode 1 Reward: 232.0
Test Episode 2 Reward: 169.0
Test Episode 3 Reward: 211.0
Test Episode 4 Reward: 278.0
Test Episode 5 Reward: 232.0
Test Episode 6 Reward: 175.0
Test Episode 7 Reward: 157.0
Test Episode 8 Reward: 232.0
Test Episode 9 Reward: 224.0
Test Episode 10 Reward: 250.0
Test Episode 11 Reward: 232.0
Test Episode 12 Reward: 232.0
Test Episode 13 Reward: 205.0
Test Episode 14 Reward: 232.0
Test Episode 15 Reward: 232.0
Test Episode 16 Reward: 232.0
Test Episode 17 Reward: 232.0
Test Episode 18 Reward: 341.0
Test Episode 19 Reward: 232.0
Test Episode 20 Reward: 242.0
Epoch 50 Average Test Reward: 228.6


100%|██████████| 2000/2000 [11:39<00:00,  2.86it/s]


Epoch 51 Mean Reward: 395.955


100%|██████████| 2000/2000 [11:29<00:00,  2.90it/s]


Epoch 52 Mean Reward: 391.0335


100%|██████████| 2000/2000 [11:48<00:00,  2.82it/s]


Epoch 53 Mean Reward: 402.106


100%|██████████| 2000/2000 [11:27<00:00,  2.91it/s]


Epoch 54 Mean Reward: 389.387


100%|██████████| 2000/2000 [11:36<00:00,  2.87it/s]


Epoch 55 Mean Reward: 389.729


100%|██████████| 2000/2000 [11:48<00:00,  2.82it/s]


Epoch 56 Mean Reward: 393.563


100%|██████████| 2000/2000 [13:56<00:00,  2.39it/s]


Epoch 57 Mean Reward: 394.1355


100%|██████████| 2000/2000 [12:39<00:00,  2.63it/s]


Epoch 58 Mean Reward: 387.5805


100%|██████████| 2000/2000 [15:25<00:00,  2.16it/s]


Epoch 59 Mean Reward: 384.597


100%|██████████| 2000/2000 [15:01<00:00,  2.22it/s]


Epoch 60 Mean Reward: 393.4455
Epoch 60 Model saved to ./checkpoints/take_cover.ckpt
Epoch 60 test:
Test Episode 1 Reward: 329.0
Test Episode 2 Reward: 336.0
Test Episode 3 Reward: 457.0
Test Episode 4 Reward: 364.0
Test Episode 5 Reward: 329.0
Test Episode 6 Reward: 329.0
Test Episode 7 Reward: 254.0
Test Episode 8 Reward: 329.0
Test Episode 9 Reward: 318.0
Test Episode 10 Reward: 168.0
Test Episode 11 Reward: 208.0
Test Episode 12 Reward: 329.0
Test Episode 13 Reward: 329.0
Test Episode 14 Reward: 329.0
Test Episode 15 Reward: 329.0
Test Episode 16 Reward: 177.0
Test Episode 17 Reward: 329.0
Test Episode 18 Reward: 329.0
Test Episode 19 Reward: 329.0
Test Episode 20 Reward: 490.0
Epoch 60 Average Test Reward: 319.55


100%|██████████| 2000/2000 [13:58<00:00,  2.39it/s]


Epoch 61 Mean Reward: 396.1525


100%|██████████| 2000/2000 [17:13<00:00,  1.94it/s]


Epoch 62 Mean Reward: 400.091


100%|██████████| 2000/2000 [16:48<00:00,  1.98it/s]


Epoch 63 Mean Reward: 397.1765


100%|██████████| 2000/2000 [16:34<00:00,  2.01it/s]


Epoch 64 Mean Reward: 390.5595


100%|██████████| 2000/2000 [12:42<00:00,  2.62it/s]


Epoch 65 Mean Reward: 392.7895


100%|██████████| 2000/2000 [11:32<00:00,  2.89it/s]


Epoch 66 Mean Reward: 391.523


100%|██████████| 2000/2000 [11:36<00:00,  2.87it/s]


Epoch 67 Mean Reward: 393.6135


100%|██████████| 2000/2000 [11:57<00:00,  2.79it/s]


Epoch 68 Mean Reward: 388.522


100%|██████████| 2000/2000 [11:53<00:00,  2.80it/s]


Epoch 69 Mean Reward: 387.191


100%|██████████| 2000/2000 [13:35<00:00,  2.45it/s]


Epoch 70 Mean Reward: 387.4055
Epoch 70 Model saved to ./checkpoints/take_cover.ckpt
Epoch 70 test:
Test Episode 1 Reward: 344.0
Test Episode 2 Reward: 533.0
Test Episode 3 Reward: 437.0
Test Episode 4 Reward: 413.0
Test Episode 5 Reward: 524.0
Test Episode 6 Reward: 346.0
Test Episode 7 Reward: 533.0
Test Episode 8 Reward: 353.0
Test Episode 9 Reward: 533.0
Test Episode 10 Reward: 533.0
Test Episode 11 Reward: 284.0
Test Episode 12 Reward: 533.0
Test Episode 13 Reward: 533.0
Test Episode 14 Reward: 397.0
Test Episode 15 Reward: 295.0
Test Episode 16 Reward: 533.0
Test Episode 17 Reward: 533.0
Test Episode 18 Reward: 533.0
Test Episode 19 Reward: 533.0
Test Episode 20 Reward: 533.0
Epoch 70 Average Test Reward: 462.8


100%|██████████| 2000/2000 [15:18<00:00,  2.18it/s]


Epoch 71 Mean Reward: 390.7875


100%|██████████| 2000/2000 [13:44<00:00,  2.43it/s]


Epoch 72 Mean Reward: 400.2195


100%|██████████| 2000/2000 [14:46<00:00,  2.26it/s]


Epoch 73 Mean Reward: 383.6665


100%|██████████| 2000/2000 [15:00<00:00,  2.22it/s]


Epoch 74 Mean Reward: 388.1445


100%|██████████| 2000/2000 [11:42<00:00,  2.85it/s]


Epoch 75 Mean Reward: 395.5025


100%|██████████| 2000/2000 [12:04<00:00,  2.76it/s]


Epoch 76 Mean Reward: 391.3235


100%|██████████| 2000/2000 [11:45<00:00,  2.84it/s]


Epoch 77 Mean Reward: 392.1585


100%|██████████| 2000/2000 [11:55<00:00,  2.80it/s]


Epoch 78 Mean Reward: 400.623


100%|██████████| 2000/2000 [14:06<00:00,  2.36it/s]


Epoch 79 Mean Reward: 400.708


100%|██████████| 2000/2000 [13:45<00:00,  2.42it/s]


Epoch 80 Mean Reward: 394.161
Epoch 80 Model saved to ./checkpoints/take_cover.ckpt
Epoch 80 test:
Test Episode 1 Reward: 569.0
Test Episode 2 Reward: 599.0
Test Episode 3 Reward: 569.0
Test Episode 4 Reward: 280.0
Test Episode 5 Reward: 569.0
Test Episode 6 Reward: 386.0
Test Episode 7 Reward: 569.0
Test Episode 8 Reward: 454.0
Test Episode 9 Reward: 569.0
Test Episode 10 Reward: 384.0
Test Episode 11 Reward: 233.0
Test Episode 12 Reward: 320.0
Test Episode 13 Reward: 471.0
Test Episode 14 Reward: 343.0
Test Episode 15 Reward: 569.0
Test Episode 16 Reward: 377.0
Test Episode 17 Reward: 349.0
Test Episode 18 Reward: 569.0
Test Episode 19 Reward: 100.0
Test Episode 20 Reward: 147.0
Epoch 80 Average Test Reward: 421.3


100%|██████████| 2000/2000 [13:12<00:00,  2.52it/s]


Epoch 81 Mean Reward: 393.4165


100%|██████████| 2000/2000 [13:45<00:00,  2.42it/s]


Epoch 82 Mean Reward: 396.5675


100%|██████████| 2000/2000 [11:40<00:00,  2.85it/s]


Epoch 83 Mean Reward: 389.2815


100%|██████████| 2000/2000 [11:35<00:00,  2.87it/s]


Epoch 84 Mean Reward: 395.2915


100%|██████████| 2000/2000 [11:35<00:00,  2.87it/s]


Epoch 85 Mean Reward: 388.7755


100%|██████████| 2000/2000 [11:50<00:00,  2.82it/s]


Epoch 86 Mean Reward: 385.73


100%|██████████| 2000/2000 [11:30<00:00,  2.90it/s]


Epoch 87 Mean Reward: 387.614


100%|██████████| 2000/2000 [11:35<00:00,  2.88it/s]


Epoch 88 Mean Reward: 392.9885


100%|██████████| 2000/2000 [11:27<00:00,  2.91it/s]


Epoch 89 Mean Reward: 390.3425


100%|██████████| 2000/2000 [11:35<00:00,  2.87it/s]


Epoch 90 Mean Reward: 395.186
Epoch 90 Model saved to ./checkpoints/take_cover.ckpt
Epoch 90 test:
Test Episode 1 Reward: 713.0
Test Episode 2 Reward: 413.0
Test Episode 3 Reward: 476.0
Test Episode 4 Reward: 713.0
Test Episode 5 Reward: 713.0
Test Episode 6 Reward: 327.0
Test Episode 7 Reward: 325.0
Test Episode 8 Reward: 700.0
Test Episode 9 Reward: 223.0
Test Episode 10 Reward: 713.0
Test Episode 11 Reward: 713.0
Test Episode 12 Reward: 713.0
Test Episode 13 Reward: 713.0
Test Episode 14 Reward: 316.0
Test Episode 15 Reward: 713.0
Test Episode 16 Reward: 713.0
Test Episode 17 Reward: 593.0
Test Episode 18 Reward: 713.0
Test Episode 19 Reward: 361.0
Test Episode 20 Reward: 642.0
Epoch 90 Average Test Reward: 575.3


100%|██████████| 2000/2000 [11:38<00:00,  2.86it/s]


Epoch 91 Mean Reward: 396.045


100%|██████████| 2000/2000 [11:30<00:00,  2.90it/s]


Epoch 92 Mean Reward: 391.5185


100%|██████████| 2000/2000 [11:36<00:00,  2.87it/s]


Epoch 93 Mean Reward: 395.3005


100%|██████████| 2000/2000 [11:34<00:00,  2.88it/s]


Epoch 94 Mean Reward: 394.0255


100%|██████████| 2000/2000 [11:33<00:00,  2.89it/s]


Epoch 95 Mean Reward: 393.109


100%|██████████| 2000/2000 [11:25<00:00,  2.92it/s]


Epoch 96 Mean Reward: 388.7505


100%|██████████| 2000/2000 [11:24<00:00,  2.92it/s]


Epoch 97 Mean Reward: 388.6165


100%|██████████| 2000/2000 [11:36<00:00,  2.87it/s]


Epoch 98 Mean Reward: 394.8


100%|██████████| 2000/2000 [11:36<00:00,  2.87it/s]


Epoch 99 Mean Reward: 395.6285


100%|██████████| 2000/2000 [11:45<00:00,  2.83it/s]


Epoch 100 Mean Reward: 400.4885
Epoch 100 Model saved to ./checkpoints/take_cover.ckpt
Epoch 100 test:
Test Episode 1 Reward: 715.0
Test Episode 2 Reward: 715.0
Test Episode 3 Reward: 715.0
Test Episode 4 Reward: 715.0
Test Episode 5 Reward: 715.0
Test Episode 6 Reward: 215.0
Test Episode 7 Reward: 715.0
Test Episode 8 Reward: 715.0
Test Episode 9 Reward: 430.0
Test Episode 10 Reward: 715.0
Test Episode 11 Reward: 484.0
Test Episode 12 Reward: 567.0
Test Episode 13 Reward: 715.0
Test Episode 14 Reward: 481.0
Test Episode 15 Reward: 257.0
Test Episode 16 Reward: 715.0
Test Episode 17 Reward: 178.0
Test Episode 18 Reward: 715.0
Test Episode 19 Reward: 202.0
Test Episode 20 Reward: 133.0
Epoch 100 Average Test Reward: 540.6


100%|██████████| 2000/2000 [11:44<00:00,  2.84it/s]


Epoch 101 Mean Reward: 399.4955


100%|██████████| 2000/2000 [11:24<00:00,  2.92it/s]


Epoch 102 Mean Reward: 387.532


100%|██████████| 2000/2000 [11:40<00:00,  2.85it/s]


Epoch 103 Mean Reward: 397.364


100%|██████████| 2000/2000 [11:34<00:00,  2.88it/s]


Epoch 104 Mean Reward: 392.4375


100%|██████████| 2000/2000 [11:22<00:00,  2.93it/s]


Epoch 105 Mean Reward: 386.419


100%|██████████| 2000/2000 [11:25<00:00,  2.92it/s]


Epoch 106 Mean Reward: 386.7095


100%|██████████| 2000/2000 [11:41<00:00,  2.85it/s]


Epoch 107 Mean Reward: 397.6175


100%|██████████| 2000/2000 [11:35<00:00,  2.88it/s]


Epoch 108 Mean Reward: 394.546


100%|██████████| 2000/2000 [11:39<00:00,  2.86it/s]


Epoch 109 Mean Reward: 397.2265


100%|██████████| 2000/2000 [11:35<00:00,  2.87it/s]


Epoch 110 Mean Reward: 395.264
Epoch 110 Model saved to ./checkpoints/take_cover.ckpt
Epoch 110 test:
Test Episode 1 Reward: 288.0
Test Episode 2 Reward: 288.0
Test Episode 3 Reward: 288.0
Test Episode 4 Reward: 189.0
Test Episode 5 Reward: 288.0
Test Episode 6 Reward: 288.0
Test Episode 7 Reward: 288.0
Test Episode 8 Reward: 167.0
Test Episode 9 Reward: 288.0
Test Episode 10 Reward: 288.0
Test Episode 11 Reward: 288.0
Test Episode 12 Reward: 318.0
Test Episode 13 Reward: 155.0
Test Episode 14 Reward: 288.0
Test Episode 15 Reward: 288.0
Test Episode 16 Reward: 288.0
Test Episode 17 Reward: 288.0
Test Episode 18 Reward: 288.0
Test Episode 19 Reward: 130.0
Test Episode 20 Reward: 288.0
Epoch 110 Average Test Reward: 263.95


100%|██████████| 2000/2000 [11:49<00:00,  2.82it/s]


Epoch 111 Mean Reward: 402.627


100%|██████████| 2000/2000 [11:34<00:00,  2.88it/s]


Epoch 112 Mean Reward: 393.9485


100%|██████████| 2000/2000 [11:40<00:00,  2.85it/s]


Epoch 113 Mean Reward: 397.8735


100%|██████████| 2000/2000 [11:31<00:00,  2.89it/s]


Epoch 114 Mean Reward: 392.6565


100%|██████████| 2000/2000 [11:31<00:00,  2.89it/s]


Epoch 115 Mean Reward: 389.791


100%|██████████| 2000/2000 [11:29<00:00,  2.90it/s]


Epoch 116 Mean Reward: 387.38


100%|██████████| 2000/2000 [11:42<00:00,  2.85it/s]


Epoch 117 Mean Reward: 396.357


100%|██████████| 2000/2000 [12:14<00:00,  2.72it/s]


Epoch 118 Mean Reward: 393.612


100%|██████████| 2000/2000 [13:07<00:00,  2.54it/s]


Epoch 119 Mean Reward: 401.6435


100%|██████████| 2000/2000 [12:34<00:00,  2.65it/s]


Epoch 120 Mean Reward: 401.377
Epoch 120 Model saved to ./checkpoints/take_cover.ckpt
Epoch 120 test:
Test Episode 1 Reward: 510.0
Test Episode 2 Reward: 358.0
Test Episode 3 Reward: 224.0
Test Episode 4 Reward: 202.0
Test Episode 5 Reward: 540.0
Test Episode 6 Reward: 540.0
Test Episode 7 Reward: 540.0
Test Episode 8 Reward: 112.0
Test Episode 9 Reward: 540.0
Test Episode 10 Reward: 540.0
Test Episode 11 Reward: 540.0
Test Episode 12 Reward: 789.0
Test Episode 13 Reward: 492.0
Test Episode 14 Reward: 541.0
Test Episode 15 Reward: 140.0
Test Episode 16 Reward: 540.0
Test Episode 17 Reward: 253.0
Test Episode 18 Reward: 514.0
Test Episode 19 Reward: 222.0
Test Episode 20 Reward: 540.0
Epoch 120 Average Test Reward: 433.85


100%|██████████| 2000/2000 [11:44<00:00,  2.84it/s]


Epoch 121 Mean Reward: 399.6695


100%|██████████| 2000/2000 [11:48<00:00,  2.82it/s]


Epoch 122 Mean Reward: 402.452


100%|██████████| 2000/2000 [11:51<00:00,  2.81it/s]


Epoch 123 Mean Reward: 403.881


100%|██████████| 2000/2000 [11:40<00:00,  2.86it/s]


Epoch 124 Mean Reward: 396.9155


100%|██████████| 2000/2000 [11:40<00:00,  2.86it/s]


Epoch 125 Mean Reward: 397.08


100%|██████████| 2000/2000 [11:54<00:00,  2.80it/s]


Epoch 126 Mean Reward: 405.012


100%|██████████| 2000/2000 [11:40<00:00,  2.85it/s]


Epoch 127 Mean Reward: 397.4895


100%|██████████| 2000/2000 [11:52<00:00,  2.81it/s]


Epoch 128 Mean Reward: 403.5715


100%|██████████| 2000/2000 [11:52<00:00,  2.81it/s]


Epoch 129 Mean Reward: 403.327


100%|██████████| 2000/2000 [11:52<00:00,  2.81it/s]


Epoch 130 Mean Reward: 403.2595
Epoch 130 Model saved to ./checkpoints/take_cover.ckpt
Epoch 130 test:
Test Episode 1 Reward: 497.0
Test Episode 2 Reward: 104.0
Test Episode 3 Reward: 527.0
Test Episode 4 Reward: 243.0
Test Episode 5 Reward: 581.0
Test Episode 6 Reward: 527.0
Test Episode 7 Reward: 243.0
Test Episode 8 Reward: 527.0
Test Episode 9 Reward: 527.0
Test Episode 10 Reward: 527.0
Test Episode 11 Reward: 446.0
Test Episode 12 Reward: 232.0
Test Episode 13 Reward: 615.0
Test Episode 14 Reward: 527.0
Test Episode 15 Reward: 527.0
Test Episode 16 Reward: 324.0
Test Episode 17 Reward: 502.0
Test Episode 18 Reward: 347.0
Test Episode 19 Reward: 433.0
Test Episode 20 Reward: 527.0
Epoch 130 Average Test Reward: 439.15


100%|██████████| 2000/2000 [11:39<00:00,  2.86it/s]


Epoch 131 Mean Reward: 397.3045


100%|██████████| 2000/2000 [11:46<00:00,  2.83it/s]


Epoch 132 Mean Reward: 400.4485


100%|██████████| 2000/2000 [11:36<00:00,  2.87it/s]


Epoch 133 Mean Reward: 394.29


100%|██████████| 2000/2000 [11:49<00:00,  2.82it/s]


Epoch 134 Mean Reward: 402.1545


100%|██████████| 2000/2000 [12:12<00:00,  2.73it/s]


Epoch 135 Mean Reward: 404.412


100%|██████████| 2000/2000 [11:54<00:00,  2.80it/s]


Epoch 136 Mean Reward: 401.7445


100%|██████████| 2000/2000 [11:51<00:00,  2.81it/s]


Epoch 137 Mean Reward: 402.704


100%|██████████| 2000/2000 [11:46<00:00,  2.83it/s]


Epoch 138 Mean Reward: 399.5665


100%|██████████| 2000/2000 [11:50<00:00,  2.81it/s]


Epoch 139 Mean Reward: 399.8745


100%|██████████| 2000/2000 [11:58<00:00,  2.78it/s]


Epoch 140 Mean Reward: 405.0495
Epoch 140 Model saved to ./checkpoints/take_cover.ckpt
Epoch 140 test:
Test Episode 1 Reward: 345.0
Test Episode 2 Reward: 196.0
Test Episode 3 Reward: 330.0
Test Episode 4 Reward: 317.0
Test Episode 5 Reward: 345.0
Test Episode 6 Reward: 259.0
Test Episode 7 Reward: 345.0
Test Episode 8 Reward: 461.0
Test Episode 9 Reward: 345.0
Test Episode 10 Reward: 345.0
Test Episode 11 Reward: 345.0
Test Episode 12 Reward: 345.0
Test Episode 13 Reward: 191.0
Test Episode 14 Reward: 261.0
Test Episode 15 Reward: 300.0
Test Episode 16 Reward: 231.0
Test Episode 17 Reward: 335.0
Test Episode 18 Reward: 278.0
Test Episode 19 Reward: 242.0
Test Episode 20 Reward: 345.0
Epoch 140 Average Test Reward: 308.05


100%|██████████| 2000/2000 [11:58<00:00,  2.78it/s]


Epoch 141 Mean Reward: 402.7965


100%|██████████| 2000/2000 [11:50<00:00,  2.82it/s]


Epoch 142 Mean Reward: 401.696


100%|██████████| 2000/2000 [11:50<00:00,  2.81it/s]


Epoch 143 Mean Reward: 401.416


100%|██████████| 2000/2000 [12:00<00:00,  2.78it/s]


Epoch 144 Mean Reward: 406.85


100%|██████████| 2000/2000 [12:05<00:00,  2.76it/s]


Epoch 145 Mean Reward: 409.844


100%|██████████| 2000/2000 [11:50<00:00,  2.82it/s]


Epoch 146 Mean Reward: 400.7195


100%|██████████| 2000/2000 [12:13<00:00,  2.73it/s]


Epoch 147 Mean Reward: 416.1515


100%|██████████| 2000/2000 [12:11<00:00,  2.74it/s]


Epoch 148 Mean Reward: 414.2975


100%|██████████| 2000/2000 [12:00<00:00,  2.78it/s]


Epoch 149 Mean Reward: 405.0385


100%|██████████| 2000/2000 [12:12<00:00,  2.73it/s]


Epoch 150 Mean Reward: 414.4475
Epoch 150 Model saved to ./checkpoints/take_cover.ckpt
Epoch 150 test:
Test Episode 1 Reward: 612.0
Test Episode 2 Reward: 178.0
Test Episode 3 Reward: 205.0
Test Episode 4 Reward: 226.0
Test Episode 5 Reward: 790.0
Test Episode 6 Reward: 278.0
Test Episode 7 Reward: 790.0
Test Episode 8 Reward: 226.0
Test Episode 9 Reward: 790.0
Test Episode 10 Reward: 790.0
Test Episode 11 Reward: 790.0
Test Episode 12 Reward: 253.0
Test Episode 13 Reward: 511.0
Test Episode 14 Reward: 790.0
Test Episode 15 Reward: 790.0
Test Episode 16 Reward: 790.0
Test Episode 17 Reward: 790.0
Test Episode 18 Reward: 790.0
Test Episode 19 Reward: 790.0
Test Episode 20 Reward: 790.0
Epoch 150 Average Test Reward: 598.45


100%|██████████| 2000/2000 [12:19<00:00,  2.70it/s]


Epoch 151 Mean Reward: 418.5165


100%|██████████| 2000/2000 [11:59<00:00,  2.78it/s]


Epoch 152 Mean Reward: 407.155


100%|██████████| 2000/2000 [12:09<00:00,  2.74it/s]


Epoch 153 Mean Reward: 408.5595


100%|██████████| 2000/2000 [12:07<00:00,  2.75it/s]


Epoch 154 Mean Reward: 411.6855


100%|██████████| 2000/2000 [11:51<00:00,  2.81it/s]


Epoch 155 Mean Reward: 400.488


100%|██████████| 2000/2000 [12:07<00:00,  2.75it/s]


Epoch 156 Mean Reward: 409.3965


100%|██████████| 2000/2000 [12:14<00:00,  2.72it/s]


Epoch 157 Mean Reward: 412.4595


100%|██████████| 2000/2000 [12:01<00:00,  2.77it/s]


Epoch 158 Mean Reward: 404.945


100%|██████████| 2000/2000 [12:14<00:00,  2.72it/s]


Epoch 159 Mean Reward: 411.208


100%|██████████| 2000/2000 [12:18<00:00,  2.71it/s]


Epoch 160 Mean Reward: 412.526
Epoch 160 Model saved to ./checkpoints/take_cover.ckpt
Epoch 160 test:
Test Episode 1 Reward: 457.0
Test Episode 2 Reward: 336.0
Test Episode 3 Reward: 277.0
Test Episode 4 Reward: 933.0
Test Episode 5 Reward: 336.0
Test Episode 6 Reward: 336.0
Test Episode 7 Reward: 564.0
Test Episode 8 Reward: 336.0
Test Episode 9 Reward: 336.0
Test Episode 10 Reward: 157.0
Test Episode 11 Reward: 219.0
Test Episode 12 Reward: 336.0
Test Episode 13 Reward: 336.0
Test Episode 14 Reward: 336.0
Test Episode 15 Reward: 336.0
Test Episode 16 Reward: 336.0
Test Episode 17 Reward: 329.0
Test Episode 18 Reward: 336.0
Test Episode 19 Reward: 336.0
Test Episode 20 Reward: 458.0
Epoch 160 Average Test Reward: 371.3


100%|██████████| 2000/2000 [12:27<00:00,  2.67it/s]


Epoch 161 Mean Reward: 417.5005


100%|██████████| 2000/2000 [12:33<00:00,  2.65it/s]


Epoch 162 Mean Reward: 423.2985


100%|██████████| 2000/2000 [12:36<00:00,  2.64it/s]


Epoch 163 Mean Reward: 419.816


100%|██████████| 2000/2000 [12:23<00:00,  2.69it/s]


Epoch 164 Mean Reward: 412.3245


100%|██████████| 2000/2000 [12:30<00:00,  2.66it/s]


Epoch 165 Mean Reward: 417.638


100%|██████████| 2000/2000 [12:34<00:00,  2.65it/s]


Epoch 166 Mean Reward: 421.2465


100%|██████████| 2000/2000 [12:31<00:00,  2.66it/s]


Epoch 167 Mean Reward: 420.2715


100%|██████████| 2000/2000 [12:41<00:00,  2.63it/s]


Epoch 168 Mean Reward: 425.1185


100%|██████████| 2000/2000 [12:38<00:00,  2.64it/s]


Epoch 169 Mean Reward: 421.418


100%|██████████| 2000/2000 [12:40<00:00,  2.63it/s]


Epoch 170 Mean Reward: 423.5335
Epoch 170 Model saved to ./checkpoints/take_cover.ckpt
Epoch 170 test:
Test Episode 1 Reward: 548.0
Test Episode 2 Reward: 548.0
Test Episode 3 Reward: 548.0
Test Episode 4 Reward: 284.0
Test Episode 5 Reward: 548.0
Test Episode 6 Reward: 548.0
Test Episode 7 Reward: 548.0
Test Episode 8 Reward: 548.0
Test Episode 9 Reward: 548.0
Test Episode 10 Reward: 156.0
Test Episode 11 Reward: 548.0
Test Episode 12 Reward: 193.0
Test Episode 13 Reward: 548.0
Test Episode 14 Reward: 548.0
Test Episode 15 Reward: 548.0
Test Episode 16 Reward: 548.0
Test Episode 17 Reward: 600.0
Test Episode 18 Reward: 548.0
Test Episode 19 Reward: 213.0
Test Episode 20 Reward: 548.0
Epoch 170 Average Test Reward: 483.3


100%|██████████| 2000/2000 [12:23<00:00,  2.69it/s]


Epoch 171 Mean Reward: 412.6535


100%|██████████| 2000/2000 [12:47<00:00,  2.61it/s]


Epoch 172 Mean Reward: 426.2385


100%|██████████| 2000/2000 [12:39<00:00,  2.63it/s]


Epoch 173 Mean Reward: 415.47


100%|██████████| 2000/2000 [12:48<00:00,  2.60it/s]


Epoch 174 Mean Reward: 422.8625


100%|██████████| 2000/2000 [12:41<00:00,  2.63it/s]


Epoch 175 Mean Reward: 420.1305


100%|██████████| 2000/2000 [12:50<00:00,  2.59it/s]


Epoch 176 Mean Reward: 425.245


100%|██████████| 2000/2000 [12:54<00:00,  2.58it/s]


Epoch 177 Mean Reward: 426.3805


100%|██████████| 2000/2000 [12:46<00:00,  2.61it/s]


Epoch 178 Mean Reward: 423.0985


100%|██████████| 2000/2000 [12:59<00:00,  2.57it/s]


Epoch 179 Mean Reward: 427.903


100%|██████████| 2000/2000 [13:07<00:00,  2.54it/s]


Epoch 180 Mean Reward: 432.4055
Epoch 180 Model saved to ./checkpoints/take_cover.ckpt
Epoch 180 test:
Test Episode 1 Reward: 415.0
Test Episode 2 Reward: 245.0
Test Episode 3 Reward: 617.0
Test Episode 4 Reward: 252.0
Test Episode 5 Reward: 347.0
Test Episode 6 Reward: 347.0
Test Episode 7 Reward: 348.0
Test Episode 8 Reward: 828.0
Test Episode 9 Reward: 347.0
Test Episode 10 Reward: 347.0
Test Episode 11 Reward: 347.0
Test Episode 12 Reward: 300.0
Test Episode 13 Reward: 347.0
Test Episode 14 Reward: 347.0
Test Episode 15 Reward: 321.0
Test Episode 16 Reward: 347.0
Test Episode 17 Reward: 196.0
Test Episode 18 Reward: 599.0
Test Episode 19 Reward: 375.0
Test Episode 20 Reward: 126.0
Epoch 180 Average Test Reward: 369.9


100%|██████████| 2000/2000 [12:35<00:00,  2.65it/s]


Epoch 181 Mean Reward: 422.339


100%|██████████| 2000/2000 [12:58<00:00,  2.57it/s]


Epoch 182 Mean Reward: 428.8215


100%|██████████| 2000/2000 [14:26<00:00,  2.31it/s]


Epoch 183 Mean Reward: 424.847


100%|██████████| 2000/2000 [16:19<00:00,  2.04it/s]


Epoch 184 Mean Reward: 434.638


100%|██████████| 2000/2000 [13:51<00:00,  2.40it/s]


Epoch 185 Mean Reward: 431.599


100%|██████████| 2000/2000 [12:43<00:00,  2.62it/s]


Epoch 186 Mean Reward: 422.982


100%|██████████| 2000/2000 [12:34<00:00,  2.65it/s]


Epoch 187 Mean Reward: 428.636


100%|██████████| 2000/2000 [12:55<00:00,  2.58it/s]


Epoch 188 Mean Reward: 432.7355


100%|██████████| 2000/2000 [12:34<00:00,  2.65it/s]


Epoch 189 Mean Reward: 427.07


100%|██████████| 2000/2000 [12:51<00:00,  2.59it/s]


Epoch 190 Mean Reward: 436.0685
Epoch 190 Model saved to ./checkpoints/take_cover.ckpt
Epoch 190 test:
Test Episode 1 Reward: 331.0
Test Episode 2 Reward: 219.0
Test Episode 3 Reward: 331.0
Test Episode 4 Reward: 427.0
Test Episode 5 Reward: 203.0
Test Episode 6 Reward: 331.0
Test Episode 7 Reward: 181.0
Test Episode 8 Reward: 331.0
Test Episode 9 Reward: 331.0
Test Episode 10 Reward: 461.0
Test Episode 11 Reward: 646.0
Test Episode 12 Reward: 446.0
Test Episode 13 Reward: 331.0
Test Episode 14 Reward: 331.0
Test Episode 15 Reward: 331.0
Test Episode 16 Reward: 269.0
Test Episode 17 Reward: 331.0
Test Episode 18 Reward: 456.0
Test Episode 19 Reward: 331.0
Test Episode 20 Reward: 135.0
Epoch 190 Average Test Reward: 337.65


100%|██████████| 2000/2000 [12:47<00:00,  2.61it/s]


Epoch 191 Mean Reward: 435.83


100%|██████████| 2000/2000 [12:33<00:00,  2.65it/s]


Epoch 192 Mean Reward: 431.815


100%|██████████| 2000/2000 [14:21<00:00,  2.32it/s]


Epoch 193 Mean Reward: 435.7915


100%|██████████| 2000/2000 [19:14<00:00,  1.73it/s]


Epoch 194 Mean Reward: 427.6365


100%|██████████| 2000/2000 [13:58<00:00,  2.38it/s]


Epoch 195 Mean Reward: 433.1415


100%|██████████| 2000/2000 [12:45<00:00,  2.61it/s]


Epoch 196 Mean Reward: 431.1825


100%|██████████| 2000/2000 [18:55<00:00,  1.76it/s]


Epoch 197 Mean Reward: 439.4055


100%|██████████| 2000/2000 [14:51<00:00,  2.24it/s]


Epoch 198 Mean Reward: 433.556


100%|██████████| 2000/2000 [13:00<00:00,  2.56it/s]


Epoch 199 Mean Reward: 442.708


100%|██████████| 2000/2000 [12:53<00:00,  2.58it/s]


Epoch 200 Mean Reward: 437.505
Epoch 200 Model saved to ./checkpoints/take_cover.ckpt
Epoch 200 test:
Test Episode 1 Reward: 760.0
Test Episode 2 Reward: 144.0
Test Episode 3 Reward: 592.0
Test Episode 4 Reward: 760.0
Test Episode 5 Reward: 760.0
Test Episode 6 Reward: 162.0
Test Episode 7 Reward: 760.0
Test Episode 8 Reward: 909.0
Test Episode 9 Reward: 760.0
Test Episode 10 Reward: 760.0
Test Episode 11 Reward: 129.0
Test Episode 12 Reward: 760.0
Test Episode 13 Reward: 223.0
Test Episode 14 Reward: 760.0
Test Episode 15 Reward: 202.0
Test Episode 16 Reward: 229.0
Test Episode 17 Reward: 497.0
Test Episode 18 Reward: 760.0
Test Episode 19 Reward: 760.0
Test Episode 20 Reward: 148.0
Epoch 200 Average Test Reward: 541.75


100%|██████████| 2000/2000 [12:57<00:00,  2.57it/s]


Epoch 201 Mean Reward: 430.7685


100%|██████████| 2000/2000 [12:55<00:00,  2.58it/s]


Epoch 202 Mean Reward: 438.103


100%|██████████| 2000/2000 [13:13<00:00,  2.52it/s]


Epoch 203 Mean Reward: 443.724


100%|██████████| 2000/2000 [13:13<00:00,  2.52it/s]


Epoch 204 Mean Reward: 444.385


100%|██████████| 2000/2000 [13:25<00:00,  2.48it/s]


Epoch 205 Mean Reward: 446.1205


100%|██████████| 2000/2000 [13:03<00:00,  2.55it/s]


Epoch 206 Mean Reward: 435.5085


100%|██████████| 2000/2000 [13:06<00:00,  2.54it/s]


Epoch 207 Mean Reward: 438.6555


100%|██████████| 2000/2000 [13:38<00:00,  2.44it/s]


Epoch 208 Mean Reward: 444.8145


100%|██████████| 2000/2000 [13:28<00:00,  2.47it/s]


Epoch 209 Mean Reward: 448.5775


100%|██████████| 2000/2000 [13:30<00:00,  2.47it/s]


Epoch 210 Mean Reward: 446.502
Epoch 210 Model saved to ./checkpoints/take_cover.ckpt
Epoch 210 test:
Test Episode 1 Reward: 812.0
Test Episode 2 Reward: 812.0
Test Episode 3 Reward: 812.0
Test Episode 4 Reward: 277.0
Test Episode 5 Reward: 253.0
Test Episode 6 Reward: 812.0
Test Episode 7 Reward: 653.0
Test Episode 8 Reward: 812.0
Test Episode 9 Reward: 237.0
Test Episode 10 Reward: 463.0
Test Episode 11 Reward: 812.0
Test Episode 12 Reward: 297.0
Test Episode 13 Reward: 513.0
Test Episode 14 Reward: 812.0
Test Episode 15 Reward: 766.0
Test Episode 16 Reward: 812.0
Test Episode 17 Reward: 257.0
Test Episode 18 Reward: 812.0
Test Episode 19 Reward: 812.0
Test Episode 20 Reward: 812.0
Epoch 210 Average Test Reward: 632.4


100%|██████████| 2000/2000 [13:46<00:00,  2.42it/s]


Epoch 211 Mean Reward: 456.5255


100%|██████████| 2000/2000 [13:17<00:00,  2.51it/s]


Epoch 212 Mean Reward: 440.103


100%|██████████| 2000/2000 [13:32<00:00,  2.46it/s]


Epoch 213 Mean Reward: 445.9265


100%|██████████| 2000/2000 [13:38<00:00,  2.44it/s]


Epoch 214 Mean Reward: 447.2975


100%|██████████| 2000/2000 [13:18<00:00,  2.50it/s]


Epoch 215 Mean Reward: 442.4355


100%|██████████| 2000/2000 [13:53<00:00,  2.40it/s]


Epoch 216 Mean Reward: 455.8415


100%|██████████| 2000/2000 [13:50<00:00,  2.41it/s]


Epoch 217 Mean Reward: 453.5685


100%|██████████| 2000/2000 [13:34<00:00,  2.46it/s]


Epoch 218 Mean Reward: 444.0745


100%|██████████| 2000/2000 [13:46<00:00,  2.42it/s]


Epoch 219 Mean Reward: 450.6785


100%|██████████| 2000/2000 [13:44<00:00,  2.42it/s]


Epoch 220 Mean Reward: 447.8525
Epoch 220 Model saved to ./checkpoints/take_cover.ckpt
Epoch 220 test:
Test Episode 1 Reward: 328.0
Test Episode 2 Reward: 495.0
Test Episode 3 Reward: 235.0
Test Episode 4 Reward: 495.0
Test Episode 5 Reward: 495.0
Test Episode 6 Reward: 495.0
Test Episode 7 Reward: 412.0
Test Episode 8 Reward: 495.0
Test Episode 9 Reward: 495.0
Test Episode 10 Reward: 495.0
Test Episode 11 Reward: 495.0
Test Episode 12 Reward: 495.0
Test Episode 13 Reward: 495.0
Test Episode 14 Reward: 495.0
Test Episode 15 Reward: 128.0
Test Episode 16 Reward: 546.0
Test Episode 17 Reward: 495.0
Test Episode 18 Reward: 495.0
Test Episode 19 Reward: 177.0
Test Episode 20 Reward: 550.0
Epoch 220 Average Test Reward: 440.55


100%|██████████| 2000/2000 [14:01<00:00,  2.38it/s]


Epoch 221 Mean Reward: 456.26


100%|██████████| 2000/2000 [13:32<00:00,  2.46it/s]


Epoch 222 Mean Reward: 443.2895


100%|██████████| 2000/2000 [14:05<00:00,  2.37it/s]


Epoch 223 Mean Reward: 462.0505


100%|██████████| 2000/2000 [13:55<00:00,  2.39it/s]


Epoch 224 Mean Reward: 458.4895


100%|██████████| 2000/2000 [13:54<00:00,  2.40it/s]


Epoch 225 Mean Reward: 455.1875


100%|██████████| 2000/2000 [13:39<00:00,  2.44it/s]


Epoch 226 Mean Reward: 440.103


100%|██████████| 2000/2000 [14:12<00:00,  2.34it/s]


Epoch 227 Mean Reward: 454.651


100%|██████████| 2000/2000 [14:20<00:00,  2.32it/s]


Epoch 228 Mean Reward: 461.1665


100%|██████████| 2000/2000 [14:21<00:00,  2.32it/s]


Epoch 229 Mean Reward: 454.662


100%|██████████| 2000/2000 [14:31<00:00,  2.29it/s]


Epoch 230 Mean Reward: 461.828
Epoch 230 Model saved to ./checkpoints/take_cover.ckpt
Epoch 230 test:
Test Episode 1 Reward: 518.0
Test Episode 2 Reward: 518.0
Test Episode 3 Reward: 136.0
Test Episode 4 Reward: 382.0
Test Episode 5 Reward: 479.0
Test Episode 6 Reward: 867.0
Test Episode 7 Reward: 303.0
Test Episode 8 Reward: 518.0
Test Episode 9 Reward: 518.0
Test Episode 10 Reward: 148.0
Test Episode 11 Reward: 518.0
Test Episode 12 Reward: 119.0
Test Episode 13 Reward: 197.0
Test Episode 14 Reward: 268.0
Test Episode 15 Reward: 319.0
Test Episode 16 Reward: 672.0
Test Episode 17 Reward: 518.0
Test Episode 18 Reward: 518.0
Test Episode 19 Reward: 348.0
Test Episode 20 Reward: 518.0
Epoch 230 Average Test Reward: 419.1


100%|██████████| 2000/2000 [14:18<00:00,  2.33it/s]


Epoch 231 Mean Reward: 452.7075


100%|██████████| 2000/2000 [13:33<00:00,  2.46it/s]


Epoch 232 Mean Reward: 434.8


100%|██████████| 2000/2000 [14:13<00:00,  2.34it/s]


Epoch 233 Mean Reward: 453.859


100%|██████████| 2000/2000 [14:00<00:00,  2.38it/s]


Epoch 234 Mean Reward: 446.5765


100%|██████████| 2000/2000 [14:25<00:00,  2.31it/s]


Epoch 235 Mean Reward: 458.629


100%|██████████| 2000/2000 [14:20<00:00,  2.32it/s]


Epoch 236 Mean Reward: 455.9355


100%|██████████| 2000/2000 [14:09<00:00,  2.36it/s]


Epoch 237 Mean Reward: 451.857


100%|██████████| 2000/2000 [14:16<00:00,  2.34it/s]


Epoch 238 Mean Reward: 460.8565


100%|██████████| 2000/2000 [14:04<00:00,  2.37it/s]


Epoch 239 Mean Reward: 462.066


100%|██████████| 2000/2000 [12:52<00:00,  2.59it/s]


Epoch 240 Mean Reward: 448.5735
Epoch 240 Model saved to ./checkpoints/take_cover.ckpt
Epoch 240 test:
Test Episode 1 Reward: 529.0
Test Episode 2 Reward: 529.0
Test Episode 3 Reward: 510.0
Test Episode 4 Reward: 246.0
Test Episode 5 Reward: 529.0
Test Episode 6 Reward: 514.0
Test Episode 7 Reward: 529.0
Test Episode 8 Reward: 117.0
Test Episode 9 Reward: 529.0
Test Episode 10 Reward: 178.0
Test Episode 11 Reward: 529.0
Test Episode 12 Reward: 124.0
Test Episode 13 Reward: 344.0
Test Episode 14 Reward: 529.0
Test Episode 15 Reward: 529.0
Test Episode 16 Reward: 314.0
Test Episode 17 Reward: 148.0
Test Episode 18 Reward: 529.0
Test Episode 19 Reward: 529.0
Test Episode 20 Reward: 529.0
Epoch 240 Average Test Reward: 415.7


100%|██████████| 2000/2000 [13:50<00:00,  2.41it/s]


Epoch 241 Mean Reward: 456.3235


100%|██████████| 2000/2000 [13:31<00:00,  2.46it/s]


Epoch 242 Mean Reward: 450.893


100%|██████████| 2000/2000 [16:06<00:00,  2.07it/s]


Epoch 243 Mean Reward: 453.088


100%|██████████| 2000/2000 [18:51<00:00,  1.77it/s]


Epoch 244 Mean Reward: 450.432


100%|██████████| 2000/2000 [13:32<00:00,  2.46it/s]


Epoch 245 Mean Reward: 444.763


100%|██████████| 2000/2000 [13:19<00:00,  2.50it/s]


Epoch 246 Mean Reward: 456.3425


100%|██████████| 2000/2000 [13:55<00:00,  2.39it/s]


Epoch 247 Mean Reward: 455.7125


100%|██████████| 2000/2000 [14:12<00:00,  2.35it/s]


Epoch 248 Mean Reward: 461.33


100%|██████████| 2000/2000 [16:49<00:00,  1.98it/s]


Epoch 249 Mean Reward: 465.5105


100%|██████████| 2000/2000 [14:05<00:00,  2.37it/s]


Epoch 250 Mean Reward: 460.1845
Epoch 250 Model saved to ./checkpoints/take_cover.ckpt
Epoch 250 test:
Test Episode 1 Reward: 806.0
Test Episode 2 Reward: 389.0
Test Episode 3 Reward: 264.0
Test Episode 4 Reward: 124.0
Test Episode 5 Reward: 356.0
Test Episode 6 Reward: 806.0
Test Episode 7 Reward: 806.0
Test Episode 8 Reward: 806.0
Test Episode 9 Reward: 806.0
Test Episode 10 Reward: 210.0
Test Episode 11 Reward: 192.0
Test Episode 12 Reward: 195.0
Test Episode 13 Reward: 806.0
Test Episode 14 Reward: 806.0
Test Episode 15 Reward: 334.0
Test Episode 16 Reward: 806.0
Test Episode 17 Reward: 806.0
Test Episode 18 Reward: 806.0
Test Episode 19 Reward: 251.0
Test Episode 20 Reward: 806.0
Epoch 250 Average Test Reward: 559.05


100%|██████████| 2000/2000 [14:17<00:00,  2.33it/s]


Epoch 251 Mean Reward: 458.8705


100%|██████████| 2000/2000 [14:23<00:00,  2.32it/s]


Epoch 252 Mean Reward: 461.236


100%|██████████| 2000/2000 [14:39<00:00,  2.27it/s]


Epoch 253 Mean Reward: 470.5725


100%|██████████| 2000/2000 [14:30<00:00,  2.30it/s]


Epoch 254 Mean Reward: 465.297


100%|██████████| 2000/2000 [14:18<00:00,  2.33it/s]


Epoch 255 Mean Reward: 464.5645


100%|██████████| 2000/2000 [14:45<00:00,  2.26it/s]


Epoch 256 Mean Reward: 475.308


100%|██████████| 2000/2000 [14:31<00:00,  2.29it/s]


Epoch 257 Mean Reward: 460.1615


100%|██████████| 2000/2000 [14:52<00:00,  2.24it/s]


Epoch 258 Mean Reward: 470.11


100%|██████████| 2000/2000 [14:43<00:00,  2.26it/s]


Epoch 259 Mean Reward: 461.847


100%|██████████| 2000/2000 [15:05<00:00,  2.21it/s]


Epoch 260 Mean Reward: 466.848
Epoch 260 Model saved to ./checkpoints/take_cover.ckpt
Epoch 260 test:
Test Episode 1 Reward: 129.0
Test Episode 2 Reward: 946.0
Test Episode 3 Reward: 796.0
Test Episode 4 Reward: 236.0
Test Episode 5 Reward: 946.0
Test Episode 6 Reward: 946.0
Test Episode 7 Reward: 946.0
Test Episode 8 Reward: 153.0
Test Episode 9 Reward: 287.0
Test Episode 10 Reward: 946.0
Test Episode 11 Reward: 636.0
Test Episode 12 Reward: 946.0
Test Episode 13 Reward: 946.0
Test Episode 14 Reward: 946.0
Test Episode 15 Reward: 946.0
Test Episode 16 Reward: 184.0
Test Episode 17 Reward: 946.0
Test Episode 18 Reward: 501.0
Test Episode 19 Reward: 343.0
Test Episode 20 Reward: 946.0
Epoch 260 Average Test Reward: 683.55


100%|██████████| 2000/2000 [14:59<00:00,  2.22it/s]


Epoch 261 Mean Reward: 478.095


100%|██████████| 2000/2000 [14:43<00:00,  2.26it/s]


Epoch 262 Mean Reward: 470.6365


100%|██████████| 2000/2000 [14:48<00:00,  2.25it/s]


Epoch 263 Mean Reward: 469.6255


100%|██████████| 2000/2000 [14:59<00:00,  2.22it/s]


Epoch 264 Mean Reward: 475.567


100%|██████████| 2000/2000 [14:45<00:00,  2.26it/s]


Epoch 265 Mean Reward: 465.181


100%|██████████| 2000/2000 [15:01<00:00,  2.22it/s]


Epoch 266 Mean Reward: 473.767


100%|██████████| 2000/2000 [14:55<00:00,  2.23it/s]


Epoch 267 Mean Reward: 470.002


100%|██████████| 2000/2000 [14:48<00:00,  2.25it/s]


Epoch 268 Mean Reward: 461.1225


100%|██████████| 2000/2000 [15:20<00:00,  2.17it/s]


Epoch 269 Mean Reward: 478.524


100%|██████████| 2000/2000 [15:15<00:00,  2.19it/s]


Epoch 270 Mean Reward: 476.78
Epoch 270 Model saved to ./checkpoints/take_cover.ckpt
Epoch 270 test:
Test Episode 1 Reward: 106.0
Test Episode 2 Reward: 417.0
Test Episode 3 Reward: 417.0
Test Episode 4 Reward: 417.0
Test Episode 5 Reward: 299.0
Test Episode 6 Reward: 417.0
Test Episode 7 Reward: 417.0
Test Episode 8 Reward: 287.0
Test Episode 9 Reward: 378.0
Test Episode 10 Reward: 761.0
Test Episode 11 Reward: 184.0
Test Episode 12 Reward: 213.0
Test Episode 13 Reward: 417.0
Test Episode 14 Reward: 417.0
Test Episode 15 Reward: 344.0
Test Episode 16 Reward: 417.0
Test Episode 17 Reward: 417.0
Test Episode 18 Reward: 101.0
Test Episode 19 Reward: 186.0
Test Episode 20 Reward: 777.0
Epoch 270 Average Test Reward: 369.45


100%|██████████| 2000/2000 [15:14<00:00,  2.19it/s]


Epoch 271 Mean Reward: 479.119


100%|██████████| 2000/2000 [15:28<00:00,  2.15it/s]


Epoch 272 Mean Reward: 477.583


100%|██████████| 2000/2000 [15:15<00:00,  2.19it/s]


Epoch 273 Mean Reward: 472.582


100%|██████████| 2000/2000 [15:16<00:00,  2.18it/s]


Epoch 274 Mean Reward: 468.528


100%|██████████| 2000/2000 [15:13<00:00,  2.19it/s]


Epoch 275 Mean Reward: 473.161


100%|██████████| 2000/2000 [15:49<00:00,  2.11it/s]


Epoch 276 Mean Reward: 487.6385


100%|██████████| 2000/2000 [15:24<00:00,  2.16it/s]


Epoch 277 Mean Reward: 478.064


100%|██████████| 2000/2000 [15:28<00:00,  2.15it/s]


Epoch 278 Mean Reward: 479.3965


100%|██████████| 2000/2000 [15:47<00:00,  2.11it/s]


Epoch 279 Mean Reward: 487.7455


100%|██████████| 2000/2000 [15:28<00:00,  2.16it/s]


Epoch 280 Mean Reward: 482.1385
Epoch 280 Model saved to ./checkpoints/take_cover.ckpt
Epoch 280 test:
Test Episode 1 Reward: 466.0
Test Episode 2 Reward: 407.0
Test Episode 3 Reward: 617.0
Test Episode 4 Reward: 466.0
Test Episode 5 Reward: 466.0
Test Episode 6 Reward: 466.0
Test Episode 7 Reward: 466.0
Test Episode 8 Reward: 466.0
Test Episode 9 Reward: 466.0
Test Episode 10 Reward: 466.0
Test Episode 11 Reward: 134.0
Test Episode 12 Reward: 466.0
Test Episode 13 Reward: 438.0
Test Episode 14 Reward: 139.0
Test Episode 15 Reward: 265.0
Test Episode 16 Reward: 238.0
Test Episode 17 Reward: 714.0
Test Episode 18 Reward: 466.0
Test Episode 19 Reward: 466.0
Test Episode 20 Reward: 466.0
Epoch 280 Average Test Reward: 427.2


100%|██████████| 2000/2000 [14:43<00:00,  2.26it/s]


Epoch 281 Mean Reward: 482.155


100%|██████████| 2000/2000 [18:46<00:00,  1.77it/s]


Epoch 282 Mean Reward: 488.803


100%|██████████| 2000/2000 [21:11<00:00,  1.57it/s]


Epoch 283 Mean Reward: 472.9615


100%|██████████| 2000/2000 [15:20<00:00,  2.17it/s]


Epoch 284 Mean Reward: 481.2985


100%|██████████| 2000/2000 [14:43<00:00,  2.26it/s]


Epoch 285 Mean Reward: 481.1875


100%|██████████| 2000/2000 [15:50<00:00,  2.10it/s]


Epoch 286 Mean Reward: 476.6645


100%|██████████| 2000/2000 [23:30<00:00,  1.42it/s]


Epoch 287 Mean Reward: 496.349


100%|██████████| 2000/2000 [23:43<00:00,  1.41it/s]


Epoch 288 Mean Reward: 487.017


100%|██████████| 2000/2000 [23:19<00:00,  1.43it/s]


Epoch 289 Mean Reward: 478.9845


100%|██████████| 2000/2000 [23:35<00:00,  1.41it/s]


Epoch 290 Mean Reward: 486.1435
Epoch 290 Model saved to ./checkpoints/take_cover.ckpt
Epoch 290 test:
Test Episode 1 Reward: 156.0
Test Episode 2 Reward: 340.0
Test Episode 3 Reward: 340.0
Test Episode 4 Reward: 235.0
Test Episode 5 Reward: 369.0
Test Episode 6 Reward: 175.0
Test Episode 7 Reward: 149.0
Test Episode 8 Reward: 340.0
Test Episode 9 Reward: 570.0
Test Episode 10 Reward: 124.0
Test Episode 11 Reward: 340.0
Test Episode 12 Reward: 340.0
Test Episode 13 Reward: 215.0
Test Episode 14 Reward: 340.0
Test Episode 15 Reward: 244.0
Test Episode 16 Reward: 340.0
Test Episode 17 Reward: 340.0
Test Episode 18 Reward: 340.0
Test Episode 19 Reward: 217.0
Test Episode 20 Reward: 340.0
Epoch 290 Average Test Reward: 292.7


100%|██████████| 2000/2000 [23:42<00:00,  1.41it/s]


Epoch 291 Mean Reward: 491.9205


100%|██████████| 2000/2000 [23:49<00:00,  1.40it/s]


Epoch 292 Mean Reward: 489.699


100%|██████████| 2000/2000 [24:14<00:00,  1.37it/s]


Epoch 293 Mean Reward: 498.1695


100%|██████████| 2000/2000 [23:56<00:00,  1.39it/s]


Epoch 294 Mean Reward: 490.5245


100%|██████████| 2000/2000 [23:45<00:00,  1.40it/s]


Epoch 295 Mean Reward: 494.789


100%|██████████| 2000/2000 [18:25<00:00,  1.81it/s]


Epoch 296 Mean Reward: 493.298


100%|██████████| 2000/2000 [13:31<00:00,  2.46it/s]


Epoch 297 Mean Reward: 491.942


100%|██████████| 2000/2000 [13:36<00:00,  2.45it/s]


Epoch 298 Mean Reward: 499.032


100%|██████████| 2000/2000 [13:40<00:00,  2.44it/s]


Epoch 299 Mean Reward: 500.997


100%|██████████| 2000/2000 [13:49<00:00,  2.41it/s]


Epoch 300 Mean Reward: 509.921
Epoch 300 Model saved to ./checkpoints/take_cover.ckpt
Epoch 300 test:
Test Episode 1 Reward: 617.0
Test Episode 2 Reward: 515.0
Test Episode 3 Reward: 515.0
Test Episode 4 Reward: 515.0
Test Episode 5 Reward: 272.0
Test Episode 6 Reward: 215.0
Test Episode 7 Reward: 515.0
Test Episode 8 Reward: 676.0
Test Episode 9 Reward: 515.0
Test Episode 10 Reward: 515.0
Test Episode 11 Reward: 515.0
Test Episode 12 Reward: 172.0
Test Episode 13 Reward: 515.0
Test Episode 14 Reward: 597.0
Test Episode 15 Reward: 184.0
Test Episode 16 Reward: 262.0
Test Episode 17 Reward: 175.0
Test Episode 18 Reward: 515.0
Test Episode 19 Reward: 174.0
Test Episode 20 Reward: 116.0
Epoch 300 Average Test Reward: 404.75


100%|██████████| 2000/2000 [13:42<00:00,  2.43it/s]


Epoch 301 Mean Reward: 503.2555


100%|██████████| 2000/2000 [13:47<00:00,  2.42it/s]


Epoch 302 Mean Reward: 498.795


100%|██████████| 2000/2000 [13:56<00:00,  2.39it/s]


Epoch 303 Mean Reward: 507.9505


100%|██████████| 2000/2000 [13:57<00:00,  2.39it/s]


Epoch 304 Mean Reward: 509.0445


100%|██████████| 2000/2000 [14:27<00:00,  2.31it/s]


Epoch 305 Mean Reward: 523.762


100%|██████████| 2000/2000 [14:22<00:00,  2.32it/s]


Epoch 306 Mean Reward: 512.5615


100%|██████████| 2000/2000 [13:38<00:00,  2.44it/s]


Epoch 307 Mean Reward: 494.2765


100%|██████████| 2000/2000 [13:36<00:00,  2.45it/s]


Epoch 308 Mean Reward: 494.566


100%|██████████| 2000/2000 [13:59<00:00,  2.38it/s]


Epoch 309 Mean Reward: 509.872


100%|██████████| 2000/2000 [13:50<00:00,  2.41it/s]


Epoch 310 Mean Reward: 497.901
Epoch 310 Model saved to ./checkpoints/take_cover.ckpt
Epoch 310 test:
Test Episode 1 Reward: 572.0
Test Episode 2 Reward: 717.0
Test Episode 3 Reward: 572.0
Test Episode 4 Reward: 229.0
Test Episode 5 Reward: 369.0
Test Episode 6 Reward: 168.0
Test Episode 7 Reward: 572.0
Test Episode 8 Reward: 125.0
Test Episode 9 Reward: 572.0
Test Episode 10 Reward: 132.0
Test Episode 11 Reward: 480.0
Test Episode 12 Reward: 257.0
Test Episode 13 Reward: 572.0
Test Episode 14 Reward: 572.0
Test Episode 15 Reward: 572.0
Test Episode 16 Reward: 572.0
Test Episode 17 Reward: 572.0
Test Episode 18 Reward: 572.0
Test Episode 19 Reward: 572.0
Test Episode 20 Reward: 572.0
Epoch 310 Average Test Reward: 467.05


100%|██████████| 2000/2000 [14:01<00:00,  2.38it/s]


Epoch 311 Mean Reward: 502.2405


100%|██████████| 2000/2000 [14:08<00:00,  2.36it/s]


Epoch 312 Mean Reward: 505.958


100%|██████████| 2000/2000 [13:57<00:00,  2.39it/s]


Epoch 313 Mean Reward: 502.418


100%|██████████| 2000/2000 [13:59<00:00,  2.38it/s]


Epoch 314 Mean Reward: 499.118


100%|██████████| 2000/2000 [14:22<00:00,  2.32it/s]


Epoch 315 Mean Reward: 515.348


100%|██████████| 2000/2000 [14:11<00:00,  2.35it/s]


Epoch 316 Mean Reward: 514.066


100%|██████████| 2000/2000 [14:18<00:00,  2.33it/s]


Epoch 317 Mean Reward: 515.7935


100%|██████████| 2000/2000 [14:09<00:00,  2.36it/s]


Epoch 318 Mean Reward: 510.0925


100%|██████████| 2000/2000 [14:17<00:00,  2.33it/s]


Epoch 319 Mean Reward: 511.5655


100%|██████████| 2000/2000 [14:22<00:00,  2.32it/s]


Epoch 320 Mean Reward: 517.483
Epoch 320 Model saved to ./checkpoints/take_cover.ckpt
Epoch 320 test:
Test Episode 1 Reward: 1240.0
Test Episode 2 Reward: 121.0
Test Episode 3 Reward: 164.0
Test Episode 4 Reward: 1240.0
Test Episode 5 Reward: 1240.0
Test Episode 6 Reward: 1240.0
Test Episode 7 Reward: 116.0
Test Episode 8 Reward: 125.0
Test Episode 9 Reward: 551.0
Test Episode 10 Reward: 1240.0
Test Episode 11 Reward: 504.0
Test Episode 12 Reward: 238.0
Test Episode 13 Reward: 1240.0
Test Episode 14 Reward: 282.0
Test Episode 15 Reward: 137.0
Test Episode 16 Reward: 143.0
Test Episode 17 Reward: 244.0
Test Episode 18 Reward: 1240.0
Test Episode 19 Reward: 1240.0
Test Episode 20 Reward: 218.0
Epoch 320 Average Test Reward: 638.15


100%|██████████| 2000/2000 [14:33<00:00,  2.29it/s]


Epoch 321 Mean Reward: 515.5485


100%|██████████| 2000/2000 [14:52<00:00,  2.24it/s]


Epoch 322 Mean Reward: 529.1555


100%|██████████| 2000/2000 [14:28<00:00,  2.30it/s]


Epoch 323 Mean Reward: 520.0


100%|██████████| 2000/2000 [13:44<00:00,  2.43it/s]


Epoch 324 Mean Reward: 491.4545


100%|██████████| 2000/2000 [14:34<00:00,  2.29it/s]


Epoch 325 Mean Reward: 517.7545


100%|██████████| 2000/2000 [14:27<00:00,  2.31it/s]


Epoch 326 Mean Reward: 516.0705


100%|██████████| 2000/2000 [14:10<00:00,  2.35it/s]


Epoch 327 Mean Reward: 506.3865


100%|██████████| 2000/2000 [14:14<00:00,  2.34it/s]


Epoch 328 Mean Reward: 508.12


100%|██████████| 2000/2000 [14:14<00:00,  2.34it/s]


Epoch 329 Mean Reward: 510.874


100%|██████████| 2000/2000 [14:08<00:00,  2.36it/s]


Epoch 330 Mean Reward: 509.1375
Epoch 330 Model saved to ./checkpoints/take_cover.ckpt
Epoch 330 test:
Test Episode 1 Reward: 158.0
Test Episode 2 Reward: 519.0
Test Episode 3 Reward: 519.0
Test Episode 4 Reward: 381.0
Test Episode 5 Reward: 519.0
Test Episode 6 Reward: 519.0
Test Episode 7 Reward: 319.0
Test Episode 8 Reward: 571.0
Test Episode 9 Reward: 208.0
Test Episode 10 Reward: 510.0
Test Episode 11 Reward: 519.0
Test Episode 12 Reward: 519.0
Test Episode 13 Reward: 274.0
Test Episode 14 Reward: 239.0
Test Episode 15 Reward: 519.0
Test Episode 16 Reward: 519.0
Test Episode 17 Reward: 519.0
Test Episode 18 Reward: 519.0
Test Episode 19 Reward: 320.0
Test Episode 20 Reward: 519.0
Epoch 330 Average Test Reward: 434.45


100%|██████████| 2000/2000 [14:22<00:00,  2.32it/s]


Epoch 331 Mean Reward: 511.937


100%|██████████| 2000/2000 [14:17<00:00,  2.33it/s]


Epoch 332 Mean Reward: 512.474


100%|██████████| 2000/2000 [14:42<00:00,  2.27it/s]


Epoch 333 Mean Reward: 528.1875


100%|██████████| 2000/2000 [15:19<00:00,  2.18it/s]


Epoch 334 Mean Reward: 514.953


100%|██████████| 2000/2000 [23:44<00:00,  1.40it/s]


Epoch 335 Mean Reward: 529.953


100%|██████████| 2000/2000 [24:41<00:00,  1.35it/s]


Epoch 336 Mean Reward: 519.7835


100%|██████████| 2000/2000 [15:27<00:00,  2.16it/s]


Epoch 337 Mean Reward: 525.6015


100%|██████████| 2000/2000 [24:04<00:00,  1.39it/s]


Epoch 338 Mean Reward: 526.3


100%|██████████| 2000/2000 [26:41<00:00,  1.25it/s]


Epoch 339 Mean Reward: 528.0005


100%|██████████| 2000/2000 [24:14<00:00,  1.38it/s]


Epoch 340 Mean Reward: 515.98
Epoch 340 Model saved to ./checkpoints/take_cover.ckpt
Epoch 340 test:
Test Episode 1 Reward: 529.0
Test Episode 2 Reward: 171.0
Test Episode 3 Reward: 529.0
Test Episode 4 Reward: 121.0
Test Episode 5 Reward: 254.0
Test Episode 6 Reward: 529.0
Test Episode 7 Reward: 529.0
Test Episode 8 Reward: 614.0
Test Episode 9 Reward: 509.0
Test Episode 10 Reward: 529.0
Test Episode 11 Reward: 257.0
Test Episode 12 Reward: 529.0
Test Episode 13 Reward: 529.0
Test Episode 14 Reward: 169.0
Test Episode 15 Reward: 306.0
Test Episode 16 Reward: 195.0
Test Episode 17 Reward: 152.0
Test Episode 18 Reward: 396.0
Test Episode 19 Reward: 169.0
Test Episode 20 Reward: 529.0
Epoch 340 Average Test Reward: 377.25


100%|██████████| 2000/2000 [18:51<00:00,  1.77it/s]


Epoch 341 Mean Reward: 525.81


100%|██████████| 2000/2000 [15:58<00:00,  2.09it/s]


Epoch 342 Mean Reward: 531.12


100%|██████████| 2000/2000 [20:34<00:00,  1.62it/s]


Epoch 343 Mean Reward: 515.112


100%|██████████| 2000/2000 [16:16<00:00,  2.05it/s]


Epoch 344 Mean Reward: 534.3145


100%|██████████| 2000/2000 [16:36<00:00,  2.01it/s]


Epoch 345 Mean Reward: 529.66


100%|██████████| 2000/2000 [18:00<00:00,  1.85it/s]


Epoch 346 Mean Reward: 527.7305


100%|██████████| 2000/2000 [15:30<00:00,  2.15it/s]


Epoch 347 Mean Reward: 531.7045


100%|██████████| 2000/2000 [15:56<00:00,  2.09it/s]


Epoch 348 Mean Reward: 518.899


100%|██████████| 2000/2000 [16:46<00:00,  1.99it/s]


Epoch 349 Mean Reward: 534.428


100%|██████████| 2000/2000 [15:54<00:00,  2.10it/s]


Epoch 350 Mean Reward: 517.1995
Epoch 350 Model saved to ./checkpoints/take_cover.ckpt
Epoch 350 test:
Test Episode 1 Reward: 410.0
Test Episode 2 Reward: 410.0
Test Episode 3 Reward: 450.0
Test Episode 4 Reward: 277.0
Test Episode 5 Reward: 240.0
Test Episode 6 Reward: 448.0
Test Episode 7 Reward: 410.0
Test Episode 8 Reward: 410.0
Test Episode 9 Reward: 410.0
Test Episode 10 Reward: 567.0
Test Episode 11 Reward: 107.0
Test Episode 12 Reward: 410.0
Test Episode 13 Reward: 410.0
Test Episode 14 Reward: 449.0
Test Episode 15 Reward: 261.0
Test Episode 16 Reward: 410.0
Test Episode 17 Reward: 240.0
Test Episode 18 Reward: 410.0
Test Episode 19 Reward: 410.0
Test Episode 20 Reward: 410.0
Epoch 350 Average Test Reward: 377.45


100%|██████████| 2000/2000 [17:01<00:00,  1.96it/s]


Epoch 351 Mean Reward: 533.4805


100%|██████████| 2000/2000 [16:28<00:00,  2.02it/s]


Epoch 352 Mean Reward: 528.8755


100%|██████████| 2000/2000 [16:43<00:00,  1.99it/s]


Epoch 353 Mean Reward: 531.0465


100%|██████████| 2000/2000 [16:21<00:00,  2.04it/s]


Epoch 354 Mean Reward: 520.68


100%|██████████| 2000/2000 [16:42<00:00,  1.99it/s]


Epoch 355 Mean Reward: 554.5335


100%|██████████| 2000/2000 [16:14<00:00,  2.05it/s]


Epoch 356 Mean Reward: 536.328


100%|██████████| 2000/2000 [16:45<00:00,  1.99it/s]


Epoch 357 Mean Reward: 538.1265


100%|██████████| 2000/2000 [19:59<00:00,  1.67it/s]


Epoch 358 Mean Reward: 543.8175


100%|██████████| 2000/2000 [28:18<00:00,  1.18it/s]


Epoch 359 Mean Reward: 534.093


100%|██████████| 2000/2000 [27:55<00:00,  1.19it/s]


Epoch 360 Mean Reward: 539.9115
Epoch 360 Model saved to ./checkpoints/take_cover.ckpt
Epoch 360 test:
Test Episode 1 Reward: 916.0
Test Episode 2 Reward: 394.0
Test Episode 3 Reward: 420.0
Test Episode 4 Reward: 159.0
Test Episode 5 Reward: 186.0
Test Episode 6 Reward: 916.0
Test Episode 7 Reward: 487.0
Test Episode 8 Reward: 380.0
Test Episode 9 Reward: 916.0
Test Episode 10 Reward: 916.0
Test Episode 11 Reward: 916.0
Test Episode 12 Reward: 361.0
Test Episode 13 Reward: 236.0
Test Episode 14 Reward: 958.0
Test Episode 15 Reward: 916.0
Test Episode 16 Reward: 580.0
Test Episode 17 Reward: 528.0
Test Episode 18 Reward: 916.0
Test Episode 19 Reward: 916.0
Test Episode 20 Reward: 916.0
Epoch 360 Average Test Reward: 646.65


100%|██████████| 2000/2000 [24:08<00:00,  1.38it/s]


Epoch 361 Mean Reward: 538.6985


100%|██████████| 2000/2000 [22:01<00:00,  1.51it/s]


Epoch 362 Mean Reward: 549.5115


100%|██████████| 2000/2000 [16:24<00:00,  2.03it/s]


Epoch 363 Mean Reward: 546.0435


100%|██████████| 2000/2000 [27:09<00:00,  1.23it/s]


Epoch 364 Mean Reward: 554.1565


100%|██████████| 2000/2000 [27:52<00:00,  1.20it/s]


Epoch 365 Mean Reward: 549.878


100%|██████████| 2000/2000 [26:54<00:00,  1.24it/s]


Epoch 366 Mean Reward: 532.1295


100%|██████████| 2000/2000 [27:21<00:00,  1.22it/s]


Epoch 367 Mean Reward: 538.6035


100%|██████████| 2000/2000 [23:15<00:00,  1.43it/s]


Epoch 368 Mean Reward: 544.3535


100%|██████████| 2000/2000 [18:00<00:00,  1.85it/s]


Epoch 369 Mean Reward: 539.49


100%|██████████| 2000/2000 [17:57<00:00,  1.86it/s]


Epoch 370 Mean Reward: 535.3105
Epoch 370 Model saved to ./checkpoints/take_cover.ckpt
Epoch 370 test:
Test Episode 1 Reward: 340.0
Test Episode 2 Reward: 396.0
Test Episode 3 Reward: 446.0
Test Episode 4 Reward: 615.0
Test Episode 5 Reward: 223.0
Test Episode 6 Reward: 615.0
Test Episode 7 Reward: 305.0
Test Episode 8 Reward: 615.0
Test Episode 9 Reward: 615.0
Test Episode 10 Reward: 615.0
Test Episode 11 Reward: 615.0
Test Episode 12 Reward: 369.0
Test Episode 13 Reward: 615.0
Test Episode 14 Reward: 150.0
Test Episode 15 Reward: 239.0
Test Episode 16 Reward: 120.0
Test Episode 17 Reward: 615.0
Test Episode 18 Reward: 615.0
Test Episode 19 Reward: 123.0
Test Episode 20 Reward: 615.0
Epoch 370 Average Test Reward: 443.05


100%|██████████| 2000/2000 [17:17<00:00,  1.93it/s]


Epoch 371 Mean Reward: 542.562


100%|██████████| 2000/2000 [17:04<00:00,  1.95it/s]


Epoch 372 Mean Reward: 556.3575


100%|██████████| 2000/2000 [16:19<00:00,  2.04it/s]


Epoch 373 Mean Reward: 556.519


100%|██████████| 2000/2000 [16:17<00:00,  2.05it/s]


Epoch 374 Mean Reward: 558.2355


100%|██████████| 2000/2000 [17:14<00:00,  1.93it/s]


Epoch 375 Mean Reward: 554.827


100%|██████████| 2000/2000 [17:35<00:00,  1.90it/s]


Epoch 376 Mean Reward: 555.392


100%|██████████| 2000/2000 [17:13<00:00,  1.94it/s]


Epoch 377 Mean Reward: 558.6315


100%|██████████| 2000/2000 [16:55<00:00,  1.97it/s]


Epoch 378 Mean Reward: 560.744


100%|██████████| 2000/2000 [16:50<00:00,  1.98it/s]


Epoch 379 Mean Reward: 561.128


100%|██████████| 2000/2000 [16:56<00:00,  1.97it/s]


Epoch 380 Mean Reward: 566.443
Epoch 380 Model saved to ./checkpoints/take_cover.ckpt
Epoch 380 test:
Test Episode 1 Reward: 226.0
Test Episode 2 Reward: 569.0
Test Episode 3 Reward: 569.0
Test Episode 4 Reward: 329.0
Test Episode 5 Reward: 267.0
Test Episode 6 Reward: 569.0
Test Episode 7 Reward: 628.0
Test Episode 8 Reward: 205.0
Test Episode 9 Reward: 113.0
Test Episode 10 Reward: 287.0
Test Episode 11 Reward: 163.0
Test Episode 12 Reward: 312.0
Test Episode 13 Reward: 569.0
Test Episode 14 Reward: 605.0
Test Episode 15 Reward: 173.0
Test Episode 16 Reward: 569.0
Test Episode 17 Reward: 569.0
Test Episode 18 Reward: 569.0
Test Episode 19 Reward: 141.0
Test Episode 20 Reward: 569.0
Epoch 380 Average Test Reward: 400.05


100%|██████████| 2000/2000 [16:44<00:00,  1.99it/s]


Epoch 381 Mean Reward: 567.4325


100%|██████████| 2000/2000 [16:20<00:00,  2.04it/s]


Epoch 382 Mean Reward: 551.3735


100%|██████████| 2000/2000 [16:32<00:00,  2.02it/s]


Epoch 383 Mean Reward: 562.473


100%|██████████| 2000/2000 [16:22<00:00,  2.04it/s]


Epoch 384 Mean Reward: 556.38


100%|██████████| 2000/2000 [16:45<00:00,  1.99it/s]


Epoch 385 Mean Reward: 566.626


100%|██████████| 2000/2000 [16:27<00:00,  2.03it/s]


Epoch 386 Mean Reward: 560.2685


100%|██████████| 2000/2000 [16:13<00:00,  2.06it/s]


Epoch 387 Mean Reward: 553.536


100%|██████████| 2000/2000 [16:31<00:00,  2.02it/s]


Epoch 388 Mean Reward: 563.624


100%|██████████| 2000/2000 [16:47<00:00,  1.99it/s]


Epoch 389 Mean Reward: 571.7825


100%|██████████| 2000/2000 [16:02<00:00,  2.08it/s]


Epoch 390 Mean Reward: 543.912
Epoch 390 Model saved to ./checkpoints/take_cover.ckpt
Epoch 390 test:
Test Episode 1 Reward: 947.0
Test Episode 2 Reward: 673.0
Test Episode 3 Reward: 196.0
Test Episode 4 Reward: 425.0
Test Episode 5 Reward: 125.0
Test Episode 6 Reward: 673.0
Test Episode 7 Reward: 673.0
Test Episode 8 Reward: 673.0
Test Episode 9 Reward: 673.0
Test Episode 10 Reward: 247.0
Test Episode 11 Reward: 673.0
Test Episode 12 Reward: 673.0
Test Episode 13 Reward: 673.0
Test Episode 14 Reward: 673.0
Test Episode 15 Reward: 424.0
Test Episode 16 Reward: 104.0
Test Episode 17 Reward: 673.0
Test Episode 18 Reward: 673.0
Test Episode 19 Reward: 428.0
Test Episode 20 Reward: 673.0
Epoch 390 Average Test Reward: 548.6


100%|██████████| 2000/2000 [17:37<00:00,  1.89it/s]


Epoch 391 Mean Reward: 579.1015


100%|██████████| 2000/2000 [17:14<00:00,  1.93it/s]


Epoch 392 Mean Reward: 572.6885


100%|██████████| 2000/2000 [16:39<00:00,  2.00it/s]


Epoch 393 Mean Reward: 560.503


100%|██████████| 2000/2000 [16:58<00:00,  1.96it/s]


Epoch 394 Mean Reward: 567.203


100%|██████████| 2000/2000 [16:57<00:00,  1.97it/s]


Epoch 395 Mean Reward: 573.741


100%|██████████| 2000/2000 [17:11<00:00,  1.94it/s]


Epoch 396 Mean Reward: 581.584


100%|██████████| 2000/2000 [17:25<00:00,  1.91it/s]


Epoch 397 Mean Reward: 585.2315


100%|██████████| 2000/2000 [16:31<00:00,  2.02it/s]


Epoch 398 Mean Reward: 559.0825


100%|██████████| 2000/2000 [16:43<00:00,  1.99it/s]


Epoch 399 Mean Reward: 562.3975


100%|██████████| 2000/2000 [17:31<00:00,  1.90it/s]


Epoch 400 Mean Reward: 578.828
Epoch 400 Model saved to ./checkpoints/take_cover.ckpt
Epoch 400 test:
Test Episode 1 Reward: 472.0
Test Episode 2 Reward: 187.0
Test Episode 3 Reward: 472.0
Test Episode 4 Reward: 245.0
Test Episode 5 Reward: 289.0
Test Episode 6 Reward: 472.0
Test Episode 7 Reward: 528.0
Test Episode 8 Reward: 472.0
Test Episode 9 Reward: 438.0
Test Episode 10 Reward: 472.0
Test Episode 11 Reward: 472.0
Test Episode 12 Reward: 472.0
Test Episode 13 Reward: 204.0
Test Episode 14 Reward: 472.0
Test Episode 15 Reward: 472.0
Test Episode 16 Reward: 126.0
Test Episode 17 Reward: 472.0
Test Episode 18 Reward: 472.0
Test Episode 19 Reward: 472.0
Test Episode 20 Reward: 121.0
Epoch 400 Average Test Reward: 390.1


100%|██████████| 2000/2000 [16:25<00:00,  2.03it/s]


Epoch 401 Mean Reward: 548.372


100%|██████████| 2000/2000 [17:11<00:00,  1.94it/s]


Epoch 402 Mean Reward: 579.387


100%|██████████| 2000/2000 [17:09<00:00,  1.94it/s]


Epoch 403 Mean Reward: 577.6085


100%|██████████| 2000/2000 [16:33<00:00,  2.01it/s]


Epoch 404 Mean Reward: 568.585


100%|██████████| 2000/2000 [19:10<00:00,  1.74it/s]


Epoch 405 Mean Reward: 601.5105


100%|██████████| 2000/2000 [23:49<00:00,  1.40it/s]


Epoch 406 Mean Reward: 583.7725


100%|██████████| 2000/2000 [23:27<00:00,  1.42it/s]


Epoch 407 Mean Reward: 592.5945


100%|██████████| 2000/2000 [18:41<00:00,  1.78it/s]


Epoch 408 Mean Reward: 565.6955


100%|██████████| 2000/2000 [18:48<00:00,  1.77it/s]


Epoch 409 Mean Reward: 576.9855


100%|██████████| 2000/2000 [18:27<00:00,  1.81it/s]


Epoch 410 Mean Reward: 585.1975
Epoch 410 Model saved to ./checkpoints/take_cover.ckpt
Epoch 410 test:
Test Episode 1 Reward: 517.0
Test Episode 2 Reward: 517.0
Test Episode 3 Reward: 628.0
Test Episode 4 Reward: 320.0
Test Episode 5 Reward: 517.0
Test Episode 6 Reward: 517.0
Test Episode 7 Reward: 517.0
Test Episode 8 Reward: 179.0
Test Episode 9 Reward: 517.0
Test Episode 10 Reward: 517.0
Test Episode 11 Reward: 144.0
Test Episode 12 Reward: 517.0
Test Episode 13 Reward: 380.0
Test Episode 14 Reward: 517.0
Test Episode 15 Reward: 204.0
Test Episode 16 Reward: 517.0
Test Episode 17 Reward: 517.0
Test Episode 18 Reward: 563.0
Test Episode 19 Reward: 292.0
Test Episode 20 Reward: 517.0
Epoch 410 Average Test Reward: 445.7


100%|██████████| 2000/2000 [18:49<00:00,  1.77it/s]


Epoch 411 Mean Reward: 592.014


100%|██████████| 2000/2000 [18:55<00:00,  1.76it/s]


Epoch 412 Mean Reward: 598.608


100%|██████████| 2000/2000 [18:47<00:00,  1.77it/s]


Epoch 413 Mean Reward: 603.7475


100%|██████████| 2000/2000 [19:54<00:00,  1.67it/s]


Epoch 414 Mean Reward: 600.1225


100%|██████████| 2000/2000 [18:52<00:00,  1.77it/s]


Epoch 415 Mean Reward: 597.3145


100%|██████████| 2000/2000 [24:41<00:00,  1.35it/s]


Epoch 416 Mean Reward: 594.7045


100%|██████████| 2000/2000 [29:01<00:00,  1.15it/s]


Epoch 417 Mean Reward: 601.1865


100%|██████████| 2000/2000 [19:26<00:00,  1.71it/s]


Epoch 418 Mean Reward: 617.356


100%|██████████| 2000/2000 [17:43<00:00,  1.88it/s]


Epoch 419 Mean Reward: 606.7025


100%|██████████| 2000/2000 [25:35<00:00,  1.30it/s]


Epoch 420 Mean Reward: 608.6365
Epoch 420 Model saved to ./checkpoints/take_cover.ckpt
Epoch 420 test:
Test Episode 1 Reward: 251.0
Test Episode 2 Reward: 171.0
Test Episode 3 Reward: 179.0
Test Episode 4 Reward: 114.0
Test Episode 5 Reward: 411.0
Test Episode 6 Reward: 215.0
Test Episode 7 Reward: 183.0
Test Episode 8 Reward: 471.0
Test Episode 9 Reward: 411.0
Test Episode 10 Reward: 302.0
Test Episode 11 Reward: 168.0
Test Episode 12 Reward: 411.0
Test Episode 13 Reward: 411.0
Test Episode 14 Reward: 202.0
Test Episode 15 Reward: 250.0
Test Episode 16 Reward: 411.0
Test Episode 17 Reward: 119.0
Test Episode 18 Reward: 160.0
Test Episode 19 Reward: 397.0
Test Episode 20 Reward: 481.0
Epoch 420 Average Test Reward: 285.9


100%|██████████| 2000/2000 [24:11<00:00,  1.38it/s]


Epoch 421 Mean Reward: 608.7225


100%|██████████| 2000/2000 [21:02<00:00,  1.58it/s]


Epoch 422 Mean Reward: 601.703


100%|██████████| 2000/2000 [31:37<00:00,  1.05it/s]


Epoch 423 Mean Reward: 597.0695


100%|██████████| 2000/2000 [22:23<00:00,  1.49it/s]


Epoch 424 Mean Reward: 602.6315


100%|██████████| 2000/2000 [19:37<00:00,  1.70it/s]


Epoch 425 Mean Reward: 599.793


100%|██████████| 2000/2000 [20:04<00:00,  1.66it/s]


Epoch 426 Mean Reward: 611.1535


100%|██████████| 2000/2000 [20:21<00:00,  1.64it/s]


Epoch 427 Mean Reward: 622.805


100%|██████████| 2000/2000 [19:59<00:00,  1.67it/s]


Epoch 428 Mean Reward: 617.269


100%|██████████| 2000/2000 [28:48<00:00,  1.16it/s]


Epoch 429 Mean Reward: 616.2455


100%|██████████| 2000/2000 [27:59<00:00,  1.19it/s]


Epoch 430 Mean Reward: 604.709
Epoch 430 Model saved to ./checkpoints/take_cover.ckpt
Epoch 430 test:
Test Episode 1 Reward: 342.0
Test Episode 2 Reward: 342.0
Test Episode 3 Reward: 342.0
Test Episode 4 Reward: 342.0
Test Episode 5 Reward: 105.0
Test Episode 6 Reward: 342.0
Test Episode 7 Reward: 414.0
Test Episode 8 Reward: 649.0
Test Episode 9 Reward: 108.0
Test Episode 10 Reward: 342.0
Test Episode 11 Reward: 110.0
Test Episode 12 Reward: 342.0
Test Episode 13 Reward: 342.0
Test Episode 14 Reward: 323.0
Test Episode 15 Reward: 342.0
Test Episode 16 Reward: 232.0
Test Episode 17 Reward: 342.0
Test Episode 18 Reward: 342.0
Test Episode 19 Reward: 118.0
Test Episode 20 Reward: 142.0
Epoch 430 Average Test Reward: 298.15


100%|██████████| 2000/2000 [19:35<00:00,  1.70it/s]


Epoch 431 Mean Reward: 632.1755


100%|██████████| 2000/2000 [20:29<00:00,  1.63it/s]


Epoch 432 Mean Reward: 622.9795


100%|██████████| 2000/2000 [19:12<00:00,  1.74it/s]


Epoch 433 Mean Reward: 619.9805


100%|██████████| 2000/2000 [18:41<00:00,  1.78it/s]


Epoch 434 Mean Reward: 621.3085


100%|██████████| 2000/2000 [18:36<00:00,  1.79it/s]


Epoch 435 Mean Reward: 612.564


100%|██████████| 2000/2000 [17:28<00:00,  1.91it/s]


Epoch 436 Mean Reward: 598.57


100%|██████████| 2000/2000 [18:27<00:00,  1.81it/s]


Epoch 437 Mean Reward: 607.4635


100%|██████████| 2000/2000 [17:55<00:00,  1.86it/s]


Epoch 438 Mean Reward: 593.3265


100%|██████████| 2000/2000 [18:28<00:00,  1.81it/s]


Epoch 439 Mean Reward: 612.546


100%|██████████| 2000/2000 [18:36<00:00,  1.79it/s]


Epoch 440 Mean Reward: 602.5675
Epoch 440 Model saved to ./checkpoints/take_cover.ckpt
Epoch 440 test:
Test Episode 1 Reward: 409.0
Test Episode 2 Reward: 119.0
Test Episode 3 Reward: 409.0
Test Episode 4 Reward: 142.0
Test Episode 5 Reward: 409.0
Test Episode 6 Reward: 409.0
Test Episode 7 Reward: 196.0
Test Episode 8 Reward: 203.0
Test Episode 9 Reward: 361.0
Test Episode 10 Reward: 409.0
Test Episode 11 Reward: 409.0
Test Episode 12 Reward: 409.0
Test Episode 13 Reward: 186.0
Test Episode 14 Reward: 451.0
Test Episode 15 Reward: 117.0
Test Episode 16 Reward: 409.0
Test Episode 17 Reward: 142.0
Test Episode 18 Reward: 340.0
Test Episode 19 Reward: 409.0
Test Episode 20 Reward: 304.0
Epoch 440 Average Test Reward: 312.1


100%|██████████| 2000/2000 [26:00<00:00,  1.28it/s]


Epoch 441 Mean Reward: 594.492


100%|██████████| 2000/2000 [30:58<00:00,  1.08it/s]


Epoch 442 Mean Reward: 614.755


100%|██████████| 2000/2000 [23:41<00:00,  1.41it/s]


Epoch 443 Mean Reward: 612.836


100%|██████████| 2000/2000 [21:43<00:00,  1.53it/s]


Epoch 444 Mean Reward: 619.3535


100%|██████████| 2000/2000 [23:45<00:00,  1.40it/s]


Epoch 445 Mean Reward: 619.142


100%|██████████| 2000/2000 [19:30<00:00,  1.71it/s]


Epoch 446 Mean Reward: 633.9815


100%|██████████| 2000/2000 [19:24<00:00,  1.72it/s]


Epoch 447 Mean Reward: 628.94


100%|██████████| 2000/2000 [19:04<00:00,  1.75it/s]


Epoch 448 Mean Reward: 622.1315


100%|██████████| 2000/2000 [19:45<00:00,  1.69it/s]


Epoch 449 Mean Reward: 643.899


100%|██████████| 2000/2000 [19:48<00:00,  1.68it/s]


Epoch 450 Mean Reward: 642.052
Epoch 450 Model saved to ./checkpoints/take_cover.ckpt
Epoch 450 test:
Test Episode 1 Reward: 1389.0
Test Episode 2 Reward: 293.0
Test Episode 3 Reward: 375.0
Test Episode 4 Reward: 550.0
Test Episode 5 Reward: 1389.0
Test Episode 6 Reward: 239.0
Test Episode 7 Reward: 194.0
Test Episode 8 Reward: 1389.0
Test Episode 9 Reward: 647.0
Test Episode 10 Reward: 1389.0
Test Episode 11 Reward: 123.0
Test Episode 12 Reward: 1389.0
Test Episode 13 Reward: 1389.0
Test Episode 14 Reward: 1389.0
Test Episode 15 Reward: 135.0
Test Episode 16 Reward: 1389.0
Test Episode 17 Reward: 139.0
Test Episode 18 Reward: 544.0
Test Episode 19 Reward: 1389.0
Test Episode 20 Reward: 1389.0
Epoch 450 Average Test Reward: 856.45


100%|██████████| 2000/2000 [19:50<00:00,  1.68it/s]


Epoch 451 Mean Reward: 646.789


100%|██████████| 2000/2000 [20:28<00:00,  1.63it/s]


Epoch 452 Mean Reward: 668.2075


100%|██████████| 2000/2000 [19:46<00:00,  1.69it/s]


Epoch 453 Mean Reward: 646.675


100%|██████████| 2000/2000 [19:38<00:00,  1.70it/s]


Epoch 454 Mean Reward: 639.2185


100%|██████████| 2000/2000 [19:33<00:00,  1.70it/s]


Epoch 455 Mean Reward: 636.839


100%|██████████| 2000/2000 [18:36<00:00,  1.79it/s]


Epoch 456 Mean Reward: 608.05


100%|██████████| 2000/2000 [18:19<00:00,  1.82it/s]


Epoch 457 Mean Reward: 596.964


100%|██████████| 2000/2000 [19:08<00:00,  1.74it/s]


Epoch 458 Mean Reward: 623.2215


100%|██████████| 2000/2000 [19:02<00:00,  1.75it/s]


Epoch 459 Mean Reward: 620.9855


100%|██████████| 2000/2000 [19:25<00:00,  1.72it/s]


Epoch 460 Mean Reward: 635.847
Epoch 460 Model saved to ./checkpoints/take_cover.ckpt
Epoch 460 test:
Test Episode 1 Reward: 209.0
Test Episode 2 Reward: 209.0
Test Episode 3 Reward: 209.0
Test Episode 4 Reward: 696.0
Test Episode 5 Reward: 331.0
Test Episode 6 Reward: 209.0
Test Episode 7 Reward: 287.0
Test Episode 8 Reward: 259.0
Test Episode 9 Reward: 209.0
Test Episode 10 Reward: 211.0
Test Episode 11 Reward: 381.0
Test Episode 12 Reward: 184.0
Test Episode 13 Reward: 209.0
Test Episode 14 Reward: 101.0
Test Episode 15 Reward: 393.0
Test Episode 16 Reward: 209.0
Test Episode 17 Reward: 209.0
Test Episode 18 Reward: 209.0
Test Episode 19 Reward: 172.0
Test Episode 20 Reward: 209.0
Epoch 460 Average Test Reward: 255.25


100%|██████████| 2000/2000 [19:42<00:00,  1.69it/s]


Epoch 461 Mean Reward: 637.4715


100%|██████████| 2000/2000 [19:07<00:00,  1.74it/s]


Epoch 462 Mean Reward: 613.8055


100%|██████████| 2000/2000 [20:03<00:00,  1.66it/s]


Epoch 463 Mean Reward: 635.376


100%|██████████| 2000/2000 [20:30<00:00,  1.63it/s]


Epoch 464 Mean Reward: 634.0815


100%|██████████| 2000/2000 [20:40<00:00,  1.61it/s]


Epoch 465 Mean Reward: 631.583


100%|██████████| 2000/2000 [21:21<00:00,  1.56it/s]


Epoch 466 Mean Reward: 649.792


100%|██████████| 2000/2000 [20:58<00:00,  1.59it/s]


Epoch 467 Mean Reward: 629.14


100%|██████████| 2000/2000 [20:36<00:00,  1.62it/s]


Epoch 468 Mean Reward: 633.995


100%|██████████| 2000/2000 [21:04<00:00,  1.58it/s]


Epoch 469 Mean Reward: 633.857


100%|██████████| 2000/2000 [21:12<00:00,  1.57it/s]


Epoch 470 Mean Reward: 631.1765
Epoch 470 Model saved to ./checkpoints/take_cover.ckpt
Epoch 470 test:
Test Episode 1 Reward: 412.0
Test Episode 2 Reward: 100.0
Test Episode 3 Reward: 412.0
Test Episode 4 Reward: 162.0
Test Episode 5 Reward: 310.0
Test Episode 6 Reward: 779.0
Test Episode 7 Reward: 412.0
Test Episode 8 Reward: 281.0
Test Episode 9 Reward: 412.0
Test Episode 10 Reward: 146.0
Test Episode 11 Reward: 919.0
Test Episode 12 Reward: 412.0
Test Episode 13 Reward: 412.0
Test Episode 14 Reward: 412.0
Test Episode 15 Reward: 651.0
Test Episode 16 Reward: 610.0
Test Episode 17 Reward: 702.0
Test Episode 18 Reward: 402.0
Test Episode 19 Reward: 412.0
Test Episode 20 Reward: 361.0
Epoch 470 Average Test Reward: 435.95


100%|██████████| 2000/2000 [22:35<00:00,  1.48it/s]


Epoch 471 Mean Reward: 652.4615


100%|██████████| 2000/2000 [22:20<00:00,  1.49it/s]


Epoch 472 Mean Reward: 634.3975


100%|██████████| 2000/2000 [22:07<00:00,  1.51it/s]


Epoch 473 Mean Reward: 615.697


100%|██████████| 2000/2000 [22:29<00:00,  1.48it/s]


Epoch 474 Mean Reward: 622.92


100%|██████████| 2000/2000 [23:34<00:00,  1.41it/s]


Epoch 475 Mean Reward: 655.4805


100%|██████████| 2000/2000 [24:37<00:00,  1.35it/s]


Epoch 476 Mean Reward: 688.3445


100%|██████████| 2000/2000 [22:50<00:00,  1.46it/s]


Epoch 477 Mean Reward: 635.4865


100%|██████████| 2000/2000 [22:45<00:00,  1.46it/s]


Epoch 478 Mean Reward: 635.523


100%|██████████| 2000/2000 [23:24<00:00,  1.42it/s]


Epoch 479 Mean Reward: 645.663


100%|██████████| 2000/2000 [22:36<00:00,  1.47it/s]


Epoch 480 Mean Reward: 619.2845
Epoch 480 Model saved to ./checkpoints/take_cover.ckpt
Epoch 480 test:
Test Episode 1 Reward: 134.0
Test Episode 2 Reward: 1150.0
Test Episode 3 Reward: 1150.0
Test Episode 4 Reward: 107.0
Test Episode 5 Reward: 1150.0
Test Episode 6 Reward: 206.0
Test Episode 7 Reward: 138.0
Test Episode 8 Reward: 1150.0
Test Episode 9 Reward: 1150.0
Test Episode 10 Reward: 1150.0
Test Episode 11 Reward: 151.0
Test Episode 12 Reward: 1150.0
Test Episode 13 Reward: 270.0
Test Episode 14 Reward: 576.0
Test Episode 15 Reward: 1150.0
Test Episode 16 Reward: 1150.0
Test Episode 17 Reward: 537.0
Test Episode 18 Reward: 1150.0
Test Episode 19 Reward: 1150.0
Test Episode 20 Reward: 1150.0
Epoch 480 Average Test Reward: 795.95


100%|██████████| 2000/2000 [20:36<00:00,  1.62it/s]


Epoch 481 Mean Reward: 622.235


100%|██████████| 2000/2000 [20:07<00:00,  1.66it/s]


Epoch 482 Mean Reward: 618.2115


100%|██████████| 2000/2000 [19:58<00:00,  1.67it/s]


Epoch 483 Mean Reward: 619.3955


100%|██████████| 2000/2000 [20:00<00:00,  1.67it/s]


Epoch 484 Mean Reward: 609.5485


100%|██████████| 2000/2000 [20:03<00:00,  1.66it/s]


Epoch 485 Mean Reward: 618.7145


100%|██████████| 2000/2000 [19:23<00:00,  1.72it/s]


Epoch 486 Mean Reward: 593.975


100%|██████████| 2000/2000 [19:09<00:00,  1.74it/s]


Epoch 487 Mean Reward: 592.4835


100%|██████████| 2000/2000 [20:07<00:00,  1.66it/s]


Epoch 488 Mean Reward: 624.9615


100%|██████████| 2000/2000 [19:50<00:00,  1.68it/s]


Epoch 489 Mean Reward: 628.2065


100%|██████████| 2000/2000 [19:42<00:00,  1.69it/s]


Epoch 490 Mean Reward: 604.4855
Epoch 490 Model saved to ./checkpoints/take_cover.ckpt
Epoch 490 test:
Test Episode 1 Reward: 366.0
Test Episode 2 Reward: 335.0
Test Episode 3 Reward: 335.0
Test Episode 4 Reward: 310.0
Test Episode 5 Reward: 335.0
Test Episode 6 Reward: 335.0
Test Episode 7 Reward: 335.0
Test Episode 8 Reward: 123.0
Test Episode 9 Reward: 335.0
Test Episode 10 Reward: 335.0
Test Episode 11 Reward: 161.0
Test Episode 12 Reward: 131.0
Test Episode 13 Reward: 335.0
Test Episode 14 Reward: 506.0
Test Episode 15 Reward: 335.0
Test Episode 16 Reward: 348.0
Test Episode 17 Reward: 335.0
Test Episode 18 Reward: 226.0
Test Episode 19 Reward: 335.0
Test Episode 20 Reward: 335.0
Epoch 490 Average Test Reward: 309.55


100%|██████████| 2000/2000 [20:44<00:00,  1.61it/s]


Epoch 491 Mean Reward: 599.046


100%|██████████| 2000/2000 [22:08<00:00,  1.51it/s]


Epoch 492 Mean Reward: 633.457


100%|██████████| 2000/2000 [21:27<00:00,  1.55it/s]


Epoch 493 Mean Reward: 630.099


100%|██████████| 2000/2000 [18:24<00:00,  1.81it/s]


Epoch 494 Mean Reward: 614.7795


100%|██████████| 2000/2000 [18:32<00:00,  1.80it/s]


Epoch 495 Mean Reward: 612.602


100%|██████████| 2000/2000 [18:24<00:00,  1.81it/s]


Epoch 496 Mean Reward: 630.8125


100%|██████████| 2000/2000 [24:02<00:00,  1.39it/s]


Epoch 497 Mean Reward: 611.358


100%|██████████| 2000/2000 [31:00<00:00,  1.07it/s]


Epoch 498 Mean Reward: 631.671


100%|██████████| 2000/2000 [26:41<00:00,  1.25it/s]


Epoch 499 Mean Reward: 641.312


100%|██████████| 2000/2000 [21:03<00:00,  1.58it/s]


Epoch 500 Mean Reward: 654.0015
Epoch 500 Model saved to ./checkpoints/take_cover.ckpt
Epoch 500 test:
Test Episode 1 Reward: 345.0
Test Episode 2 Reward: 100.0
Test Episode 3 Reward: 472.0
Test Episode 4 Reward: 128.0
Test Episode 5 Reward: 345.0
Test Episode 6 Reward: 248.0
Test Episode 7 Reward: 345.0
Test Episode 8 Reward: 345.0
Test Episode 9 Reward: 108.0
Test Episode 10 Reward: 345.0
Test Episode 11 Reward: 345.0
Test Episode 12 Reward: 345.0
Test Episode 13 Reward: 112.0
Test Episode 14 Reward: 163.0
Test Episode 15 Reward: 345.0
Test Episode 16 Reward: 118.0
Test Episode 17 Reward: 249.0
Test Episode 18 Reward: 296.0
Test Episode 19 Reward: 187.0
Test Episode 20 Reward: 117.0
Epoch 500 Average Test Reward: 252.9
[(856.45, 450), (795.95, 480), (683.55, 260), (646.65, 360), (638.15, 320), (632.4, 210), (598.45, 150), (575.3, 90), (559.05, 250), (548.6, 390), (541.75, 200), (540.6, 100), (483.3, 170), (467.05, 310), (462.8, 70), (445.7, 410), (443.05, 370), (440.55, 220), (439.15

In [None]:
"""
Delete the "checkpoint" file in model directory and manually specify a checkpoint file path
if a screen resolution that's different from the one that the agent was trained on is to be used.
"""

#Get a list of checkpoints saved during training

ckpts = tf.train.get_checkpoint_state('checkpoints').all_model_checkpoint_paths

#Test the trained model at a specific checkpoint by only choosing actions with a greedy strategy

ckpt_rank = list()

test_reward = test_agent(DQN, num_episodes=20,
                                 training=False,
                                 load_model=True,
                                 depth=False,
                                 model_dir=ckpts[34])
    
print('Epoch {} Average Test Reward: {}'.format(checkpoint, test_reward))
ckpt_rank.append((test_reward, int(checkpoint.split('-')[-1])))
    
print(sorted(ckpt_rank, reverse=True))


Loading model from checkpoints\take_cover.ckpt-450
INFO:tensorflow:Restoring parameters from checkpoints\take_cover.ckpt-450
Test Episode 1 Reward: 1389.0
Test Episode 2 Reward: 187.0
Test Episode 3 Reward: 191.0
Test Episode 4 Reward: 1389.0
Test Episode 5 Reward: 321.0
Test Episode 6 Reward: 1389.0
Test Episode 7 Reward: 1389.0
Test Episode 8 Reward: 361.0
Test Episode 9 Reward: 1389.0
Test Episode 10 Reward: 110.0
Test Episode 11 Reward: 248.0
Test Episode 12 Reward: 1389.0
Test Episode 13 Reward: 592.0
Test Episode 14 Reward: 1389.0
Test Episode 15 Reward: 241.0
Test Episode 16 Reward: 733.0
Test Episode 17 Reward: 213.0
Test Episode 18 Reward: 124.0
Test Episode 19 Reward: 1389.0
