In [1]:
import time

import tensorflow as tf
import numpy as np
import vizdoom as vd

from skimage.transform import rescale
from tqdm import trange
from IPython.display import HTML


In [2]:
#Specify the game scenario and the screen format/resolution

game = vd.DoomGame()
game.set_screen_format(vd.ScreenFormat.BGR24)
game.set_screen_resolution(vd.ScreenResolution.RES_640X480)
game.set_depth_buffer_enabled(False)
game.load_config('take_cover.cfg')

down_sample_ratio = 0.125
width = int(game.get_screen_width()*down_sample_ratio)
height = int(game.get_screen_height()*down_sample_ratio)
channels = game.get_screen_channels() + int(game.is_depth_buffer_enabled())

#Specify the available actions in the scenario

available_actions = game.get_available_buttons()
actions = [list(ohe) for ohe in list(np.identity(len(available_actions)))]
num_actions = len(available_actions)

#Specify the Q-network learning parameters

frame_delay = 12
buffer_size = 25000
epochs = 1000
steps_per_epoch = 2000
phase1 = 0.2*epochs
phase2 = 0.9*epochs
learning_rate = 0.0025
gamma = 0
start_epsilon = 1.0
end_epsilon = 0.1
batch_size = 100
load_model = False
save_model = True
model_dir = './checkpoints/take_cover.ckpt'
num_ckpts = 40


In [3]:
#Create a buffer object that holds a set of training experiences (state-action-reward tuples)

class Buffer():
    def __init__(self, size=1000):
        self.buffer = list()
        self.length = len(self.buffer)
        self.size = size
        
#Add a new experience to the buffer (remove the oldest experience if the buffer is already full)
        
    def add_experience(self, experience):
        if self.length + 1 >= self.size:
            self.buffer[0:(self.length + 1) - self.size] = []
        
        self.buffer.append(experience)
        self.length = len(self.buffer)
            
#Return a batch of experience arrays randomly sampled from the buffer
            
    def sample_buffer(self, sample_size):
        sample = np.random.randint(self.length, size=sample_size)
        s1 = np.concatenate([self.buffer[idx][0] for idx in sample], axis=0)
        a = np.array([self.buffer[idx][1] for idx in sample])
        r = np.array([self.buffer[idx][2] for idx in sample])
        s2 = np.concatenate([self.buffer[idx][3] for idx in sample], axis=0)
        terminal = np.array([self.buffer[idx][4] for idx in sample], dtype=np.int32)
        
        return s1, a, r, s2, terminal

#Downsample and normalize an image array representing the game state at a given time stamp

def preprocess(image, down_sample_ratio=1):
    if float(down_sample_ratio) != 1.0:
        image = rescale(image=image,
                        scale=(down_sample_ratio,
                               down_sample_ratio),
                        mode='reflect')
    image = image.astype(np.float32)
    image = np.expand_dims(image, axis=0)

    return image

#Test the agent using a currently training or previously trained model

def test_agent(model, num_episodes, load_model, depth, training=True, session=None, model_dir=None):
    if load_model == True:
        sess = tf.Session()
        print('Loading model from', model_dir)
        tf.train.Saver().restore(sess, model_dir)
        
#Require an existing session if a pretrained model isn't provided
        
    elif load_model == False:
        sess = session

    game.set_sound_enabled(False)
    episode_rewards = list()
    
    game.init()

    for i in range(num_episodes):
        game.new_episode()
    
        while not game.is_episode_finished():
            state = game.get_state()
            
            if depth == False:
                state_buffer = np.moveaxis(state.screen_buffer, 0, 2)
            
            elif depth == True:
                depth_buffer = state.depth_buffer
                state_buffer = np.stack((state.screen_buffer,
                                         depth_buffer), axis=-1)
                
            state1 = preprocess(state_buffer, down_sample_ratio)
            action = model.choose_action(sess, state1)[0]
            reward = game.make_action(actions[action])
            
#Add a delay between each time step so that the episodes occur at normal speed

            time.sleep(0.02)
        
        episode_rewards.append(game.get_total_reward())
        print('Test Episode {} Reward: {}'.format(i + 1, game.get_total_reward()))
    
    game.close()

    return np.mean(episode_rewards)


In [4]:
#Create a Q-network to estimate values and choose actions for a given state

class Q_network():
    def __init__(self, network_name, height, width, channels, learning_rate=0.001):
        self.learning_rate = learning_rate
        self.s_t = tf.placeholder(tf.float32,
                                  shape=[None, height, width, channels],
                                  name=network_name + '_state'
                                 )
        self.a_t = tf.placeholder(tf.int32,
                                  shape=[None],
                                  name=network_name + '_action'
                                 )
        self.Q_target = tf.placeholder(tf.float32,
                                       shape=[None, num_actions],
                                       name=network_name + '_Q_target'
                                      )

        self.input_layer = tf.reshape(self.s_t,
                                      [-1, height, width, channels],
                                      name=network_name + '_input_layer'
                                     )
        self.conv1 = tf.layers.conv2d(inputs=self.input_layer,
                                      filters=32,
                                      kernel_size=[8, 8],
                                      strides=[4, 4],
                                      padding='valid',
                                      activation=tf.nn.relu,
                                      name=network_name + '_conv1_layer'
                                     )
        self.conv2 = tf.layers.conv2d(inputs=self.conv1,
                                      filters=64,
                                      kernel_size=[4, 4],
                                      strides=[2, 2],
                                      padding='valid',
                                      activation=tf.nn.relu,
                                      name=network_name + '_conv2_layer'
                                     )
        self.flatten = tf.layers.flatten(self.conv2,
                                         name=network_name + '_flatten'
                                        )
        self.dense = tf.layers.dense(inputs=self.flatten,
                                      units=256,
                                      activation=tf.nn.relu,
                                      name=network_name + '_dense1_layer'
                                    )
        self.Q_values = tf.layers.dense(inputs=self.dense,
                                        units=len(actions),
                                        activation=None,
                                        name=network_name + '_output_layer'
                                       )        
    
        self.best_action = tf.argmax(self.Q_values, 1)
        self.loss = tf.losses.mean_squared_error(self.Q_values,
                                                 self.Q_target)
        self.adam = tf.train.AdamOptimizer(learning_rate=self.learning_rate,
                                           name=network_name + '_adam'
                                          )
        self.train = self.adam.minimize(self.loss)
        
    def update_lr(self):
        self.learning_rate = 0.98*self.learning_rate
        
        return self.learning_rate

    def calculate_loss(self, session, s, q):
        L, _ = session.run([self.loss, self.train],
                           feed_dict={self.s_t: s,
                                      self.Q_target: q})
    
        return L

#Return the array of Q-values and the best action associated with a given state

    def get_Q_values(self, session, s):
        Q = session.run(self.Q_values,
                        feed_dict={self.s_t: s})

        return Q
    
    def choose_action(self, session, s):
        a = session.run(self.best_action,
                        feed_dict={self.s_t: s})
    
        return a
    
#Create a list of variable update operations

def update_graph(variables):
    update_ops = list()
    
#Assign weight values from the network created first to the one created second
    
    for idx, variable in enumerate(variables[:len(variables)//2]):
        op = variable.assign(variables[idx + len(variables)//2].value())
        update_ops.append(op)
    
    return update_ops

#Update the target network parameters to match those of the online network

def update_target(ops, session):
    for op in update_ops:
        session.run(op)


In [5]:
#For each time step, collect the following data:
#The current game state
#The action that was taken taken
#The reward obtained from the chosen action
#The next game state (store the first game state if the previous action ends the episode)
#A variable indicating whether the episode is over yet


tf.reset_default_graph()

#Instantiate the target network before the online network so that it's updated correctly

target_net = Q_network(network_name='target',
                       learning_rate=learning_rate,
                       height=height,
                       width=width,
                       channels=channels)
DQN = Q_network(network_name='online',
                learning_rate=learning_rate,
                height=height,
                width=width,
                channels=channels)

exp_buffer = Buffer(size=buffer_size)
session = tf.Session()
saver = tf.train.Saver(max_to_keep=num_ckpts, reshape=True)
weights = tf.trainable_variables()

update_ops = update_graph(weights)

if load_model == True:
    print('Loading model from', model_dir)
    tf.train.Saver().restore(session, model_dir)
    
elif load_model == False:
    session.run(tf.global_variables_initializer())

game.set_sound_enabled(False)
#game.init()

t = 0
epoch_rank = list()


In [6]:
#Accumulate experiences in the buffer using an epsilon-greedy strategy with three training phases

for epoch in range(epochs):
    epoch_rewards = list()
    game.init()

    for step in trange(steps_per_epoch, leave=True):
        experience = list()
        
        while not game.is_episode_finished():
            state = game.get_state()
            
#Substitute an array of zeros for the depth buffer if that setting is disabled

            if game.is_depth_buffer_enabled() == False:
                state1_buffer = np.moveaxis(state.screen_buffer, 0, 2)
            else:
                depth_buffer = state.depth_buffer
                state1_buffer = np.stack((state.screen_buffer,
                                          depth_buffer), axis=-1)
                
            state1 = preprocess(state1_buffer, down_sample_ratio)
            
#Explore the environment by choosing random actions with 100% probability for the first phase of training

            if epoch < phase1:
                action = np.random.randint(num_actions)
            
#Increase the probability of greedily choosing an action by a constant amount at each epoch in the second phase
            
            elif epoch < phase2:
                epsilon = start_epsilon - (epoch + 1 - phase1)*(start_epsilon - end_epsilon)/(phase2 - phase1)
            
                if np.random.uniform(0, 1) <= epsilon:
                    action = np.random.randint(num_actions)
                else:
                    action = DQN.choose_action(session, state1)[0]

#Select a random action with 10% probability in the final phase of training
                
            else:
                if np.random.uniform(0, 1) <= end_epsilon:
                    action = np.random.randint(num_actions)
                else:
                    action = DQN.choose_action(session, state1)[0]

            reward = game.make_action(actions[action], frame_delay)
            done = game.is_episode_finished()
            
            if done == False:
                state = game.get_state()
                
                if game.is_depth_buffer_enabled() == False:
                    state2_buffer = np.moveaxis(state.screen_buffer, 0, 2)
                else:
                    depth_buffer = state.depth_buffer
                    state2_buffer = np.stack((state.screen_buffer,
                                              depth_buffer), axis=-1)
                
                state2 = preprocess(state2_buffer, down_sample_ratio)
                
            elif done == True:
                state2 = state1
        
#Add the experience obtained from each time step to the buffer

            t += 1
            exp_buffer.add_experience((state1, action, reward, state2, done))
        
#Sample a minibatch from the buffer if there are enough experiences in the buffer

        if exp_buffer.length > batch_size:
            s1, a, r, s2, terminal = exp_buffer.sample_buffer(batch_size)
            
#Get the target values from the target Q-network
            
            target_Q = np.max(target_net.get_Q_values(session, s2), axis=1)
            
#Train the online Q-network by using a minibatch to update the action-value function
            
            Q2 = DQN.get_Q_values(session, s1)
            Q2[np.arange(batch_size), a] = r + gamma*(1 - terminal)*target_Q
            DQN.calculate_loss(session, s1, Q2)
            
        epoch_rewards.append(game.get_total_reward())
    
    game.close()
    
#Increase the discount factor at each epoch until it reaches 0.99
    
    if gamma < 0.99:
        gamma = 1-.98*(1-gamma)
    elif gamma >= 0.99:
        gamma = 0.99
        
#Decrease the learning rate at each epoch

    DQN.update_lr()
    target_net.update_lr()
    
    print('Epoch {} Mean Reward: {}'.format(epoch + 1, np.mean(epoch_rewards)))
    
#Update the target network every 10 epochs
    
    if (epoch + 1) % 10 == 0 and epoch > 0:
        update_target(update_ops, session)
        
#Save the model and test the agent for 10 episodes every 20 epochs
    
    if (epoch + 1) % 10 == 0 and epoch > 0:
        if save_model == True:
            checkpoint = model_dir + '-' + str(epoch + 1)
            print('Epoch {} Model saved to {}'.format(epoch + 1, model_dir))
            saver.save(session, model_dir, global_step=epoch + 1)
            
        update_target(update_ops, session)
        
        print('Epoch {} test:'.format(epoch + 1))
        test_reward = test_agent(DQN, num_episodes=20,
                                 training=True,
                                 load_model=False,
                                 depth=False,
                                 session=session,
                                 model_dir=model_dir)
        print('Epoch {} Average Test Reward: {}'.format(epoch + 1, test_reward))
        
        epoch_rank.append((test_reward, epoch + 1))
        
#Return a sorted list of epoch checkpoints based on average test episode reward
        
print(sorted(epoch_rank, reverse=True))
print('{} time steps experienced during training'.format(t))
#game.close()
    

100%|██████████| 2000/2000 [00:00<00:00, 2679.63it/s]


Epoch 1 Mean Reward: 264.0


100%|██████████| 2000/2000 [00:00<00:00, 2003.62it/s]


Epoch 2 Mean Reward: 377.0


100%|██████████| 2000/2000 [00:01<00:00, 1925.75it/s]


Epoch 3 Mean Reward: 404.0


100%|██████████| 2000/2000 [00:32<00:00, 60.62it/s]


Epoch 4 Mean Reward: 240.0


100%|██████████| 2000/2000 [00:33<00:00, 59.29it/s]


Epoch 5 Mean Reward: 807.0


100%|██████████| 2000/2000 [00:32<00:00, 60.76it/s]


Epoch 6 Mean Reward: 460.0


100%|██████████| 2000/2000 [00:33<00:00, 60.18it/s]


Epoch 7 Mean Reward: 540.0


100%|██████████| 2000/2000 [00:33<00:00, 59.95it/s]


Epoch 8 Mean Reward: 553.0


100%|██████████| 2000/2000 [00:33<00:00, 59.87it/s]


Epoch 9 Mean Reward: 566.0


100%|██████████| 2000/2000 [00:32<00:00, 61.24it/s]


Epoch 10 Mean Reward: 332.0
Epoch 10 Model saved to ./checkpoints/take_cover.ckpt
Epoch 10 test:
Test Episode 1 Reward: 136.0
Test Episode 2 Reward: 240.0
Test Episode 3 Reward: 334.0
Test Episode 4 Reward: 297.0
Test Episode 5 Reward: 334.0
Test Episode 6 Reward: 334.0
Test Episode 7 Reward: 334.0
Test Episode 8 Reward: 535.0
Test Episode 9 Reward: 334.0
Test Episode 10 Reward: 334.0
Test Episode 11 Reward: 334.0
Test Episode 12 Reward: 334.0
Test Episode 13 Reward: 220.0
Test Episode 14 Reward: 276.0
Test Episode 15 Reward: 501.0
Test Episode 16 Reward: 334.0
Test Episode 17 Reward: 334.0
Test Episode 18 Reward: 334.0
Test Episode 19 Reward: 522.0
Test Episode 20 Reward: 320.0
Epoch 10 Average Test Reward: 336.05


100%|██████████| 2000/2000 [00:33<00:00, 60.23it/s]


Epoch 11 Mean Reward: 581.0


100%|██████████| 2000/2000 [00:33<00:00, 60.18it/s]


Epoch 12 Mean Reward: 548.0


100%|██████████| 2000/2000 [00:32<00:00, 61.70it/s]


Epoch 13 Mean Reward: 262.0


100%|██████████| 2000/2000 [00:32<00:00, 61.22it/s]


Epoch 14 Mean Reward: 287.0


100%|██████████| 2000/2000 [00:32<00:00, 61.84it/s]


Epoch 15 Mean Reward: 242.0


100%|██████████| 2000/2000 [00:32<00:00, 61.20it/s]


Epoch 16 Mean Reward: 355.0


100%|██████████| 2000/2000 [00:33<00:00, 59.07it/s]


Epoch 17 Mean Reward: 791.0


100%|██████████| 2000/2000 [00:32<00:00, 61.61it/s]


Epoch 18 Mean Reward: 286.0


100%|██████████| 2000/2000 [00:32<00:00, 61.52it/s]


Epoch 19 Mean Reward: 289.0


100%|██████████| 2000/2000 [00:32<00:00, 62.23it/s]


Epoch 20 Mean Reward: 206.0
Epoch 20 Model saved to ./checkpoints/take_cover.ckpt
Epoch 20 test:
Test Episode 1 Reward: 133.0
Test Episode 2 Reward: 176.0
Test Episode 3 Reward: 285.0
Test Episode 4 Reward: 362.0
Test Episode 5 Reward: 111.0
Test Episode 6 Reward: 285.0
Test Episode 7 Reward: 285.0
Test Episode 8 Reward: 285.0
Test Episode 9 Reward: 285.0
Test Episode 10 Reward: 113.0
Test Episode 11 Reward: 236.0
Test Episode 12 Reward: 216.0
Test Episode 13 Reward: 285.0
Test Episode 14 Reward: 226.0
Test Episode 15 Reward: 99.0
Test Episode 16 Reward: 285.0
Test Episode 17 Reward: 285.0
Test Episode 18 Reward: 285.0
Test Episode 19 Reward: 285.0
Test Episode 20 Reward: 116.0
Epoch 20 Average Test Reward: 231.9


100%|██████████| 2000/2000 [00:32<00:00, 61.83it/s]


Epoch 21 Mean Reward: 271.0


100%|██████████| 2000/2000 [00:33<00:00, 59.71it/s]


Epoch 22 Mean Reward: 655.0


100%|██████████| 2000/2000 [00:32<00:00, 61.52it/s]


Epoch 23 Mean Reward: 324.0


100%|██████████| 2000/2000 [00:32<00:00, 62.06it/s]


Epoch 24 Mean Reward: 214.0


100%|██████████| 2000/2000 [00:33<00:00, 59.35it/s]


Epoch 25 Mean Reward: 665.0


100%|██████████| 2000/2000 [00:32<00:00, 60.90it/s]


Epoch 26 Mean Reward: 346.0


100%|██████████| 2000/2000 [00:32<00:00, 61.90it/s]


Epoch 27 Mean Reward: 221.0


100%|██████████| 2000/2000 [00:32<00:00, 62.42it/s]


Epoch 28 Mean Reward: 168.0


100%|██████████| 2000/2000 [00:33<00:00, 58.89it/s]


Epoch 29 Mean Reward: 857.0


100%|██████████| 2000/2000 [00:32<00:00, 61.78it/s]


Epoch 30 Mean Reward: 298.0
Epoch 30 Model saved to ./checkpoints/take_cover.ckpt
Epoch 30 test:
Test Episode 1 Reward: 261.0
Test Episode 2 Reward: 192.0
Test Episode 3 Reward: 217.0
Test Episode 4 Reward: 261.0
Test Episode 5 Reward: 249.0
Test Episode 6 Reward: 261.0
Test Episode 7 Reward: 216.0
Test Episode 8 Reward: 261.0
Test Episode 9 Reward: 182.0
Test Episode 10 Reward: 115.0
Test Episode 11 Reward: 301.0
Test Episode 12 Reward: 143.0
Test Episode 13 Reward: 320.0
Test Episode 14 Reward: 176.0
Test Episode 15 Reward: 261.0
Test Episode 16 Reward: 178.0
Test Episode 17 Reward: 101.0
Test Episode 18 Reward: 261.0
Test Episode 19 Reward: 223.0
Test Episode 20 Reward: 261.0
Epoch 30 Average Test Reward: 222.0


100%|██████████| 2000/2000 [00:33<00:00, 59.26it/s]


Epoch 31 Mean Reward: 298.0


100%|██████████| 2000/2000 [00:31<00:00, 62.52it/s]


Epoch 32 Mean Reward: 109.0


100%|██████████| 2000/2000 [00:34<00:00, 58.42it/s]


Epoch 33 Mean Reward: 881.0


100%|██████████| 2000/2000 [00:34<00:00, 57.66it/s]


Epoch 34 Mean Reward: 1060.0


100%|██████████| 2000/2000 [00:32<00:00, 61.90it/s]


Epoch 35 Mean Reward: 224.0


100%|██████████| 2000/2000 [00:33<00:00, 60.31it/s]


Epoch 36 Mean Reward: 534.0


100%|██████████| 2000/2000 [00:32<00:00, 61.78it/s]


Epoch 37 Mean Reward: 267.0


100%|██████████| 2000/2000 [00:32<00:00, 61.34it/s]


Epoch 38 Mean Reward: 391.0


100%|██████████| 2000/2000 [00:33<00:00, 60.42it/s]


Epoch 39 Mean Reward: 522.0


100%|██████████| 2000/2000 [00:32<00:00, 62.41it/s]


Epoch 40 Mean Reward: 127.0
Epoch 40 Model saved to ./checkpoints/take_cover.ckpt
Epoch 40 test:
Test Episode 1 Reward: 215.0
Test Episode 2 Reward: 131.0
Test Episode 3 Reward: 318.0
Test Episode 4 Reward: 215.0
Test Episode 5 Reward: 292.0
Test Episode 6 Reward: 215.0
Test Episode 7 Reward: 272.0
Test Episode 8 Reward: 215.0
Test Episode 9 Reward: 279.0
Test Episode 10 Reward: 215.0
Test Episode 11 Reward: 315.0
Test Episode 12 Reward: 203.0
Test Episode 13 Reward: 215.0
Test Episode 14 Reward: 109.0
Test Episode 15 Reward: 215.0
Test Episode 16 Reward: 215.0
Test Episode 17 Reward: 215.0
Test Episode 18 Reward: 221.0
Test Episode 19 Reward: 165.0
Test Episode 20 Reward: 215.0
Epoch 40 Average Test Reward: 222.75


100%|██████████| 2000/2000 [00:32<00:00, 61.18it/s]


Epoch 41 Mean Reward: 319.0


100%|██████████| 2000/2000 [00:32<00:00, 61.33it/s]


Epoch 42 Mean Reward: 305.0


100%|██████████| 2000/2000 [00:32<00:00, 61.32it/s]


Epoch 43 Mean Reward: 347.0


100%|██████████| 2000/2000 [00:33<00:00, 60.39it/s]


Epoch 44 Mean Reward: 813.0


100%|██████████| 2000/2000 [00:32<00:00, 61.72it/s]


Epoch 45 Mean Reward: 565.0


100%|██████████| 2000/2000 [00:31<00:00, 63.00it/s]


Epoch 46 Mean Reward: 271.0


100%|██████████| 2000/2000 [00:31<00:00, 63.38it/s]


Epoch 47 Mean Reward: 253.0


100%|██████████| 2000/2000 [00:32<00:00, 62.39it/s]


Epoch 48 Mean Reward: 429.0


100%|██████████| 2000/2000 [00:31<00:00, 62.98it/s]


Epoch 49 Mean Reward: 369.0


100%|██████████| 2000/2000 [00:31<00:00, 64.06it/s]


Epoch 50 Mean Reward: 154.0
Epoch 50 Model saved to ./checkpoints/take_cover.ckpt
Epoch 50 test:
Test Episode 1 Reward: 334.0
Test Episode 2 Reward: 334.0
Test Episode 3 Reward: 332.0
Test Episode 4 Reward: 334.0
Test Episode 5 Reward: 334.0
Test Episode 6 Reward: 165.0
Test Episode 7 Reward: 334.0
Test Episode 8 Reward: 294.0
Test Episode 9 Reward: 334.0
Test Episode 10 Reward: 173.0
Test Episode 11 Reward: 405.0
Test Episode 12 Reward: 268.0
Test Episode 13 Reward: 334.0
Test Episode 14 Reward: 148.0
Test Episode 15 Reward: 334.0
Test Episode 16 Reward: 328.0
Test Episode 17 Reward: 318.0
Test Episode 18 Reward: 340.0
Test Episode 19 Reward: 281.0
Test Episode 20 Reward: 248.0
Epoch 50 Average Test Reward: 298.6


100%|██████████| 2000/2000 [00:30<00:00, 66.39it/s]


Epoch 51 Mean Reward: 331.0


100%|██████████| 2000/2000 [00:30<00:00, 66.10it/s]


Epoch 52 Mean Reward: 330.0


100%|██████████| 2000/2000 [00:29<00:00, 67.36it/s]


Epoch 53 Mean Reward: 134.0


100%|██████████| 2000/2000 [00:28<00:00, 69.89it/s]


Epoch 54 Mean Reward: 506.0


100%|██████████| 2000/2000 [00:28<00:00, 69.66it/s]


Epoch 55 Mean Reward: 539.0


100%|██████████| 2000/2000 [00:28<00:00, 70.97it/s]


Epoch 56 Mean Reward: 283.0


100%|██████████| 2000/2000 [00:28<00:00, 71.18it/s]


Epoch 57 Mean Reward: 245.0


100%|██████████| 2000/2000 [00:28<00:00, 70.54it/s]


Epoch 58 Mean Reward: 344.0


100%|██████████| 2000/2000 [00:28<00:00, 69.51it/s]


Epoch 59 Mean Reward: 571.0


100%|██████████| 2000/2000 [00:28<00:00, 69.49it/s]


Epoch 60 Mean Reward: 552.0
Epoch 60 Model saved to ./checkpoints/take_cover.ckpt
Epoch 60 test:
Test Episode 1 Reward: 207.0
Test Episode 2 Reward: 274.0
Test Episode 3 Reward: 357.0
Test Episode 4 Reward: 274.0
Test Episode 5 Reward: 274.0
Test Episode 6 Reward: 168.0
Test Episode 7 Reward: 109.0
Test Episode 8 Reward: 274.0
Test Episode 9 Reward: 317.0
Test Episode 10 Reward: 274.0
Test Episode 11 Reward: 190.0
Test Episode 12 Reward: 362.0
Test Episode 13 Reward: 274.0
Test Episode 14 Reward: 361.0
Test Episode 15 Reward: 274.0
Test Episode 16 Reward: 238.0
Test Episode 17 Reward: 390.0
Test Episode 18 Reward: 91.0
Test Episode 19 Reward: 283.0
Test Episode 20 Reward: 274.0
Epoch 60 Average Test Reward: 263.25


100%|██████████| 2000/2000 [00:27<00:00, 73.34it/s]


Epoch 61 Mean Reward: 244.0


100%|██████████| 2000/2000 [00:27<00:00, 71.77it/s]


Epoch 62 Mean Reward: 539.0


100%|██████████| 2000/2000 [00:27<00:00, 73.13it/s]


Epoch 63 Mean Reward: 271.0


100%|██████████| 2000/2000 [00:27<00:00, 73.78it/s]


Epoch 64 Mean Reward: 297.0


100%|██████████| 2000/2000 [00:28<00:00, 69.40it/s]


Epoch 65 Mean Reward: 1001.0


100%|██████████| 2000/2000 [00:27<00:00, 74.01it/s]


Epoch 66 Mean Reward: 270.0


100%|██████████| 2000/2000 [00:27<00:00, 73.89it/s]


Epoch 67 Mean Reward: 295.0


100%|██████████| 2000/2000 [00:26<00:00, 74.52it/s]


Epoch 68 Mean Reward: 263.0


100%|██████████| 2000/2000 [00:26<00:00, 74.29it/s]


Epoch 69 Mean Reward: 280.0


100%|██████████| 2000/2000 [00:26<00:00, 74.21it/s]


Epoch 70 Mean Reward: 278.0
Epoch 70 Model saved to ./checkpoints/take_cover.ckpt
Epoch 70 test:
Test Episode 1 Reward: 219.0
Test Episode 2 Reward: 334.0
Test Episode 3 Reward: 334.0
Test Episode 4 Reward: 187.0
Test Episode 5 Reward: 175.0
Test Episode 6 Reward: 334.0
Test Episode 7 Reward: 334.0
Test Episode 8 Reward: 334.0
Test Episode 9 Reward: 334.0
Test Episode 10 Reward: 187.0
Test Episode 11 Reward: 187.0
Test Episode 12 Reward: 447.0
Test Episode 13 Reward: 334.0
Test Episode 14 Reward: 334.0
Test Episode 15 Reward: 334.0
Test Episode 16 Reward: 334.0
Test Episode 17 Reward: 229.0
Test Episode 18 Reward: 369.0
Test Episode 19 Reward: 203.0
Test Episode 20 Reward: 232.0
Epoch 70 Average Test Reward: 288.75


100%|██████████| 2000/2000 [00:27<00:00, 74.05it/s]


Epoch 71 Mean Reward: 272.0


100%|██████████| 2000/2000 [00:30<00:00, 65.31it/s]


Epoch 72 Mean Reward: 713.0


100%|██████████| 2000/2000 [00:32<00:00, 61.20it/s]


Epoch 73 Mean Reward: 270.0


100%|██████████| 2000/2000 [00:32<00:00, 61.39it/s]


Epoch 74 Mean Reward: 287.0


100%|██████████| 2000/2000 [00:32<00:00, 62.12it/s]


Epoch 75 Mean Reward: 151.0


100%|██████████| 2000/2000 [00:32<00:00, 61.57it/s]


Epoch 76 Mean Reward: 273.0


100%|██████████| 2000/2000 [00:32<00:00, 61.34it/s]


Epoch 77 Mean Reward: 332.0


100%|██████████| 2000/2000 [00:32<00:00, 61.96it/s]


Epoch 78 Mean Reward: 231.0


100%|██████████| 2000/2000 [00:32<00:00, 61.96it/s]


Epoch 79 Mean Reward: 280.0


100%|██████████| 2000/2000 [00:32<00:00, 60.90it/s]


Epoch 80 Mean Reward: 494.0
Epoch 80 Model saved to ./checkpoints/take_cover.ckpt
Epoch 80 test:
Test Episode 1 Reward: 215.0
Test Episode 2 Reward: 215.0
Test Episode 3 Reward: 563.0
Test Episode 4 Reward: 215.0
Test Episode 5 Reward: 307.0
Test Episode 6 Reward: 167.0
Test Episode 7 Reward: 215.0
Test Episode 8 Reward: 147.0
Test Episode 9 Reward: 215.0
Test Episode 10 Reward: 215.0
Test Episode 11 Reward: 215.0
Test Episode 12 Reward: 178.0
Test Episode 13 Reward: 105.0
Test Episode 14 Reward: 215.0
Test Episode 15 Reward: 215.0
Test Episode 16 Reward: 215.0
Test Episode 17 Reward: 215.0
Test Episode 18 Reward: 215.0
Test Episode 19 Reward: 337.0
Test Episode 20 Reward: 174.0
Epoch 80 Average Test Reward: 227.9


100%|██████████| 2000/2000 [00:32<00:00, 62.20it/s]


Epoch 81 Mean Reward: 109.0


100%|██████████| 2000/2000 [00:33<00:00, 59.48it/s]


Epoch 82 Mean Reward: 723.0


100%|██████████| 2000/2000 [00:32<00:00, 62.10it/s]


Epoch 83 Mean Reward: 156.0


100%|██████████| 2000/2000 [00:32<00:00, 61.02it/s]


Epoch 84 Mean Reward: 308.0


100%|██████████| 2000/2000 [00:33<00:00, 60.55it/s]


Epoch 85 Mean Reward: 473.0


100%|██████████| 2000/2000 [00:32<00:00, 60.73it/s]


Epoch 86 Mean Reward: 474.0


100%|██████████| 2000/2000 [00:32<00:00, 61.22it/s]


Epoch 87 Mean Reward: 360.0


100%|██████████| 2000/2000 [00:34<00:00, 58.13it/s]


Epoch 88 Mean Reward: 633.0


100%|██████████| 2000/2000 [00:31<00:00, 62.68it/s]


Epoch 89 Mean Reward: 136.0


100%|██████████| 2000/2000 [00:33<00:00, 59.69it/s]


Epoch 90 Mean Reward: 705.0
Epoch 90 Model saved to ./checkpoints/take_cover.ckpt
Epoch 90 test:
Test Episode 1 Reward: 268.0
Test Episode 2 Reward: 382.0
Test Episode 3 Reward: 214.0
Test Episode 4 Reward: 214.0
Test Episode 5 Reward: 214.0
Test Episode 6 Reward: 104.0
Test Episode 7 Reward: 193.0
Test Episode 8 Reward: 146.0
Test Episode 9 Reward: 214.0
Test Episode 10 Reward: 113.0
Test Episode 11 Reward: 214.0
Test Episode 12 Reward: 286.0
Test Episode 13 Reward: 214.0
Test Episode 14 Reward: 214.0
Test Episode 15 Reward: 122.0
Test Episode 16 Reward: 124.0
Test Episode 17 Reward: 132.0
Test Episode 18 Reward: 196.0
Test Episode 19 Reward: 214.0
Test Episode 20 Reward: 214.0
Epoch 90 Average Test Reward: 199.6


100%|██████████| 2000/2000 [00:32<00:00, 61.42it/s]


Epoch 91 Mean Reward: 319.0


100%|██████████| 2000/2000 [00:32<00:00, 61.59it/s]


Epoch 92 Mean Reward: 286.0


100%|██████████| 2000/2000 [00:33<00:00, 59.61it/s]


Epoch 93 Mean Reward: 681.0


100%|██████████| 2000/2000 [00:33<00:00, 60.24it/s]


Epoch 94 Mean Reward: 512.0


100%|██████████| 2000/2000 [00:32<00:00, 61.45it/s]


Epoch 95 Mean Reward: 275.0


100%|██████████| 2000/2000 [00:32<00:00, 62.12it/s]


Epoch 96 Mean Reward: 108.0


100%|██████████| 2000/2000 [00:33<00:00, 59.60it/s]


Epoch 97 Mean Reward: 586.0


100%|██████████| 2000/2000 [00:32<00:00, 61.18it/s]


Epoch 98 Mean Reward: 271.0


100%|██████████| 2000/2000 [00:32<00:00, 61.58it/s]


Epoch 99 Mean Reward: 273.0


100%|██████████| 2000/2000 [00:32<00:00, 61.16it/s]


Epoch 100 Mean Reward: 317.0
Epoch 100 Model saved to ./checkpoints/take_cover.ckpt
Epoch 100 test:
Test Episode 1 Reward: 320.0
Test Episode 2 Reward: 117.0
Test Episode 3 Reward: 320.0
Test Episode 4 Reward: 320.0
Test Episode 5 Reward: 320.0
Test Episode 6 Reward: 320.0
Test Episode 7 Reward: 320.0
Test Episode 8 Reward: 320.0
Test Episode 9 Reward: 409.0
Test Episode 10 Reward: 320.0
Test Episode 11 Reward: 320.0
Test Episode 12 Reward: 320.0
Test Episode 13 Reward: 320.0
Test Episode 14 Reward: 253.0
Test Episode 15 Reward: 128.0
Test Episode 16 Reward: 133.0
Test Episode 17 Reward: 320.0
Test Episode 18 Reward: 320.0
Test Episode 19 Reward: 320.0
Test Episode 20 Reward: 215.0
Epoch 100 Average Test Reward: 286.75


100%|██████████| 2000/2000 [00:32<00:00, 61.73it/s]


Epoch 101 Mean Reward: 218.0


100%|██████████| 2000/2000 [00:32<00:00, 62.29it/s]


Epoch 102 Mean Reward: 115.0


100%|██████████| 2000/2000 [00:32<00:00, 61.64it/s]


Epoch 103 Mean Reward: 232.0


100%|██████████| 2000/2000 [00:34<00:00, 58.48it/s]


Epoch 104 Mean Reward: 540.0


100%|██████████| 2000/2000 [00:34<00:00, 57.62it/s]


Epoch 105 Mean Reward: 603.0


100%|██████████| 2000/2000 [00:33<00:00, 58.87it/s]


Epoch 106 Mean Reward: 338.0


100%|██████████| 2000/2000 [00:33<00:00, 59.06it/s]


Epoch 107 Mean Reward: 264.0


100%|██████████| 2000/2000 [00:35<00:00, 57.06it/s]


Epoch 108 Mean Reward: 714.0


100%|██████████| 2000/2000 [00:33<00:00, 59.56it/s]


Epoch 109 Mean Reward: 226.0


100%|██████████| 2000/2000 [00:34<00:00, 58.56it/s]


Epoch 110 Mean Reward: 405.0
Epoch 110 Model saved to ./checkpoints/take_cover.ckpt
Epoch 110 test:
Test Episode 1 Reward: 238.0
Test Episode 2 Reward: 238.0
Test Episode 3 Reward: 238.0
Test Episode 4 Reward: 238.0
Test Episode 5 Reward: 238.0
Test Episode 6 Reward: 238.0
Test Episode 7 Reward: 238.0
Test Episode 8 Reward: 256.0
Test Episode 9 Reward: 238.0
Test Episode 10 Reward: 238.0
Test Episode 11 Reward: 238.0
Test Episode 12 Reward: 341.0
Test Episode 13 Reward: 238.0
Test Episode 14 Reward: 239.0
Test Episode 15 Reward: 238.0
Test Episode 16 Reward: 237.0
Test Episode 17 Reward: 306.0
Test Episode 18 Reward: 238.0
Test Episode 19 Reward: 238.0
Test Episode 20 Reward: 187.0
Epoch 110 Average Test Reward: 244.9


100%|██████████| 2000/2000 [00:33<00:00, 60.17it/s]


Epoch 111 Mean Reward: 509.0


100%|██████████| 2000/2000 [00:32<00:00, 61.40it/s]


Epoch 112 Mean Reward: 236.0


100%|██████████| 2000/2000 [00:32<00:00, 62.07it/s]


Epoch 113 Mean Reward: 241.0


100%|██████████| 2000/2000 [00:32<00:00, 61.26it/s]


Epoch 114 Mean Reward: 407.0


100%|██████████| 2000/2000 [00:33<00:00, 60.39it/s]


Epoch 115 Mean Reward: 517.0


100%|██████████| 2000/2000 [00:33<00:00, 60.52it/s]


Epoch 116 Mean Reward: 483.0


100%|██████████| 2000/2000 [00:32<00:00, 61.00it/s]


Epoch 117 Mean Reward: 403.0


100%|██████████| 2000/2000 [00:32<00:00, 61.41it/s]


Epoch 118 Mean Reward: 332.0


100%|██████████| 2000/2000 [00:32<00:00, 61.78it/s]


Epoch 119 Mean Reward: 238.0


100%|██████████| 2000/2000 [00:32<00:00, 62.07it/s]


Epoch 120 Mean Reward: 233.0
Epoch 120 Model saved to ./checkpoints/take_cover.ckpt
Epoch 120 test:
Test Episode 1 Reward: 289.0
Test Episode 2 Reward: 289.0
Test Episode 3 Reward: 218.0
Test Episode 4 Reward: 108.0
Test Episode 5 Reward: 270.0
Test Episode 6 Reward: 289.0
Test Episode 7 Reward: 202.0
Test Episode 8 Reward: 289.0
Test Episode 9 Reward: 289.0
Test Episode 10 Reward: 289.0
Test Episode 11 Reward: 289.0
Test Episode 12 Reward: 338.0
Test Episode 13 Reward: 123.0
Test Episode 14 Reward: 186.0
Test Episode 15 Reward: 289.0
Test Episode 16 Reward: 289.0
Test Episode 17 Reward: 289.0
Test Episode 18 Reward: 247.0
Test Episode 19 Reward: 289.0
Test Episode 20 Reward: 289.0
Epoch 120 Average Test Reward: 258.0


100%|██████████| 2000/2000 [00:27<00:00, 71.71it/s]


Epoch 121 Mean Reward: 509.0


100%|██████████| 2000/2000 [00:26<00:00, 74.68it/s]


Epoch 122 Mean Reward: 392.0


100%|██████████| 2000/2000 [00:26<00:00, 75.26it/s]


Epoch 123 Mean Reward: 237.0


100%|██████████| 2000/2000 [00:26<00:00, 75.26it/s]


Epoch 124 Mean Reward: 252.0


100%|██████████| 2000/2000 [00:27<00:00, 72.62it/s]


Epoch 125 Mean Reward: 607.0


100%|██████████| 2000/2000 [00:26<00:00, 75.74it/s]


Epoch 126 Mean Reward: 172.0


100%|██████████| 2000/2000 [00:26<00:00, 75.10it/s]


Epoch 127 Mean Reward: 289.0


100%|██████████| 2000/2000 [00:26<00:00, 74.82it/s]


Epoch 128 Mean Reward: 340.0


100%|██████████| 2000/2000 [00:26<00:00, 74.33it/s]


Epoch 129 Mean Reward: 202.0


100%|██████████| 2000/2000 [00:27<00:00, 73.86it/s]


Epoch 130 Mean Reward: 171.0
Epoch 130 Model saved to ./checkpoints/take_cover.ckpt
Epoch 130 test:
Test Episode 1 Reward: 316.0
Test Episode 2 Reward: 316.0
Test Episode 3 Reward: 318.0
Test Episode 4 Reward: 316.0
Test Episode 5 Reward: 316.0
Test Episode 6 Reward: 141.0
Test Episode 7 Reward: 185.0
Test Episode 8 Reward: 316.0
Test Episode 9 Reward: 316.0
Test Episode 10 Reward: 115.0
Test Episode 11 Reward: 209.0
Test Episode 12 Reward: 316.0
Test Episode 13 Reward: 171.0
Test Episode 14 Reward: 316.0
Test Episode 15 Reward: 316.0
Test Episode 16 Reward: 194.0
Test Episode 17 Reward: 316.0
Test Episode 18 Reward: 450.0
Test Episode 19 Reward: 316.0
Test Episode 20 Reward: 316.0
Epoch 130 Average Test Reward: 278.75


100%|██████████| 2000/2000 [00:26<00:00, 74.08it/s]


Epoch 131 Mean Reward: 418.0


100%|██████████| 2000/2000 [00:26<00:00, 74.21it/s]


Epoch 132 Mean Reward: 465.0


100%|██████████| 2000/2000 [00:26<00:00, 74.94it/s]


Epoch 133 Mean Reward: 329.0


100%|██████████| 2000/2000 [00:27<00:00, 73.93it/s]


Epoch 134 Mean Reward: 472.0


100%|██████████| 2000/2000 [00:26<00:00, 74.44it/s]


Epoch 135 Mean Reward: 325.0


100%|██████████| 2000/2000 [00:27<00:00, 73.42it/s]


Epoch 136 Mean Reward: 323.0


100%|██████████| 2000/2000 [00:28<00:00, 69.59it/s]


Epoch 137 Mean Reward: 678.0


100%|██████████| 2000/2000 [00:27<00:00, 72.54it/s]


Epoch 138 Mean Reward: 332.0


100%|██████████| 2000/2000 [00:30<00:00, 65.44it/s]


Epoch 139 Mean Reward: 510.0


100%|██████████| 2000/2000 [00:34<00:00, 58.82it/s]


Epoch 140 Mean Reward: 827.0
Epoch 140 Model saved to ./checkpoints/take_cover.ckpt
Epoch 140 test:
Test Episode 1 Reward: 281.0
Test Episode 2 Reward: 281.0
Test Episode 3 Reward: 158.0
Test Episode 4 Reward: 273.0
Test Episode 5 Reward: 290.0
Test Episode 6 Reward: 177.0
Test Episode 7 Reward: 153.0
Test Episode 8 Reward: 281.0
Test Episode 9 Reward: 179.0
Test Episode 10 Reward: 124.0
Test Episode 11 Reward: 265.0
Test Episode 12 Reward: 281.0
Test Episode 13 Reward: 529.0
Test Episode 14 Reward: 281.0
Test Episode 15 Reward: 266.0
Test Episode 16 Reward: 115.0
Test Episode 17 Reward: 307.0
Test Episode 18 Reward: 281.0
Test Episode 19 Reward: 281.0
Test Episode 20 Reward: 281.0
Epoch 140 Average Test Reward: 254.2


100%|██████████| 2000/2000 [00:32<00:00, 61.36it/s]


Epoch 141 Mean Reward: 275.0


100%|██████████| 2000/2000 [00:32<00:00, 61.41it/s]


Epoch 142 Mean Reward: 245.0


100%|██████████| 2000/2000 [00:32<00:00, 60.73it/s]


Epoch 143 Mean Reward: 402.0


100%|██████████| 2000/2000 [00:32<00:00, 60.92it/s]


Epoch 144 Mean Reward: 328.0


100%|██████████| 2000/2000 [00:32<00:00, 61.06it/s]


Epoch 145 Mean Reward: 293.0


100%|██████████| 2000/2000 [00:33<00:00, 59.62it/s]


Epoch 146 Mean Reward: 676.0


100%|██████████| 2000/2000 [00:33<00:00, 60.23it/s]


Epoch 147 Mean Reward: 542.0


100%|██████████| 2000/2000 [00:32<00:00, 61.58it/s]


Epoch 148 Mean Reward: 285.0


100%|██████████| 2000/2000 [00:32<00:00, 61.72it/s]


Epoch 149 Mean Reward: 239.0


100%|██████████| 2000/2000 [00:32<00:00, 61.60it/s]


Epoch 150 Mean Reward: 260.0
Epoch 150 Model saved to ./checkpoints/take_cover.ckpt
Epoch 150 test:
Test Episode 1 Reward: 134.0
Test Episode 2 Reward: 341.0
Test Episode 3 Reward: 341.0
Test Episode 4 Reward: 141.0
Test Episode 5 Reward: 113.0
Test Episode 6 Reward: 341.0
Test Episode 7 Reward: 128.0
Test Episode 8 Reward: 119.0
Test Episode 9 Reward: 216.0
Test Episode 10 Reward: 341.0
Test Episode 11 Reward: 276.0
Test Episode 12 Reward: 341.0
Test Episode 13 Reward: 341.0
Test Episode 14 Reward: 264.0
Test Episode 15 Reward: 341.0
Test Episode 16 Reward: 341.0
Test Episode 17 Reward: 341.0
Test Episode 18 Reward: 341.0
Test Episode 19 Reward: 214.0
Test Episode 20 Reward: 136.0
Epoch 150 Average Test Reward: 257.55


100%|██████████| 2000/2000 [00:33<00:00, 60.17it/s]


Epoch 151 Mean Reward: 537.0


100%|██████████| 2000/2000 [00:32<00:00, 61.12it/s]


Epoch 152 Mean Reward: 383.0


100%|██████████| 2000/2000 [00:32<00:00, 61.26it/s]


Epoch 153 Mean Reward: 347.0


100%|██████████| 2000/2000 [00:32<00:00, 60.79it/s]


Epoch 154 Mean Reward: 375.0


100%|██████████| 2000/2000 [00:32<00:00, 61.86it/s]


Epoch 155 Mean Reward: 219.0


100%|██████████| 2000/2000 [00:33<00:00, 60.37it/s]


Epoch 156 Mean Reward: 561.0


100%|██████████| 2000/2000 [00:32<00:00, 61.98it/s]


Epoch 157 Mean Reward: 208.0


100%|██████████| 2000/2000 [00:32<00:00, 61.48it/s]


Epoch 158 Mean Reward: 213.0


100%|██████████| 2000/2000 [00:32<00:00, 61.24it/s]


Epoch 159 Mean Reward: 247.0


100%|██████████| 2000/2000 [00:33<00:00, 59.61it/s]


Epoch 160 Mean Reward: 350.0
Epoch 160 Model saved to ./checkpoints/take_cover.ckpt
Epoch 160 test:
Test Episode 1 Reward: 245.0
Test Episode 2 Reward: 431.0
Test Episode 3 Reward: 245.0
Test Episode 4 Reward: 312.0
Test Episode 5 Reward: 245.0
Test Episode 6 Reward: 329.0
Test Episode 7 Reward: 261.0
Test Episode 8 Reward: 245.0
Test Episode 9 Reward: 163.0
Test Episode 10 Reward: 245.0
Test Episode 11 Reward: 245.0
Test Episode 12 Reward: 245.0
Test Episode 13 Reward: 151.0
Test Episode 14 Reward: 367.0
Test Episode 15 Reward: 141.0
Test Episode 16 Reward: 245.0
Test Episode 17 Reward: 130.0
Test Episode 18 Reward: 126.0
Test Episode 19 Reward: 245.0
Test Episode 20 Reward: 141.0
Epoch 160 Average Test Reward: 237.85


100%|██████████| 2000/2000 [00:32<00:00, 61.99it/s]


Epoch 161 Mean Reward: 227.0


100%|██████████| 2000/2000 [00:33<00:00, 59.53it/s]


Epoch 162 Mean Reward: 696.0


100%|██████████| 2000/2000 [00:33<00:00, 60.18it/s]


Epoch 163 Mean Reward: 579.0


100%|██████████| 2000/2000 [00:32<00:00, 61.55it/s]


Epoch 164 Mean Reward: 294.0


100%|██████████| 2000/2000 [00:33<00:00, 59.06it/s]


Epoch 165 Mean Reward: 822.0


100%|██████████| 2000/2000 [00:32<00:00, 61.43it/s]


Epoch 166 Mean Reward: 221.0


100%|██████████| 2000/2000 [00:32<00:00, 61.14it/s]


Epoch 167 Mean Reward: 236.0


100%|██████████| 2000/2000 [00:33<00:00, 59.27it/s]


Epoch 168 Mean Reward: 494.0


100%|██████████| 2000/2000 [00:32<00:00, 60.75it/s]


Epoch 169 Mean Reward: 213.0


100%|██████████| 2000/2000 [00:32<00:00, 61.47it/s]


Epoch 170 Mean Reward: 304.0
Epoch 170 Model saved to ./checkpoints/take_cover.ckpt
Epoch 170 test:
Test Episode 1 Reward: 598.0
Test Episode 2 Reward: 263.0
Test Episode 3 Reward: 278.0
Test Episode 4 Reward: 215.0
Test Episode 5 Reward: 215.0
Test Episode 6 Reward: 215.0
Test Episode 7 Reward: 215.0
Test Episode 8 Reward: 237.0
Test Episode 9 Reward: 215.0
Test Episode 10 Reward: 272.0
Test Episode 11 Reward: 215.0
Test Episode 12 Reward: 215.0
Test Episode 13 Reward: 314.0
Test Episode 14 Reward: 193.0
Test Episode 15 Reward: 215.0
Test Episode 16 Reward: 215.0
Test Episode 17 Reward: 215.0
Test Episode 18 Reward: 190.0
Test Episode 19 Reward: 103.0
Test Episode 20 Reward: 259.0
Epoch 170 Average Test Reward: 242.85


100%|██████████| 2000/2000 [00:32<00:00, 60.65it/s]


Epoch 171 Mean Reward: 410.0


100%|██████████| 2000/2000 [00:33<00:00, 60.19it/s]


Epoch 172 Mean Reward: 513.0


100%|██████████| 2000/2000 [00:33<00:00, 60.29it/s]


Epoch 173 Mean Reward: 530.0


100%|██████████| 2000/2000 [00:33<00:00, 60.29it/s]


Epoch 174 Mean Reward: 432.0


100%|██████████| 2000/2000 [00:32<00:00, 60.62it/s]


Epoch 175 Mean Reward: 489.0


100%|██████████| 2000/2000 [00:33<00:00, 59.58it/s]


Epoch 176 Mean Reward: 670.0


100%|██████████| 2000/2000 [00:32<00:00, 61.36it/s]


Epoch 177 Mean Reward: 286.0


100%|██████████| 2000/2000 [00:32<00:00, 61.87it/s]


Epoch 178 Mean Reward: 240.0


100%|██████████| 2000/2000 [00:33<00:00, 58.92it/s]


Epoch 179 Mean Reward: 809.0


100%|██████████| 2000/2000 [00:32<00:00, 61.96it/s]


Epoch 180 Mean Reward: 190.0
Epoch 180 Model saved to ./checkpoints/take_cover.ckpt
Epoch 180 test:
Test Episode 1 Reward: 378.0
Test Episode 2 Reward: 279.0
Test Episode 3 Reward: 279.0
Test Episode 4 Reward: 279.0
Test Episode 5 Reward: 195.0
Test Episode 6 Reward: 279.0
Test Episode 7 Reward: 255.0
Test Episode 8 Reward: 279.0
Test Episode 9 Reward: 151.0
Test Episode 10 Reward: 279.0
Test Episode 11 Reward: 234.0
Test Episode 12 Reward: 279.0
Test Episode 13 Reward: 279.0
Test Episode 14 Reward: 279.0
Test Episode 15 Reward: 209.0
Test Episode 16 Reward: 279.0
Test Episode 17 Reward: 279.0
Test Episode 18 Reward: 231.0
Test Episode 19 Reward: 189.0
Test Episode 20 Reward: 214.0
Epoch 180 Average Test Reward: 256.25


100%|██████████| 2000/2000 [00:33<00:00, 59.05it/s]


Epoch 181 Mean Reward: 711.0


100%|██████████| 2000/2000 [00:32<00:00, 61.48it/s]


Epoch 182 Mean Reward: 324.0


100%|██████████| 2000/2000 [00:33<00:00, 58.84it/s]


Epoch 183 Mean Reward: 812.0


100%|██████████| 2000/2000 [00:33<00:00, 59.55it/s]


Epoch 184 Mean Reward: 320.0


100%|██████████| 2000/2000 [00:32<00:00, 60.69it/s]


Epoch 185 Mean Reward: 296.0


100%|██████████| 2000/2000 [00:33<00:00, 60.27it/s]


Epoch 186 Mean Reward: 559.0


100%|██████████| 2000/2000 [00:32<00:00, 62.34it/s]


Epoch 187 Mean Reward: 138.0


100%|██████████| 2000/2000 [00:33<00:00, 60.36it/s]


Epoch 188 Mean Reward: 500.0


100%|██████████| 2000/2000 [00:32<00:00, 61.13it/s]


Epoch 189 Mean Reward: 306.0


100%|██████████| 2000/2000 [00:32<00:00, 61.90it/s]


Epoch 190 Mean Reward: 181.0
Epoch 190 Model saved to ./checkpoints/take_cover.ckpt
Epoch 190 test:
Test Episode 1 Reward: 246.0
Test Episode 2 Reward: 201.0
Test Episode 3 Reward: 189.0
Test Episode 4 Reward: 246.0
Test Episode 5 Reward: 246.0
Test Episode 6 Reward: 246.0
Test Episode 7 Reward: 246.0
Test Episode 8 Reward: 284.0
Test Episode 9 Reward: 246.0
Test Episode 10 Reward: 246.0
Test Episode 11 Reward: 302.0
Test Episode 12 Reward: 278.0
Test Episode 13 Reward: 246.0
Test Episode 14 Reward: 246.0
Test Episode 15 Reward: 288.0
Test Episode 16 Reward: 246.0
Test Episode 17 Reward: 246.0
Test Episode 18 Reward: 246.0
Test Episode 19 Reward: 235.0
Test Episode 20 Reward: 146.0
Epoch 190 Average Test Reward: 243.75


100%|██████████| 2000/2000 [00:34<00:00, 58.53it/s]


Epoch 191 Mean Reward: 805.0


100%|██████████| 2000/2000 [00:33<00:00, 60.09it/s]


Epoch 192 Mean Reward: 555.0


100%|██████████| 2000/2000 [00:33<00:00, 59.97it/s]


Epoch 193 Mean Reward: 546.0


100%|██████████| 2000/2000 [00:33<00:00, 60.57it/s]


Epoch 194 Mean Reward: 445.0


100%|██████████| 2000/2000 [00:32<00:00, 60.80it/s]


Epoch 195 Mean Reward: 397.0


100%|██████████| 2000/2000 [00:32<00:00, 61.49it/s]


Epoch 196 Mean Reward: 266.0


100%|██████████| 2000/2000 [00:33<00:00, 59.33it/s]


Epoch 197 Mean Reward: 708.0


100%|██████████| 2000/2000 [00:34<00:00, 58.50it/s]


Epoch 198 Mean Reward: 790.0


100%|██████████| 2000/2000 [00:32<00:00, 61.36it/s]


Epoch 199 Mean Reward: 289.0


100%|██████████| 2000/2000 [00:32<00:00, 62.13it/s]


Epoch 200 Mean Reward: 317.0
Epoch 200 Model saved to ./checkpoints/take_cover.ckpt
Epoch 200 test:
Test Episode 1 Reward: 182.0
Test Episode 2 Reward: 332.0
Test Episode 3 Reward: 247.0
Test Episode 4 Reward: 113.0
Test Episode 5 Reward: 247.0
Test Episode 6 Reward: 323.0
Test Episode 7 Reward: 247.0
Test Episode 8 Reward: 247.0
Test Episode 9 Reward: 125.0
Test Episode 10 Reward: 247.0
Test Episode 11 Reward: 200.0
Test Episode 12 Reward: 92.0
Test Episode 13 Reward: 247.0
Test Episode 14 Reward: 111.0
Test Episode 15 Reward: 277.0
Test Episode 16 Reward: 247.0
Test Episode 17 Reward: 232.0
Test Episode 18 Reward: 285.0
Test Episode 19 Reward: 247.0
Test Episode 20 Reward: 219.0
Epoch 200 Average Test Reward: 223.35


100%|██████████| 2000/2000 [00:28<00:00, 70.11it/s]


Epoch 201 Mean Reward: 248.0


100%|██████████| 2000/2000 [00:28<00:00, 70.45it/s]


Epoch 202 Mean Reward: 495.0


100%|██████████| 2000/2000 [00:28<00:00, 71.19it/s]


Epoch 203 Mean Reward: 350.0


100%|██████████| 2000/2000 [00:28<00:00, 70.67it/s]


Epoch 204 Mean Reward: 517.0


100%|██████████| 2000/2000 [00:28<00:00, 69.61it/s]


Epoch 205 Mean Reward: 561.0


100%|██████████| 2000/2000 [00:27<00:00, 71.48it/s]


Epoch 206 Mean Reward: 327.0


100%|██████████| 2000/2000 [00:28<00:00, 70.55it/s]


Epoch 207 Mean Reward: 465.0


100%|██████████| 2000/2000 [00:28<00:00, 71.04it/s]


Epoch 208 Mean Reward: 405.0


100%|██████████| 2000/2000 [00:27<00:00, 71.49it/s]


Epoch 209 Mean Reward: 611.0


100%|██████████| 2000/2000 [00:27<00:00, 72.72it/s]


Epoch 210 Mean Reward: 342.0
Epoch 210 Model saved to ./checkpoints/take_cover.ckpt
Epoch 210 test:
Test Episode 1 Reward: 149.0
Test Episode 2 Reward: 329.0
Test Episode 3 Reward: 329.0
Test Episode 4 Reward: 329.0
Test Episode 5 Reward: 108.0
Test Episode 6 Reward: 104.0
Test Episode 7 Reward: 138.0
Test Episode 8 Reward: 329.0
Test Episode 9 Reward: 329.0
Test Episode 10 Reward: 329.0
Test Episode 11 Reward: 329.0
Test Episode 12 Reward: 329.0
Test Episode 13 Reward: 114.0
Test Episode 14 Reward: 113.0
Test Episode 15 Reward: 175.0
Test Episode 16 Reward: 329.0
Test Episode 17 Reward: 329.0
Test Episode 18 Reward: 224.0
Test Episode 19 Reward: 329.0
Test Episode 20 Reward: 304.0
Epoch 210 Average Test Reward: 252.4


100%|██████████| 2000/2000 [00:32<00:00, 61.54it/s]


Epoch 211 Mean Reward: 287.0


100%|██████████| 2000/2000 [00:32<00:00, 60.91it/s]


Epoch 212 Mean Reward: 396.0


100%|██████████| 2000/2000 [00:33<00:00, 59.62it/s]


Epoch 213 Mean Reward: 601.0


100%|██████████| 2000/2000 [00:34<00:00, 58.29it/s]


Epoch 214 Mean Reward: 873.0


100%|██████████| 2000/2000 [00:32<00:00, 61.83it/s]


Epoch 215 Mean Reward: 224.0


100%|██████████| 2000/2000 [00:32<00:00, 61.54it/s]


Epoch 216 Mean Reward: 275.0


100%|██████████| 2000/2000 [00:32<00:00, 62.36it/s]


Epoch 217 Mean Reward: 128.0


100%|██████████| 2000/2000 [00:33<00:00, 59.07it/s]


Epoch 218 Mean Reward: 754.0


100%|██████████| 2000/2000 [00:32<00:00, 62.36it/s]


Epoch 219 Mean Reward: 185.0


100%|██████████| 2000/2000 [00:32<00:00, 61.63it/s]


Epoch 220 Mean Reward: 286.0
Epoch 220 Model saved to ./checkpoints/take_cover.ckpt
Epoch 220 test:
Test Episode 1 Reward: 254.0
Test Episode 2 Reward: 218.0
Test Episode 3 Reward: 146.0
Test Episode 4 Reward: 254.0
Test Episode 5 Reward: 247.0
Test Episode 6 Reward: 128.0
Test Episode 7 Reward: 95.0
Test Episode 8 Reward: 126.0
Test Episode 9 Reward: 184.0
Test Episode 10 Reward: 95.0
Test Episode 11 Reward: 98.0
Test Episode 12 Reward: 168.0
Test Episode 13 Reward: 424.0
Test Episode 14 Reward: 98.0
Test Episode 15 Reward: 126.0
Test Episode 16 Reward: 254.0
Test Episode 17 Reward: 100.0
Test Episode 18 Reward: 254.0
Test Episode 19 Reward: 254.0
Test Episode 20 Reward: 254.0
Epoch 220 Average Test Reward: 188.85


100%|██████████| 2000/2000 [00:32<00:00, 61.19it/s]


Epoch 221 Mean Reward: 376.0


100%|██████████| 2000/2000 [00:33<00:00, 59.33it/s]


Epoch 222 Mean Reward: 836.0


100%|██████████| 2000/2000 [00:32<00:00, 61.56it/s]


Epoch 223 Mean Reward: 323.0


100%|██████████| 2000/2000 [00:33<00:00, 60.27it/s]


Epoch 224 Mean Reward: 496.0


100%|██████████| 2000/2000 [00:32<00:00, 61.96it/s]


Epoch 225 Mean Reward: 209.0


100%|██████████| 2000/2000 [00:32<00:00, 61.48it/s]


Epoch 226 Mean Reward: 322.0


100%|██████████| 2000/2000 [00:34<00:00, 58.37it/s]


Epoch 227 Mean Reward: 924.0


100%|██████████| 2000/2000 [00:32<00:00, 61.56it/s]


Epoch 228 Mean Reward: 273.0


100%|██████████| 2000/2000 [00:32<00:00, 60.81it/s]


Epoch 229 Mean Reward: 413.0


100%|██████████| 2000/2000 [00:32<00:00, 61.93it/s]


Epoch 230 Mean Reward: 286.0
Epoch 230 Model saved to ./checkpoints/take_cover.ckpt
Epoch 230 test:
Test Episode 1 Reward: 172.0
Test Episode 2 Reward: 366.0
Test Episode 3 Reward: 273.0
Test Episode 4 Reward: 176.0
Test Episode 5 Reward: 145.0
Test Episode 6 Reward: 236.0
Test Episode 7 Reward: 138.0
Test Episode 8 Reward: 273.0
Test Episode 9 Reward: 345.0
Test Episode 10 Reward: 273.0
Test Episode 11 Reward: 273.0
Test Episode 12 Reward: 133.0
Test Episode 13 Reward: 273.0
Test Episode 14 Reward: 214.0
Test Episode 15 Reward: 285.0
Test Episode 16 Reward: 273.0
Test Episode 17 Reward: 273.0
Test Episode 18 Reward: 273.0
Test Episode 19 Reward: 118.0
Test Episode 20 Reward: 274.0
Epoch 230 Average Test Reward: 239.3


100%|██████████| 2000/2000 [00:33<00:00, 60.47it/s]


Epoch 231 Mean Reward: 512.0


100%|██████████| 2000/2000 [00:32<00:00, 61.43it/s]


Epoch 232 Mean Reward: 293.0


100%|██████████| 2000/2000 [00:32<00:00, 61.84it/s]


Epoch 233 Mean Reward: 237.0


100%|██████████| 2000/2000 [00:32<00:00, 61.40it/s]


Epoch 234 Mean Reward: 330.0


100%|██████████| 2000/2000 [00:32<00:00, 62.31it/s]


Epoch 235 Mean Reward: 185.0


100%|██████████| 2000/2000 [00:32<00:00, 61.07it/s]


Epoch 236 Mean Reward: 342.0


100%|██████████| 2000/2000 [00:33<00:00, 60.11it/s]


Epoch 237 Mean Reward: 530.0


100%|██████████| 2000/2000 [00:33<00:00, 60.45it/s]


Epoch 238 Mean Reward: 488.0


100%|██████████| 2000/2000 [00:32<00:00, 60.77it/s]


Epoch 239 Mean Reward: 470.0


100%|██████████| 2000/2000 [00:32<00:00, 61.92it/s]


Epoch 240 Mean Reward: 260.0
Epoch 240 Model saved to ./checkpoints/take_cover.ckpt
Epoch 240 test:
Test Episode 1 Reward: 185.0
Test Episode 2 Reward: 148.0
Test Episode 3 Reward: 376.0
Test Episode 4 Reward: 376.0
Test Episode 5 Reward: 376.0
Test Episode 6 Reward: 116.0
Test Episode 7 Reward: 160.0
Test Episode 8 Reward: 170.0
Test Episode 9 Reward: 376.0
Test Episode 10 Reward: 376.0
Test Episode 11 Reward: 234.0
Test Episode 12 Reward: 143.0
Test Episode 13 Reward: 376.0
Test Episode 14 Reward: 376.0
Test Episode 15 Reward: 172.0
Test Episode 16 Reward: 386.0
Test Episode 17 Reward: 376.0
Test Episode 18 Reward: 376.0
Test Episode 19 Reward: 352.0
Test Episode 20 Reward: 376.0
Epoch 240 Average Test Reward: 291.3


100%|██████████| 2000/2000 [00:35<00:00, 57.00it/s]


Epoch 241 Mean Reward: 763.0


100%|██████████| 2000/2000 [00:34<00:00, 57.78it/s]


Epoch 242 Mean Reward: 566.0


100%|██████████| 2000/2000 [00:33<00:00, 59.55it/s]


Epoch 243 Mean Reward: 534.0


100%|██████████| 2000/2000 [00:32<00:00, 61.85it/s]


Epoch 244 Mean Reward: 204.0


100%|██████████| 2000/2000 [00:32<00:00, 61.63it/s]


Epoch 245 Mean Reward: 345.0


100%|██████████| 2000/2000 [00:32<00:00, 62.40it/s]


Epoch 246 Mean Reward: 160.0


100%|██████████| 2000/2000 [00:32<00:00, 60.93it/s]


Epoch 247 Mean Reward: 491.0


100%|██████████| 2000/2000 [00:32<00:00, 62.14it/s]


Epoch 248 Mean Reward: 183.0


100%|██████████| 2000/2000 [00:32<00:00, 61.07it/s]


Epoch 249 Mean Reward: 395.0


100%|██████████| 2000/2000 [00:32<00:00, 61.64it/s]


Epoch 250 Mean Reward: 272.0
Epoch 250 Model saved to ./checkpoints/take_cover.ckpt
Epoch 250 test:
Test Episode 1 Reward: 285.0
Test Episode 2 Reward: 203.0
Test Episode 3 Reward: 214.0
Test Episode 4 Reward: 214.0
Test Episode 5 Reward: 214.0
Test Episode 6 Reward: 470.0
Test Episode 7 Reward: 113.0
Test Episode 8 Reward: 214.0
Test Episode 9 Reward: 214.0
Test Episode 10 Reward: 252.0
Test Episode 11 Reward: 193.0
Test Episode 12 Reward: 214.0
Test Episode 13 Reward: 214.0
Test Episode 14 Reward: 237.0
Test Episode 15 Reward: 136.0
Test Episode 16 Reward: 111.0
Test Episode 17 Reward: 214.0
Test Episode 18 Reward: 214.0
Test Episode 19 Reward: 426.0
Test Episode 20 Reward: 214.0
Epoch 250 Average Test Reward: 228.3


100%|██████████| 2000/2000 [00:32<00:00, 61.50it/s]


Epoch 251 Mean Reward: 294.0


100%|██████████| 2000/2000 [00:32<00:00, 62.12it/s]


Epoch 252 Mean Reward: 215.0


100%|██████████| 2000/2000 [00:33<00:00, 60.07it/s]


Epoch 253 Mean Reward: 604.0


100%|██████████| 2000/2000 [00:33<00:00, 60.36it/s]


Epoch 254 Mean Reward: 513.0


100%|██████████| 2000/2000 [00:34<00:00, 57.59it/s]


Epoch 255 Mean Reward: 1159.0


100%|██████████| 2000/2000 [00:33<00:00, 60.42it/s]


Epoch 256 Mean Reward: 565.0


100%|██████████| 2000/2000 [00:31<00:00, 63.53it/s]


Epoch 257 Mean Reward: 286.0


100%|██████████| 2000/2000 [00:28<00:00, 70.11it/s]


Epoch 258 Mean Reward: 145.0


100%|██████████| 2000/2000 [00:27<00:00, 74.02it/s]


Epoch 259 Mean Reward: 248.0


100%|██████████| 2000/2000 [00:27<00:00, 72.83it/s]


Epoch 260 Mean Reward: 381.0
Epoch 260 Model saved to ./checkpoints/take_cover.ckpt
Epoch 260 test:
Test Episode 1 Reward: 180.0
Test Episode 2 Reward: 288.0
Test Episode 3 Reward: 283.0
Test Episode 4 Reward: 359.0
Test Episode 5 Reward: 288.0
Test Episode 6 Reward: 288.0
Test Episode 7 Reward: 190.0
Test Episode 8 Reward: 288.0
Test Episode 9 Reward: 288.0
Test Episode 10 Reward: 288.0
Test Episode 11 Reward: 288.0
Test Episode 12 Reward: 288.0
Test Episode 13 Reward: 288.0
Test Episode 14 Reward: 215.0
Test Episode 15 Reward: 288.0
Test Episode 16 Reward: 196.0
Test Episode 17 Reward: 148.0
Test Episode 18 Reward: 327.0
Test Episode 19 Reward: 120.0
Test Episode 20 Reward: 92.0
Epoch 260 Average Test Reward: 249.5


100%|██████████| 2000/2000 [00:27<00:00, 71.74it/s]


Epoch 261 Mean Reward: 428.0


100%|██████████| 2000/2000 [00:27<00:00, 71.71it/s]


Epoch 262 Mean Reward: 509.0


100%|██████████| 2000/2000 [00:29<00:00, 67.51it/s]


Epoch 263 Mean Reward: 1240.0


100%|██████████| 2000/2000 [00:27<00:00, 72.84it/s]


Epoch 264 Mean Reward: 317.0


100%|██████████| 2000/2000 [00:27<00:00, 72.55it/s]


Epoch 265 Mean Reward: 376.0


100%|██████████| 2000/2000 [00:28<00:00, 70.90it/s]


Epoch 266 Mean Reward: 702.0


100%|██████████| 2000/2000 [00:27<00:00, 73.60it/s]


Epoch 267 Mean Reward: 277.0


100%|██████████| 2000/2000 [00:26<00:00, 75.43it/s]


Epoch 268 Mean Reward: 199.0


100%|██████████| 2000/2000 [00:26<00:00, 75.05it/s]


Epoch 269 Mean Reward: 276.0


100%|██████████| 2000/2000 [00:27<00:00, 72.58it/s]


Epoch 270 Mean Reward: 695.0
Epoch 270 Model saved to ./checkpoints/take_cover.ckpt
Epoch 270 test:
Test Episode 1 Reward: 334.0
Test Episode 2 Reward: 213.0
Test Episode 3 Reward: 182.0
Test Episode 4 Reward: 182.0
Test Episode 5 Reward: 213.0
Test Episode 6 Reward: 214.0
Test Episode 7 Reward: 213.0
Test Episode 8 Reward: 213.0
Test Episode 9 Reward: 213.0
Test Episode 10 Reward: 286.0
Test Episode 11 Reward: 353.0
Test Episode 12 Reward: 213.0
Test Episode 13 Reward: 229.0
Test Episode 14 Reward: 132.0
Test Episode 15 Reward: 213.0
Test Episode 16 Reward: 213.0
Test Episode 17 Reward: 192.0
Test Episode 18 Reward: 219.0
Test Episode 19 Reward: 213.0
Test Episode 20 Reward: 213.0
Epoch 270 Average Test Reward: 222.65


100%|██████████| 2000/2000 [00:26<00:00, 74.32it/s]


Epoch 271 Mean Reward: 402.0


100%|██████████| 2000/2000 [00:27<00:00, 72.80it/s]


Epoch 272 Mean Reward: 657.0


100%|██████████| 2000/2000 [00:26<00:00, 74.66it/s]


Epoch 273 Mean Reward: 345.0


100%|██████████| 2000/2000 [00:27<00:00, 74.00it/s]


Epoch 274 Mean Reward: 490.0


100%|██████████| 2000/2000 [00:26<00:00, 74.99it/s]


Epoch 275 Mean Reward: 239.0


100%|██████████| 2000/2000 [00:26<00:00, 74.08it/s]


Epoch 276 Mean Reward: 442.0


100%|██████████| 2000/2000 [00:26<00:00, 74.19it/s]


Epoch 277 Mean Reward: 414.0


100%|██████████| 2000/2000 [00:27<00:00, 73.86it/s]


Epoch 278 Mean Reward: 491.0


100%|██████████| 2000/2000 [00:26<00:00, 74.84it/s]


Epoch 279 Mean Reward: 317.0


100%|██████████| 2000/2000 [00:27<00:00, 72.26it/s]


Epoch 280 Mean Reward: 712.0
Epoch 280 Model saved to ./checkpoints/take_cover.ckpt
Epoch 280 test:
Test Episode 1 Reward: 205.0
Test Episode 2 Reward: 152.0
Test Episode 3 Reward: 158.0
Test Episode 4 Reward: 133.0
Test Episode 5 Reward: 327.0
Test Episode 6 Reward: 117.0
Test Episode 7 Reward: 327.0
Test Episode 8 Reward: 145.0
Test Episode 9 Reward: 434.0
Test Episode 10 Reward: 314.0
Test Episode 11 Reward: 327.0
Test Episode 12 Reward: 327.0
Test Episode 13 Reward: 327.0
Test Episode 14 Reward: 327.0
Test Episode 15 Reward: 324.0
Test Episode 16 Reward: 279.0
Test Episode 17 Reward: 327.0
Test Episode 18 Reward: 327.0
Test Episode 19 Reward: 327.0
Test Episode 20 Reward: 327.0
Epoch 280 Average Test Reward: 276.55


100%|██████████| 2000/2000 [00:26<00:00, 74.71it/s]


Epoch 281 Mean Reward: 337.0


100%|██████████| 2000/2000 [00:27<00:00, 73.32it/s]


Epoch 282 Mean Reward: 511.0


100%|██████████| 2000/2000 [00:27<00:00, 74.01it/s]


Epoch 283 Mean Reward: 388.0


100%|██████████| 2000/2000 [00:26<00:00, 75.11it/s]


Epoch 284 Mean Reward: 247.0


100%|██████████| 2000/2000 [00:27<00:00, 72.67it/s]


Epoch 285 Mean Reward: 627.0


100%|██████████| 2000/2000 [00:27<00:00, 73.92it/s]


Epoch 286 Mean Reward: 224.0


100%|██████████| 2000/2000 [00:27<00:00, 72.01it/s]


Epoch 287 Mean Reward: 457.0


100%|██████████| 2000/2000 [00:27<00:00, 72.21it/s]


Epoch 288 Mean Reward: 245.0


100%|██████████| 2000/2000 [00:28<00:00, 71.25it/s]


Epoch 289 Mean Reward: 616.0


100%|██████████| 2000/2000 [00:27<00:00, 72.95it/s]


Epoch 290 Mean Reward: 262.0
Epoch 290 Model saved to ./checkpoints/take_cover.ckpt
Epoch 290 test:
Test Episode 1 Reward: 172.0
Test Episode 2 Reward: 183.0
Test Episode 3 Reward: 275.0
Test Episode 4 Reward: 275.0
Test Episode 5 Reward: 275.0
Test Episode 6 Reward: 275.0
Test Episode 7 Reward: 275.0
Test Episode 8 Reward: 238.0
Test Episode 9 Reward: 275.0
Test Episode 10 Reward: 275.0
Test Episode 11 Reward: 235.0
Test Episode 12 Reward: 222.0
Test Episode 13 Reward: 214.0
Test Episode 14 Reward: 275.0
Test Episode 15 Reward: 252.0
Test Episode 16 Reward: 181.0
Test Episode 17 Reward: 255.0
Test Episode 18 Reward: 275.0
Test Episode 19 Reward: 151.0
Test Episode 20 Reward: 181.0
Epoch 290 Average Test Reward: 237.95


100%|██████████| 2000/2000 [00:32<00:00, 62.25it/s]


Epoch 291 Mean Reward: 101.0


100%|██████████| 2000/2000 [00:32<00:00, 61.61it/s]


Epoch 292 Mean Reward: 252.0


100%|██████████| 2000/2000 [00:32<00:00, 60.93it/s]


Epoch 293 Mean Reward: 331.0


100%|██████████| 2000/2000 [00:32<00:00, 61.11it/s]


Epoch 294 Mean Reward: 343.0


100%|██████████| 2000/2000 [00:32<00:00, 62.26it/s]


Epoch 295 Mean Reward: 130.0


100%|██████████| 2000/2000 [00:32<00:00, 61.09it/s]


Epoch 296 Mean Reward: 287.0


100%|██████████| 2000/2000 [00:32<00:00, 60.61it/s]


Epoch 297 Mean Reward: 464.0


100%|██████████| 2000/2000 [00:33<00:00, 59.89it/s]


Epoch 298 Mean Reward: 561.0


100%|██████████| 2000/2000 [00:32<00:00, 60.78it/s]


Epoch 299 Mean Reward: 400.0


100%|██████████| 2000/2000 [00:32<00:00, 60.97it/s]


Epoch 300 Mean Reward: 346.0
Epoch 300 Model saved to ./checkpoints/take_cover.ckpt
Epoch 300 test:
Test Episode 1 Reward: 412.0
Test Episode 2 Reward: 122.0
Test Episode 3 Reward: 412.0
Test Episode 4 Reward: 412.0
Test Episode 5 Reward: 412.0
Test Episode 6 Reward: 393.0
Test Episode 7 Reward: 293.0
Test Episode 8 Reward: 110.0
Test Episode 9 Reward: 412.0
Test Episode 10 Reward: 412.0
Test Episode 11 Reward: 161.0
Test Episode 12 Reward: 168.0
Test Episode 13 Reward: 516.0
Test Episode 14 Reward: 412.0
Test Episode 15 Reward: 100.0
Test Episode 16 Reward: 412.0
Test Episode 17 Reward: 412.0
Test Episode 18 Reward: 412.0
Test Episode 19 Reward: 317.0
Test Episode 20 Reward: 330.0
Epoch 300 Average Test Reward: 331.5


100%|██████████| 2000/2000 [00:32<00:00, 61.22it/s]


Epoch 301 Mean Reward: 320.0


100%|██████████| 2000/2000 [00:33<00:00, 60.21it/s]


Epoch 302 Mean Reward: 276.0


100%|██████████| 2000/2000 [00:33<00:00, 60.55it/s]


Epoch 303 Mean Reward: 218.0


100%|██████████| 2000/2000 [00:33<00:00, 60.06it/s]


Epoch 304 Mean Reward: 542.0


100%|██████████| 2000/2000 [00:33<00:00, 60.45it/s]


Epoch 305 Mean Reward: 488.0


100%|██████████| 2000/2000 [00:32<00:00, 61.76it/s]


Epoch 306 Mean Reward: 201.0


100%|██████████| 2000/2000 [00:33<00:00, 60.34it/s]


Epoch 307 Mean Reward: 513.0


100%|██████████| 2000/2000 [00:32<00:00, 61.54it/s]


Epoch 308 Mean Reward: 201.0


100%|██████████| 2000/2000 [00:32<00:00, 61.63it/s]


Epoch 309 Mean Reward: 169.0


100%|██████████| 2000/2000 [00:32<00:00, 61.12it/s]


Epoch 310 Mean Reward: 317.0
Epoch 310 Model saved to ./checkpoints/take_cover.ckpt
Epoch 310 test:
Test Episode 1 Reward: 222.0
Test Episode 2 Reward: 311.0
Test Episode 3 Reward: 245.0
Test Episode 4 Reward: 216.0
Test Episode 5 Reward: 220.0
Test Episode 6 Reward: 237.0
Test Episode 7 Reward: 245.0
Test Episode 8 Reward: 220.0
Test Episode 9 Reward: 245.0
Test Episode 10 Reward: 245.0
Test Episode 11 Reward: 256.0
Test Episode 12 Reward: 245.0
Test Episode 13 Reward: 388.0
Test Episode 14 Reward: 245.0
Test Episode 15 Reward: 258.0
Test Episode 16 Reward: 264.0
Test Episode 17 Reward: 165.0
Test Episode 18 Reward: 178.0
Test Episode 19 Reward: 294.0
Test Episode 20 Reward: 245.0
Epoch 310 Average Test Reward: 247.2


100%|██████████| 2000/2000 [00:33<00:00, 59.86it/s]


Epoch 311 Mean Reward: 597.0


100%|██████████| 2000/2000 [00:32<00:00, 61.70it/s]


Epoch 312 Mean Reward: 245.0


100%|██████████| 2000/2000 [00:33<00:00, 60.36it/s]


Epoch 313 Mean Reward: 484.0


100%|██████████| 2000/2000 [00:32<00:00, 61.15it/s]


Epoch 314 Mean Reward: 344.0


100%|██████████| 2000/2000 [00:33<00:00, 60.15it/s]


Epoch 315 Mean Reward: 550.0


100%|██████████| 2000/2000 [00:33<00:00, 60.53it/s]


Epoch 316 Mean Reward: 454.0


100%|██████████| 2000/2000 [00:32<00:00, 61.67it/s]


Epoch 317 Mean Reward: 251.0


100%|██████████| 2000/2000 [00:34<00:00, 58.12it/s]


Epoch 318 Mean Reward: 909.0


100%|██████████| 2000/2000 [00:34<00:00, 58.09it/s]


Epoch 319 Mean Reward: 933.0


100%|██████████| 2000/2000 [00:33<00:00, 60.56it/s]


Epoch 320 Mean Reward: 428.0
Epoch 320 Model saved to ./checkpoints/take_cover.ckpt
Epoch 320 test:
Test Episode 1 Reward: 284.0
Test Episode 2 Reward: 250.0
Test Episode 3 Reward: 92.0
Test Episode 4 Reward: 109.0
Test Episode 5 Reward: 250.0
Test Episode 6 Reward: 250.0
Test Episode 7 Reward: 221.0
Test Episode 8 Reward: 250.0
Test Episode 9 Reward: 180.0
Test Episode 10 Reward: 112.0
Test Episode 11 Reward: 224.0
Test Episode 12 Reward: 250.0
Test Episode 13 Reward: 250.0
Test Episode 14 Reward: 250.0
Test Episode 15 Reward: 370.0
Test Episode 16 Reward: 250.0
Test Episode 17 Reward: 250.0
Test Episode 18 Reward: 250.0
Test Episode 19 Reward: 145.0
Test Episode 20 Reward: 207.0
Epoch 320 Average Test Reward: 222.2


100%|██████████| 2000/2000 [00:32<00:00, 60.77it/s]


Epoch 321 Mean Reward: 373.0


100%|██████████| 2000/2000 [00:33<00:00, 59.94it/s]


Epoch 322 Mean Reward: 563.0


100%|██████████| 2000/2000 [00:32<00:00, 61.04it/s]


Epoch 323 Mean Reward: 261.0


100%|██████████| 2000/2000 [00:32<00:00, 60.75it/s]


Epoch 324 Mean Reward: 281.0


100%|██████████| 2000/2000 [00:32<00:00, 62.15it/s]


Epoch 325 Mean Reward: 106.0


100%|██████████| 2000/2000 [00:33<00:00, 60.07it/s]


Epoch 326 Mean Reward: 504.0


100%|██████████| 2000/2000 [00:33<00:00, 60.54it/s]


Epoch 327 Mean Reward: 426.0


100%|██████████| 2000/2000 [00:32<00:00, 61.46it/s]


Epoch 328 Mean Reward: 279.0


100%|██████████| 2000/2000 [00:33<00:00, 60.30it/s]


Epoch 329 Mean Reward: 511.0


100%|██████████| 2000/2000 [00:32<00:00, 61.30it/s]


Epoch 330 Mean Reward: 272.0
Epoch 330 Model saved to ./checkpoints/take_cover.ckpt
Epoch 330 test:
Test Episode 1 Reward: 330.0
Test Episode 2 Reward: 330.0
Test Episode 3 Reward: 330.0
Test Episode 4 Reward: 248.0
Test Episode 5 Reward: 114.0
Test Episode 6 Reward: 257.0
Test Episode 7 Reward: 339.0
Test Episode 8 Reward: 451.0
Test Episode 9 Reward: 181.0
Test Episode 10 Reward: 204.0
Test Episode 11 Reward: 330.0
Test Episode 12 Reward: 330.0
Test Episode 13 Reward: 128.0
Test Episode 14 Reward: 330.0
Test Episode 15 Reward: 330.0
Test Episode 16 Reward: 330.0
Test Episode 17 Reward: 330.0
Test Episode 18 Reward: 197.0
Test Episode 19 Reward: 155.0
Test Episode 20 Reward: 330.0
Epoch 330 Average Test Reward: 278.7


100%|██████████| 2000/2000 [00:32<00:00, 61.47it/s]


Epoch 331 Mean Reward: 209.0


100%|██████████| 2000/2000 [00:32<00:00, 61.69it/s]


Epoch 332 Mean Reward: 166.0


100%|██████████| 2000/2000 [00:33<00:00, 59.72it/s]


Epoch 333 Mean Reward: 508.0


100%|██████████| 2000/2000 [00:33<00:00, 59.62it/s]


Epoch 334 Mean Reward: 510.0


100%|██████████| 2000/2000 [00:33<00:00, 60.09it/s]


Epoch 335 Mean Reward: 484.0


100%|██████████| 2000/2000 [00:33<00:00, 60.17it/s]


Epoch 336 Mean Reward: 478.0


100%|██████████| 2000/2000 [00:33<00:00, 58.99it/s]


Epoch 337 Mean Reward: 766.0


100%|██████████| 2000/2000 [00:33<00:00, 59.97it/s]


Epoch 338 Mean Reward: 528.0


100%|██████████| 2000/2000 [00:32<00:00, 60.79it/s]


Epoch 339 Mean Reward: 286.0


100%|██████████| 2000/2000 [00:33<00:00, 59.38it/s]


Epoch 340 Mean Reward: 293.0
Epoch 340 Model saved to ./checkpoints/take_cover.ckpt
Epoch 340 test:
Test Episode 1 Reward: 106.0
Test Episode 2 Reward: 249.0
Test Episode 3 Reward: 256.0
Test Episode 4 Reward: 249.0
Test Episode 5 Reward: 261.0
Test Episode 6 Reward: 113.0
Test Episode 7 Reward: 249.0
Test Episode 8 Reward: 249.0
Test Episode 9 Reward: 249.0
Test Episode 10 Reward: 249.0
Test Episode 11 Reward: 249.0
Test Episode 12 Reward: 249.0
Test Episode 13 Reward: 249.0
Test Episode 14 Reward: 336.0
Test Episode 15 Reward: 249.0
Test Episode 16 Reward: 124.0
Test Episode 17 Reward: 249.0
Test Episode 18 Reward: 212.0
Test Episode 19 Reward: 142.0
Test Episode 20 Reward: 249.0
Epoch 340 Average Test Reward: 226.9


100%|██████████| 2000/2000 [00:32<00:00, 60.61it/s]


Epoch 341 Mean Reward: 375.0


100%|██████████| 2000/2000 [00:32<00:00, 61.20it/s]


Epoch 342 Mean Reward: 211.0


100%|██████████| 2000/2000 [00:32<00:00, 61.23it/s]


Epoch 343 Mean Reward: 298.0


100%|██████████| 2000/2000 [00:32<00:00, 61.39it/s]


Epoch 344 Mean Reward: 238.0


100%|██████████| 2000/2000 [00:33<00:00, 60.42it/s]


Epoch 345 Mean Reward: 455.0


100%|██████████| 2000/2000 [00:32<00:00, 61.10it/s]


Epoch 346 Mean Reward: 240.0


100%|██████████| 2000/2000 [00:33<00:00, 60.28it/s]


Epoch 347 Mean Reward: 384.0


100%|██████████| 2000/2000 [00:32<00:00, 61.17it/s]


Epoch 348 Mean Reward: 243.0


100%|██████████| 2000/2000 [00:32<00:00, 61.69it/s]


Epoch 349 Mean Reward: 170.0


100%|██████████| 2000/2000 [00:34<00:00, 58.38it/s]


Epoch 350 Mean Reward: 809.0
Epoch 350 Model saved to ./checkpoints/take_cover.ckpt
Epoch 350 test:
Test Episode 1 Reward: 122.0
Test Episode 2 Reward: 401.0
Test Episode 3 Reward: 401.0
Test Episode 4 Reward: 401.0
Test Episode 5 Reward: 401.0
Test Episode 6 Reward: 401.0
Test Episode 7 Reward: 209.0
Test Episode 8 Reward: 401.0
Test Episode 9 Reward: 401.0
Test Episode 10 Reward: 401.0
Test Episode 11 Reward: 100.0
Test Episode 12 Reward: 401.0
Test Episode 13 Reward: 290.0
Test Episode 14 Reward: 502.0
Test Episode 15 Reward: 401.0
Test Episode 16 Reward: 401.0
Test Episode 17 Reward: 202.0
Test Episode 18 Reward: 114.0
Test Episode 19 Reward: 163.0
Test Episode 20 Reward: 401.0
Epoch 350 Average Test Reward: 325.7


100%|██████████| 2000/2000 [00:28<00:00, 69.25it/s]


Epoch 351 Mean Reward: 564.0


100%|██████████| 2000/2000 [00:30<00:00, 66.09it/s]


Epoch 352 Mean Reward: 372.0


100%|██████████| 2000/2000 [00:29<00:00, 66.95it/s]


Epoch 353 Mean Reward: 483.0


100%|██████████| 2000/2000 [00:29<00:00, 67.52it/s]


Epoch 354 Mean Reward: 252.0


100%|██████████| 2000/2000 [00:30<00:00, 65.31it/s]


Epoch 355 Mean Reward: 457.0


100%|██████████| 2000/2000 [00:27<00:00, 73.27it/s]


Epoch 356 Mean Reward: 243.0


100%|██████████| 2000/2000 [00:27<00:00, 71.86it/s]


Epoch 357 Mean Reward: 395.0


100%|██████████| 2000/2000 [00:27<00:00, 71.96it/s]


Epoch 358 Mean Reward: 327.0


100%|██████████| 2000/2000 [00:31<00:00, 63.80it/s]


Epoch 359 Mean Reward: 318.0


100%|██████████| 2000/2000 [00:33<00:00, 60.28it/s]


Epoch 360 Mean Reward: 324.0
Epoch 360 Model saved to ./checkpoints/take_cover.ckpt
Epoch 360 test:
Test Episode 1 Reward: 167.0
Test Episode 2 Reward: 194.0
Test Episode 3 Reward: 258.0
Test Episode 4 Reward: 258.0
Test Episode 5 Reward: 258.0
Test Episode 6 Reward: 114.0
Test Episode 7 Reward: 258.0
Test Episode 8 Reward: 258.0
Test Episode 9 Reward: 264.0
Test Episode 10 Reward: 258.0
Test Episode 11 Reward: 197.0
Test Episode 12 Reward: 258.0
Test Episode 13 Reward: 342.0
Test Episode 14 Reward: 165.0
Test Episode 15 Reward: 127.0
Test Episode 16 Reward: 112.0
Test Episode 17 Reward: 258.0
Test Episode 18 Reward: 258.0
Test Episode 19 Reward: 229.0
Test Episode 20 Reward: 147.0
Epoch 360 Average Test Reward: 219.0


100%|██████████| 2000/2000 [00:33<00:00, 59.13it/s]


Epoch 361 Mean Reward: 138.0


100%|██████████| 2000/2000 [00:32<00:00, 61.13it/s]


Epoch 362 Mean Reward: 285.0


100%|██████████| 2000/2000 [00:32<00:00, 62.13it/s]


Epoch 363 Mean Reward: 231.0


100%|██████████| 2000/2000 [00:32<00:00, 61.97it/s]


Epoch 364 Mean Reward: 218.0


100%|██████████| 2000/2000 [00:31<00:00, 62.60it/s]


Epoch 365 Mean Reward: 132.0


100%|██████████| 2000/2000 [00:32<00:00, 61.58it/s]


Epoch 366 Mean Reward: 265.0


100%|██████████| 2000/2000 [00:34<00:00, 57.90it/s]


Epoch 367 Mean Reward: 1052.0


100%|██████████| 2000/2000 [00:32<00:00, 61.67it/s]


Epoch 368 Mean Reward: 265.0


100%|██████████| 2000/2000 [00:32<00:00, 60.64it/s]


Epoch 369 Mean Reward: 473.0


100%|██████████| 2000/2000 [00:32<00:00, 61.47it/s]


Epoch 370 Mean Reward: 311.0
Epoch 370 Model saved to ./checkpoints/take_cover.ckpt
Epoch 370 test:
Test Episode 1 Reward: 203.0
Test Episode 2 Reward: 175.0
Test Episode 3 Reward: 332.0
Test Episode 4 Reward: 148.0
Test Episode 5 Reward: 332.0
Test Episode 6 Reward: 121.0
Test Episode 7 Reward: 210.0
Test Episode 8 Reward: 332.0
Test Episode 9 Reward: 332.0
Test Episode 10 Reward: 332.0
Test Episode 11 Reward: 276.0
Test Episode 12 Reward: 193.0
Test Episode 13 Reward: 332.0
Test Episode 14 Reward: 130.0
Test Episode 15 Reward: 332.0
Test Episode 16 Reward: 332.0
Test Episode 17 Reward: 332.0
Test Episode 18 Reward: 332.0
Test Episode 19 Reward: 332.0
Test Episode 20 Reward: 371.0
Epoch 370 Average Test Reward: 273.95


100%|██████████| 2000/2000 [00:34<00:00, 57.43it/s]


Epoch 371 Mean Reward: 670.0


100%|██████████| 2000/2000 [00:34<00:00, 57.68it/s]


Epoch 372 Mean Reward: 562.0


100%|██████████| 2000/2000 [00:34<00:00, 58.31it/s]


Epoch 373 Mean Reward: 380.0


100%|██████████| 2000/2000 [00:33<00:00, 59.84it/s]


Epoch 374 Mean Reward: 192.0


100%|██████████| 2000/2000 [00:33<00:00, 59.88it/s]


Epoch 375 Mean Reward: 152.0


100%|██████████| 2000/2000 [00:34<00:00, 57.80it/s]


Epoch 376 Mean Reward: 430.0


100%|██████████| 2000/2000 [00:34<00:00, 58.29it/s]


Epoch 377 Mean Reward: 269.0


100%|██████████| 2000/2000 [00:34<00:00, 58.52it/s]


Epoch 378 Mean Reward: 290.0


100%|██████████| 2000/2000 [00:34<00:00, 58.55it/s]


Epoch 379 Mean Reward: 483.0


100%|██████████| 2000/2000 [00:33<00:00, 59.67it/s]


Epoch 380 Mean Reward: 546.0
Epoch 380 Model saved to ./checkpoints/take_cover.ckpt
Epoch 380 test:
Test Episode 1 Reward: 189.0
Test Episode 2 Reward: 213.0
Test Episode 3 Reward: 206.0
Test Episode 4 Reward: 182.0
Test Episode 5 Reward: 109.0
Test Episode 6 Reward: 213.0
Test Episode 7 Reward: 213.0
Test Episode 8 Reward: 118.0
Test Episode 9 Reward: 213.0
Test Episode 10 Reward: 213.0
Test Episode 11 Reward: 321.0
Test Episode 12 Reward: 150.0
Test Episode 13 Reward: 271.0
Test Episode 14 Reward: 109.0
Test Episode 15 Reward: 125.0
Test Episode 16 Reward: 213.0
Test Episode 17 Reward: 109.0
Test Episode 18 Reward: 102.0
Test Episode 19 Reward: 108.0
Test Episode 20 Reward: 112.0
Epoch 380 Average Test Reward: 174.45


100%|██████████| 2000/2000 [00:32<00:00, 61.08it/s]


Epoch 381 Mean Reward: 401.0


100%|██████████| 2000/2000 [00:33<00:00, 60.48it/s]


Epoch 382 Mean Reward: 550.0


100%|██████████| 2000/2000 [00:33<00:00, 59.82it/s]


Epoch 383 Mean Reward: 607.0


100%|██████████| 2000/2000 [00:33<00:00, 60.40it/s]


Epoch 384 Mean Reward: 517.0


100%|██████████| 2000/2000 [00:32<00:00, 61.50it/s]


Epoch 385 Mean Reward: 317.0


100%|██████████| 2000/2000 [00:33<00:00, 60.60it/s]


Epoch 386 Mean Reward: 561.0


100%|██████████| 2000/2000 [00:34<00:00, 58.65it/s]


Epoch 387 Mean Reward: 861.0


100%|██████████| 2000/2000 [00:33<00:00, 60.36it/s]


Epoch 388 Mean Reward: 548.0


100%|██████████| 2000/2000 [00:32<00:00, 61.35it/s]


Epoch 389 Mean Reward: 313.0


100%|██████████| 2000/2000 [00:32<00:00, 61.27it/s]


Epoch 390 Mean Reward: 398.0
Epoch 390 Model saved to ./checkpoints/take_cover.ckpt
Epoch 390 test:
Test Episode 1 Reward: 277.0
Test Episode 2 Reward: 239.0
Test Episode 3 Reward: 177.0
Test Episode 4 Reward: 143.0
Test Episode 5 Reward: 186.0
Test Episode 6 Reward: 184.0
Test Episode 7 Reward: 370.0
Test Episode 8 Reward: 172.0
Test Episode 9 Reward: 400.0
Test Episode 10 Reward: 277.0
Test Episode 11 Reward: 165.0
Test Episode 12 Reward: 277.0
Test Episode 13 Reward: 277.0
Test Episode 14 Reward: 277.0
Test Episode 15 Reward: 277.0
Test Episode 16 Reward: 277.0
Test Episode 17 Reward: 112.0
Test Episode 18 Reward: 237.0
Test Episode 19 Reward: 277.0
Test Episode 20 Reward: 167.0
Epoch 390 Average Test Reward: 238.4


100%|██████████| 2000/2000 [00:34<00:00, 58.32it/s]


Epoch 391 Mean Reward: 902.0


100%|██████████| 2000/2000 [00:32<00:00, 62.26it/s]


Epoch 392 Mean Reward: 238.0


100%|██████████| 2000/2000 [00:32<00:00, 62.26it/s]


Epoch 393 Mean Reward: 242.0


100%|██████████| 2000/2000 [00:33<00:00, 60.57it/s]


Epoch 394 Mean Reward: 542.0


100%|██████████| 2000/2000 [00:31<00:00, 62.59it/s]


Epoch 395 Mean Reward: 121.0


100%|██████████| 2000/2000 [00:33<00:00, 60.51it/s]


Epoch 396 Mean Reward: 299.0


100%|██████████| 2000/2000 [00:32<00:00, 61.54it/s]


Epoch 397 Mean Reward: 246.0


100%|██████████| 2000/2000 [00:31<00:00, 62.51it/s]


Epoch 398 Mean Reward: 139.0


100%|██████████| 2000/2000 [00:32<00:00, 61.04it/s]


Epoch 399 Mean Reward: 413.0


100%|██████████| 2000/2000 [00:32<00:00, 60.62it/s]


Epoch 400 Mean Reward: 541.0
Epoch 400 Model saved to ./checkpoints/take_cover.ckpt
Epoch 400 test:
Test Episode 1 Reward: 315.0
Test Episode 2 Reward: 121.0
Test Episode 3 Reward: 263.0
Test Episode 4 Reward: 640.0
Test Episode 5 Reward: 121.0
Test Episode 6 Reward: 250.0
Test Episode 7 Reward: 250.0
Test Episode 8 Reward: 250.0
Test Episode 9 Reward: 175.0
Test Episode 10 Reward: 219.0
Test Episode 11 Reward: 250.0
Test Episode 12 Reward: 250.0
Test Episode 13 Reward: 190.0
Test Episode 14 Reward: 118.0
Test Episode 15 Reward: 127.0
Test Episode 16 Reward: 250.0
Test Episode 17 Reward: 250.0
Test Episode 18 Reward: 133.0
Test Episode 19 Reward: 250.0
Test Episode 20 Reward: 438.0
Epoch 400 Average Test Reward: 243.0


100%|██████████| 2000/2000 [00:32<00:00, 61.94it/s]


Epoch 401 Mean Reward: 313.0


100%|██████████| 2000/2000 [00:32<00:00, 62.13it/s]


Epoch 402 Mean Reward: 266.0


100%|██████████| 2000/2000 [00:31<00:00, 63.98it/s]


Epoch 403 Mean Reward: 769.0


100%|██████████| 2000/2000 [00:28<00:00, 70.42it/s]


Epoch 404 Mean Reward: 351.0


100%|██████████| 2000/2000 [00:28<00:00, 69.70it/s]


Epoch 405 Mean Reward: 425.0


100%|██████████| 2000/2000 [00:27<00:00, 71.49it/s]


Epoch 406 Mean Reward: 554.0


100%|██████████| 2000/2000 [00:27<00:00, 73.13it/s]


Epoch 407 Mean Reward: 330.0


100%|██████████| 2000/2000 [00:27<00:00, 72.49it/s]


Epoch 408 Mean Reward: 408.0


100%|██████████| 2000/2000 [00:27<00:00, 72.21it/s]


Epoch 409 Mean Reward: 376.0


100%|██████████| 2000/2000 [00:27<00:00, 72.38it/s]


Epoch 410 Mean Reward: 435.0
Epoch 410 Model saved to ./checkpoints/take_cover.ckpt
Epoch 410 test:
Test Episode 1 Reward: 397.0
Test Episode 2 Reward: 397.0
Test Episode 3 Reward: 397.0
Test Episode 4 Reward: 260.0
Test Episode 5 Reward: 397.0
Test Episode 6 Reward: 397.0
Test Episode 7 Reward: 159.0
Test Episode 8 Reward: 169.0
Test Episode 9 Reward: 397.0
Test Episode 10 Reward: 131.0
Test Episode 11 Reward: 298.0
Test Episode 12 Reward: 146.0
Test Episode 13 Reward: 397.0
Test Episode 14 Reward: 397.0
Test Episode 15 Reward: 187.0
Test Episode 16 Reward: 397.0
Test Episode 17 Reward: 397.0
Test Episode 18 Reward: 341.0
Test Episode 19 Reward: 397.0
Test Episode 20 Reward: 397.0
Epoch 410 Average Test Reward: 322.75


100%|██████████| 2000/2000 [00:33<00:00, 59.00it/s]


Epoch 411 Mean Reward: 795.0


100%|██████████| 2000/2000 [00:33<00:00, 60.10it/s]


Epoch 412 Mean Reward: 216.0


100%|██████████| 2000/2000 [00:33<00:00, 60.30it/s]


Epoch 413 Mean Reward: 410.0


100%|██████████| 2000/2000 [00:32<00:00, 61.55it/s]


Epoch 414 Mean Reward: 237.0


100%|██████████| 2000/2000 [00:33<00:00, 60.27it/s]


Epoch 415 Mean Reward: 541.0


100%|██████████| 2000/2000 [00:32<00:00, 61.30it/s]


Epoch 416 Mean Reward: 311.0


100%|██████████| 2000/2000 [00:32<00:00, 61.99it/s]


Epoch 417 Mean Reward: 146.0


100%|██████████| 2000/2000 [00:32<00:00, 62.31it/s]


Epoch 418 Mean Reward: 94.0


100%|██████████| 2000/2000 [00:32<00:00, 61.23it/s]


Epoch 419 Mean Reward: 270.0


100%|██████████| 2000/2000 [00:33<00:00, 59.08it/s]


Epoch 420 Mean Reward: 750.0
Epoch 420 Model saved to ./checkpoints/take_cover.ckpt
Epoch 420 test:
Test Episode 1 Reward: 419.0
Test Episode 2 Reward: 273.0
Test Episode 3 Reward: 297.0
Test Episode 4 Reward: 273.0
Test Episode 5 Reward: 273.0
Test Episode 6 Reward: 273.0
Test Episode 7 Reward: 121.0
Test Episode 8 Reward: 273.0
Test Episode 9 Reward: 211.0
Test Episode 10 Reward: 273.0
Test Episode 11 Reward: 273.0
Test Episode 12 Reward: 273.0
Test Episode 13 Reward: 273.0
Test Episode 14 Reward: 273.0
Test Episode 15 Reward: 273.0
Test Episode 16 Reward: 273.0
Test Episode 17 Reward: 219.0
Test Episode 18 Reward: 120.0
Test Episode 19 Reward: 273.0
Test Episode 20 Reward: 273.0
Epoch 420 Average Test Reward: 260.45


100%|██████████| 2000/2000 [00:32<00:00, 61.18it/s]


Epoch 421 Mean Reward: 293.0


100%|██████████| 2000/2000 [00:32<00:00, 61.02it/s]


Epoch 422 Mean Reward: 334.0


100%|██████████| 2000/2000 [00:33<00:00, 60.33it/s]


Epoch 423 Mean Reward: 445.0


100%|██████████| 2000/2000 [00:33<00:00, 60.44it/s]


Epoch 424 Mean Reward: 405.0


100%|██████████| 2000/2000 [00:33<00:00, 59.82it/s]


Epoch 425 Mean Reward: 558.0


100%|██████████| 2000/2000 [00:32<00:00, 61.52it/s]


Epoch 426 Mean Reward: 212.0


100%|██████████| 2000/2000 [00:33<00:00, 60.16it/s]


Epoch 427 Mean Reward: 501.0


100%|██████████| 2000/2000 [00:32<00:00, 61.96it/s]


Epoch 428 Mean Reward: 168.0


100%|██████████| 2000/2000 [00:32<00:00, 62.12it/s]


Epoch 429 Mean Reward: 160.0


100%|██████████| 2000/2000 [00:32<00:00, 60.76it/s]


Epoch 430 Mean Reward: 322.0
Epoch 430 Model saved to ./checkpoints/take_cover.ckpt
Epoch 430 test:
Test Episode 1 Reward: 338.0
Test Episode 2 Reward: 126.0
Test Episode 3 Reward: 338.0
Test Episode 4 Reward: 141.0
Test Episode 5 Reward: 182.0
Test Episode 6 Reward: 98.0
Test Episode 7 Reward: 212.0
Test Episode 8 Reward: 126.0
Test Episode 9 Reward: 338.0
Test Episode 10 Reward: 338.0
Test Episode 11 Reward: 169.0
Test Episode 12 Reward: 127.0
Test Episode 13 Reward: 224.0
Test Episode 14 Reward: 139.0
Test Episode 15 Reward: 137.0
Test Episode 16 Reward: 338.0
Test Episode 17 Reward: 338.0
Test Episode 18 Reward: 241.0
Test Episode 19 Reward: 338.0
Test Episode 20 Reward: 338.0
Epoch 430 Average Test Reward: 231.3


100%|██████████| 2000/2000 [00:34<00:00, 58.32it/s]


Epoch 431 Mean Reward: 331.0


100%|██████████| 2000/2000 [00:34<00:00, 58.45it/s]


Epoch 432 Mean Reward: 314.0


100%|██████████| 2000/2000 [00:33<00:00, 59.00it/s]


Epoch 433 Mean Reward: 217.0


100%|██████████| 2000/2000 [00:35<00:00, 56.00it/s]


Epoch 434 Mean Reward: 789.0


100%|██████████| 2000/2000 [00:34<00:00, 58.39it/s]


Epoch 435 Mean Reward: 435.0


100%|██████████| 2000/2000 [00:32<00:00, 62.19it/s]


Epoch 436 Mean Reward: 126.0


100%|██████████| 2000/2000 [00:34<00:00, 58.01it/s]


Epoch 437 Mean Reward: 914.0


100%|██████████| 2000/2000 [00:32<00:00, 62.18it/s]


Epoch 438 Mean Reward: 146.0


100%|██████████| 2000/2000 [00:32<00:00, 61.35it/s]


Epoch 439 Mean Reward: 279.0


100%|██████████| 2000/2000 [00:34<00:00, 58.16it/s]


Epoch 440 Mean Reward: 935.0
Epoch 440 Model saved to ./checkpoints/take_cover.ckpt
Epoch 440 test:
Test Episode 1 Reward: 512.0
Test Episode 2 Reward: 512.0
Test Episode 3 Reward: 512.0
Test Episode 4 Reward: 212.0
Test Episode 5 Reward: 512.0
Test Episode 6 Reward: 273.0
Test Episode 7 Reward: 512.0
Test Episode 8 Reward: 130.0
Test Episode 9 Reward: 512.0
Test Episode 10 Reward: 512.0
Test Episode 11 Reward: 363.0
Test Episode 12 Reward: 512.0
Test Episode 13 Reward: 512.0
Test Episode 14 Reward: 468.0
Test Episode 15 Reward: 130.0
Test Episode 16 Reward: 512.0
Test Episode 17 Reward: 248.0
Test Episode 18 Reward: 512.0
Test Episode 19 Reward: 512.0
Test Episode 20 Reward: 512.0
Epoch 440 Average Test Reward: 424.0


100%|██████████| 2000/2000 [00:34<00:00, 57.53it/s]


Epoch 441 Mean Reward: 537.0


100%|██████████| 2000/2000 [00:33<00:00, 58.95it/s]


Epoch 442 Mean Reward: 308.0


100%|██████████| 2000/2000 [00:35<00:00, 57.12it/s]


Epoch 443 Mean Reward: 490.0


100%|██████████| 2000/2000 [00:34<00:00, 58.37it/s]


Epoch 444 Mean Reward: 348.0


100%|██████████| 2000/2000 [00:34<00:00, 57.74it/s]


Epoch 445 Mean Reward: 890.0


100%|██████████| 2000/2000 [00:33<00:00, 60.23it/s]


Epoch 446 Mean Reward: 396.0


100%|██████████| 2000/2000 [00:33<00:00, 59.95it/s]


Epoch 447 Mean Reward: 428.0


100%|██████████| 2000/2000 [00:33<00:00, 59.69it/s]


Epoch 448 Mean Reward: 563.0


100%|██████████| 2000/2000 [00:34<00:00, 57.76it/s]


Epoch 449 Mean Reward: 870.0


100%|██████████| 2000/2000 [00:32<00:00, 60.95it/s]


Epoch 450 Mean Reward: 289.0
Epoch 450 Model saved to ./checkpoints/take_cover.ckpt
Epoch 450 test:
Test Episode 1 Reward: 409.0
Test Episode 2 Reward: 193.0
Test Episode 3 Reward: 111.0
Test Episode 4 Reward: 409.0
Test Episode 5 Reward: 155.0
Test Episode 6 Reward: 409.0
Test Episode 7 Reward: 111.0
Test Episode 8 Reward: 409.0
Test Episode 9 Reward: 409.0
Test Episode 10 Reward: 165.0
Test Episode 11 Reward: 380.0
Test Episode 12 Reward: 305.0
Test Episode 13 Reward: 409.0
Test Episode 14 Reward: 98.0
Test Episode 15 Reward: 409.0
Test Episode 16 Reward: 160.0
Test Episode 17 Reward: 210.0
Test Episode 18 Reward: 142.0
Test Episode 19 Reward: 409.0
Test Episode 20 Reward: 132.0
Epoch 450 Average Test Reward: 271.7


100%|██████████| 2000/2000 [00:33<00:00, 60.03it/s]


Epoch 451 Mean Reward: 532.0


100%|██████████| 2000/2000 [00:33<00:00, 60.58it/s]


Epoch 452 Mean Reward: 341.0


100%|██████████| 2000/2000 [00:32<00:00, 61.28it/s]


Epoch 453 Mean Reward: 333.0


100%|██████████| 2000/2000 [00:32<00:00, 61.46it/s]


Epoch 454 Mean Reward: 272.0


100%|██████████| 2000/2000 [00:32<00:00, 61.37it/s]


Epoch 455 Mean Reward: 287.0


100%|██████████| 2000/2000 [00:32<00:00, 61.13it/s]


Epoch 456 Mean Reward: 334.0


100%|██████████| 2000/2000 [00:32<00:00, 60.97it/s]


Epoch 457 Mean Reward: 319.0


100%|██████████| 2000/2000 [00:33<00:00, 59.66it/s]


Epoch 458 Mean Reward: 510.0


100%|██████████| 2000/2000 [00:32<00:00, 60.87it/s]


Epoch 459 Mean Reward: 267.0


100%|██████████| 2000/2000 [00:32<00:00, 60.75it/s]


Epoch 460 Mean Reward: 283.0
Epoch 460 Model saved to ./checkpoints/take_cover.ckpt
Epoch 460 test:
Test Episode 1 Reward: 246.0
Test Episode 2 Reward: 197.0
Test Episode 3 Reward: 315.0
Test Episode 4 Reward: 246.0
Test Episode 5 Reward: 246.0
Test Episode 6 Reward: 377.0
Test Episode 7 Reward: 163.0
Test Episode 8 Reward: 246.0
Test Episode 9 Reward: 246.0
Test Episode 10 Reward: 305.0
Test Episode 11 Reward: 202.0
Test Episode 12 Reward: 246.0
Test Episode 13 Reward: 246.0
Test Episode 14 Reward: 242.0
Test Episode 15 Reward: 274.0
Test Episode 16 Reward: 246.0
Test Episode 17 Reward: 224.0
Test Episode 18 Reward: 246.0
Test Episode 19 Reward: 340.0
Test Episode 20 Reward: 359.0
Epoch 460 Average Test Reward: 260.6


100%|██████████| 2000/2000 [00:33<00:00, 60.12it/s]


Epoch 461 Mean Reward: 375.0


100%|██████████| 2000/2000 [00:33<00:00, 59.47it/s]


Epoch 462 Mean Reward: 465.0


100%|██████████| 2000/2000 [00:35<00:00, 56.76it/s]


Epoch 463 Mean Reward: 901.0


100%|██████████| 2000/2000 [00:32<00:00, 60.79it/s]


Epoch 464 Mean Reward: 279.0


100%|██████████| 2000/2000 [00:33<00:00, 60.18it/s]


Epoch 465 Mean Reward: 543.0


100%|██████████| 2000/2000 [00:33<00:00, 60.48it/s]


Epoch 466 Mean Reward: 305.0


100%|██████████| 2000/2000 [00:33<00:00, 59.02it/s]


Epoch 467 Mean Reward: 408.0


100%|██████████| 2000/2000 [00:32<00:00, 61.79it/s]


Epoch 468 Mean Reward: 211.0


100%|██████████| 2000/2000 [00:32<00:00, 61.29it/s]


Epoch 469 Mean Reward: 296.0


100%|██████████| 2000/2000 [00:32<00:00, 61.93it/s]


Epoch 470 Mean Reward: 219.0
Epoch 470 Model saved to ./checkpoints/take_cover.ckpt
Epoch 470 test:
Test Episode 1 Reward: 213.0
Test Episode 2 Reward: 217.0
Test Episode 3 Reward: 213.0
Test Episode 4 Reward: 175.0
Test Episode 5 Reward: 213.0
Test Episode 6 Reward: 451.0
Test Episode 7 Reward: 213.0
Test Episode 8 Reward: 213.0
Test Episode 9 Reward: 115.0
Test Episode 10 Reward: 224.0
Test Episode 11 Reward: 213.0
Test Episode 12 Reward: 173.0
Test Episode 13 Reward: 198.0
Test Episode 14 Reward: 129.0
Test Episode 15 Reward: 223.0
Test Episode 16 Reward: 213.0
Test Episode 17 Reward: 213.0
Test Episode 18 Reward: 213.0
Test Episode 19 Reward: 213.0
Test Episode 20 Reward: 213.0
Epoch 470 Average Test Reward: 212.4


100%|██████████| 2000/2000 [00:32<00:00, 61.35it/s]


Epoch 471 Mean Reward: 225.0


100%|██████████| 2000/2000 [00:32<00:00, 61.10it/s]


Epoch 472 Mean Reward: 286.0


100%|██████████| 2000/2000 [00:33<00:00, 60.55it/s]


Epoch 473 Mean Reward: 386.0


100%|██████████| 2000/2000 [00:32<00:00, 61.25it/s]


Epoch 474 Mean Reward: 249.0


100%|██████████| 2000/2000 [00:32<00:00, 61.06it/s]


Epoch 475 Mean Reward: 321.0


100%|██████████| 2000/2000 [00:32<00:00, 60.64it/s]


Epoch 476 Mean Reward: 371.0


100%|██████████| 2000/2000 [00:32<00:00, 61.64it/s]


Epoch 477 Mean Reward: 212.0


100%|██████████| 2000/2000 [00:32<00:00, 62.18it/s]


Epoch 478 Mean Reward: 95.0


100%|██████████| 2000/2000 [00:32<00:00, 61.12it/s]


Epoch 479 Mean Reward: 288.0


100%|██████████| 2000/2000 [00:32<00:00, 61.04it/s]


Epoch 480 Mean Reward: 306.0
Epoch 480 Model saved to ./checkpoints/take_cover.ckpt
Epoch 480 test:
Test Episode 1 Reward: 280.0
Test Episode 2 Reward: 280.0
Test Episode 3 Reward: 429.0
Test Episode 4 Reward: 197.0
Test Episode 5 Reward: 280.0
Test Episode 6 Reward: 231.0
Test Episode 7 Reward: 280.0
Test Episode 8 Reward: 139.0
Test Episode 9 Reward: 373.0
Test Episode 10 Reward: 286.0
Test Episode 11 Reward: 280.0
Test Episode 12 Reward: 222.0
Test Episode 13 Reward: 280.0
Test Episode 14 Reward: 107.0
Test Episode 15 Reward: 280.0
Test Episode 16 Reward: 280.0
Test Episode 17 Reward: 144.0
Test Episode 18 Reward: 213.0
Test Episode 19 Reward: 225.0
Test Episode 20 Reward: 280.0
Epoch 480 Average Test Reward: 254.3


100%|██████████| 2000/2000 [00:34<00:00, 57.89it/s]


Epoch 481 Mean Reward: 962.0


100%|██████████| 2000/2000 [00:33<00:00, 59.84it/s]


Epoch 482 Mean Reward: 544.0


100%|██████████| 2000/2000 [00:35<00:00, 56.15it/s]


Epoch 483 Mean Reward: 870.0


100%|██████████| 2000/2000 [00:32<00:00, 61.28it/s]


Epoch 484 Mean Reward: 288.0


100%|██████████| 2000/2000 [00:32<00:00, 60.95it/s]


Epoch 485 Mean Reward: 377.0


100%|██████████| 2000/2000 [00:32<00:00, 62.44it/s]


Epoch 486 Mean Reward: 322.0


100%|██████████| 2000/2000 [00:31<00:00, 63.34it/s]


Epoch 487 Mean Reward: 292.0


100%|██████████| 2000/2000 [00:31<00:00, 63.42it/s]


Epoch 488 Mean Reward: 331.0


100%|██████████| 2000/2000 [00:31<00:00, 63.79it/s]


Epoch 489 Mean Reward: 259.0


100%|██████████| 2000/2000 [00:31<00:00, 63.47it/s]


Epoch 490 Mean Reward: 283.0
Epoch 490 Model saved to ./checkpoints/take_cover.ckpt
Epoch 490 test:
Test Episode 1 Reward: 261.0
Test Episode 2 Reward: 297.0
Test Episode 3 Reward: 95.0
Test Episode 4 Reward: 225.0
Test Episode 5 Reward: 275.0
Test Episode 6 Reward: 188.0
Test Episode 7 Reward: 279.0
Test Episode 8 Reward: 171.0
Test Episode 9 Reward: 139.0
Test Episode 10 Reward: 275.0
Test Episode 11 Reward: 260.0
Test Episode 12 Reward: 275.0
Test Episode 13 Reward: 275.0
Test Episode 14 Reward: 149.0
Test Episode 15 Reward: 275.0
Test Episode 16 Reward: 275.0
Test Episode 17 Reward: 146.0
Test Episode 18 Reward: 275.0
Test Episode 19 Reward: 275.0
Test Episode 20 Reward: 275.0
Epoch 490 Average Test Reward: 234.25


100%|██████████| 2000/2000 [00:29<00:00, 67.27it/s]


Epoch 491 Mean Reward: 250.0


100%|██████████| 2000/2000 [00:30<00:00, 66.55it/s]


Epoch 492 Mean Reward: 292.0


100%|██████████| 2000/2000 [00:30<00:00, 66.67it/s]


Epoch 493 Mean Reward: 297.0


100%|██████████| 2000/2000 [00:30<00:00, 65.79it/s]


Epoch 494 Mean Reward: 345.0


100%|██████████| 2000/2000 [00:30<00:00, 65.43it/s]


Epoch 495 Mean Reward: 416.0


100%|██████████| 2000/2000 [00:29<00:00, 66.74it/s]


Epoch 496 Mean Reward: 196.0


100%|██████████| 2000/2000 [00:30<00:00, 65.86it/s]


Epoch 497 Mean Reward: 414.0


100%|██████████| 2000/2000 [00:30<00:00, 66.62it/s]


Epoch 498 Mean Reward: 287.0


100%|██████████| 2000/2000 [00:31<00:00, 64.30it/s]


Epoch 499 Mean Reward: 662.0


100%|██████████| 2000/2000 [00:30<00:00, 65.91it/s]


Epoch 500 Mean Reward: 269.0
Epoch 500 Model saved to ./checkpoints/take_cover.ckpt
Epoch 500 test:
Test Episode 1 Reward: 317.0
Test Episode 2 Reward: 334.0
Test Episode 3 Reward: 107.0
Test Episode 4 Reward: 136.0
Test Episode 5 Reward: 102.0
Test Episode 6 Reward: 101.0
Test Episode 7 Reward: 334.0
Test Episode 8 Reward: 334.0
Test Episode 9 Reward: 334.0
Test Episode 10 Reward: 334.0
Test Episode 11 Reward: 334.0
Test Episode 12 Reward: 167.0
Test Episode 13 Reward: 334.0
Test Episode 14 Reward: 334.0
Test Episode 15 Reward: 274.0
Test Episode 16 Reward: 334.0
Test Episode 17 Reward: 106.0
Test Episode 18 Reward: 352.0
Test Episode 19 Reward: 174.0
Test Episode 20 Reward: 358.0
Epoch 500 Average Test Reward: 260.0


100%|██████████| 2000/2000 [00:27<00:00, 71.78it/s]


Epoch 501 Mean Reward: 153.0


100%|██████████| 2000/2000 [00:26<00:00, 74.24it/s]


Epoch 502 Mean Reward: 127.0


100%|██████████| 2000/2000 [00:27<00:00, 71.66it/s]


Epoch 503 Mean Reward: 542.0


100%|██████████| 2000/2000 [00:27<00:00, 72.09it/s]


Epoch 504 Mean Reward: 538.0


100%|██████████| 2000/2000 [00:27<00:00, 72.14it/s]


Epoch 505 Mean Reward: 502.0


100%|██████████| 2000/2000 [00:27<00:00, 72.32it/s]


Epoch 506 Mean Reward: 474.0


100%|██████████| 2000/2000 [00:27<00:00, 73.95it/s]


Epoch 507 Mean Reward: 208.0


100%|██████████| 2000/2000 [00:27<00:00, 72.17it/s]


Epoch 508 Mean Reward: 455.0


100%|██████████| 2000/2000 [00:27<00:00, 73.84it/s]


Epoch 509 Mean Reward: 247.0


100%|██████████| 2000/2000 [00:27<00:00, 71.62it/s]


Epoch 510 Mean Reward: 593.0
Epoch 510 Model saved to ./checkpoints/take_cover.ckpt
Epoch 510 test:
Test Episode 1 Reward: 253.0
Test Episode 2 Reward: 253.0
Test Episode 3 Reward: 174.0
Test Episode 4 Reward: 253.0
Test Episode 5 Reward: 180.0
Test Episode 6 Reward: 319.0
Test Episode 7 Reward: 185.0
Test Episode 8 Reward: 317.0
Test Episode 9 Reward: 132.0
Test Episode 10 Reward: 460.0
Test Episode 11 Reward: 171.0
Test Episode 12 Reward: 253.0
Test Episode 13 Reward: 196.0
Test Episode 14 Reward: 253.0
Test Episode 15 Reward: 114.0
Test Episode 16 Reward: 253.0
Test Episode 17 Reward: 333.0
Test Episode 18 Reward: 253.0
Test Episode 19 Reward: 467.0
Test Episode 20 Reward: 253.0
Epoch 510 Average Test Reward: 253.6


100%|██████████| 2000/2000 [00:35<00:00, 56.35it/s]


Epoch 511 Mean Reward: 1043.0


100%|██████████| 2000/2000 [00:33<00:00, 59.85it/s]


Epoch 512 Mean Reward: 168.0


100%|██████████| 2000/2000 [00:33<00:00, 60.09it/s]


Epoch 513 Mean Reward: 464.0


100%|██████████| 2000/2000 [00:32<00:00, 60.67it/s]


Epoch 514 Mean Reward: 353.0


100%|██████████| 2000/2000 [00:32<00:00, 61.54it/s]


Epoch 515 Mean Reward: 269.0


100%|██████████| 2000/2000 [00:33<00:00, 59.40it/s]


Epoch 516 Mean Reward: 610.0


100%|██████████| 2000/2000 [00:33<00:00, 60.46it/s]


Epoch 517 Mean Reward: 362.0


100%|██████████| 2000/2000 [00:33<00:00, 60.61it/s]


Epoch 518 Mean Reward: 280.0


100%|██████████| 2000/2000 [00:32<00:00, 60.91it/s]


Epoch 519 Mean Reward: 284.0


100%|██████████| 2000/2000 [00:32<00:00, 61.06it/s]


Epoch 520 Mean Reward: 284.0
Epoch 520 Model saved to ./checkpoints/take_cover.ckpt
Epoch 520 test:
Test Episode 1 Reward: 244.0
Test Episode 2 Reward: 244.0
Test Episode 3 Reward: 119.0
Test Episode 4 Reward: 300.0
Test Episode 5 Reward: 140.0
Test Episode 6 Reward: 101.0
Test Episode 7 Reward: 289.0
Test Episode 8 Reward: 244.0
Test Episode 9 Reward: 244.0
Test Episode 10 Reward: 444.0
Test Episode 11 Reward: 244.0
Test Episode 12 Reward: 123.0
Test Episode 13 Reward: 244.0
Test Episode 14 Reward: 244.0
Test Episode 15 Reward: 244.0
Test Episode 16 Reward: 244.0
Test Episode 17 Reward: 365.0
Test Episode 18 Reward: 272.0
Test Episode 19 Reward: 244.0
Test Episode 20 Reward: 194.0
Epoch 520 Average Test Reward: 239.35


100%|██████████| 2000/2000 [00:35<00:00, 55.90it/s]


Epoch 521 Mean Reward: 857.0


100%|██████████| 2000/2000 [00:34<00:00, 58.12it/s]


Epoch 522 Mean Reward: 319.0


100%|██████████| 2000/2000 [00:34<00:00, 58.78it/s]


Epoch 523 Mean Reward: 257.0


100%|██████████| 2000/2000 [00:35<00:00, 56.83it/s]


Epoch 524 Mean Reward: 662.0


100%|██████████| 2000/2000 [00:32<00:00, 60.90it/s]


Epoch 525 Mean Reward: 283.0


100%|██████████| 2000/2000 [00:32<00:00, 61.01it/s]


Epoch 526 Mean Reward: 288.0


100%|██████████| 2000/2000 [00:32<00:00, 60.95it/s]


Epoch 527 Mean Reward: 317.0


100%|██████████| 2000/2000 [00:34<00:00, 58.48it/s]


Epoch 528 Mean Reward: 798.0


100%|██████████| 2000/2000 [00:32<00:00, 62.36it/s]


Epoch 529 Mean Reward: 117.0


100%|██████████| 2000/2000 [00:33<00:00, 60.22it/s]


Epoch 530 Mean Reward: 491.0
Epoch 530 Model saved to ./checkpoints/take_cover.ckpt
Epoch 530 test:
Test Episode 1 Reward: 288.0
Test Episode 2 Reward: 288.0
Test Episode 3 Reward: 288.0
Test Episode 4 Reward: 288.0
Test Episode 5 Reward: 241.0
Test Episode 6 Reward: 288.0
Test Episode 7 Reward: 317.0
Test Episode 8 Reward: 116.0
Test Episode 9 Reward: 288.0
Test Episode 10 Reward: 288.0
Test Episode 11 Reward: 272.0
Test Episode 12 Reward: 208.0
Test Episode 13 Reward: 288.0
Test Episode 14 Reward: 167.0
Test Episode 15 Reward: 288.0
Test Episode 16 Reward: 244.0
Test Episode 17 Reward: 117.0
Test Episode 18 Reward: 288.0
Test Episode 19 Reward: 227.0
Test Episode 20 Reward: 288.0
Epoch 530 Average Test Reward: 253.85


100%|██████████| 2000/2000 [00:32<00:00, 60.63it/s]


Epoch 531 Mean Reward: 400.0


100%|██████████| 2000/2000 [00:33<00:00, 59.80it/s]


Epoch 532 Mean Reward: 494.0


100%|██████████| 2000/2000 [00:33<00:00, 59.72it/s]


Epoch 533 Mean Reward: 488.0


100%|██████████| 2000/2000 [00:32<00:00, 60.88it/s]


Epoch 534 Mean Reward: 288.0


100%|██████████| 2000/2000 [00:33<00:00, 59.43it/s]


Epoch 535 Mean Reward: 616.0


100%|██████████| 2000/2000 [00:33<00:00, 59.88it/s]


Epoch 536 Mean Reward: 513.0


100%|██████████| 2000/2000 [00:33<00:00, 60.31it/s]


Epoch 537 Mean Reward: 471.0


100%|██████████| 2000/2000 [00:32<00:00, 62.04it/s]


Epoch 538 Mean Reward: 127.0


100%|██████████| 2000/2000 [00:33<00:00, 59.72it/s]


Epoch 539 Mean Reward: 574.0


100%|██████████| 2000/2000 [00:33<00:00, 59.82it/s]


Epoch 540 Mean Reward: 534.0
Epoch 540 Model saved to ./checkpoints/take_cover.ckpt
Epoch 540 test:
Test Episode 1 Reward: 138.0
Test Episode 2 Reward: 284.0
Test Episode 3 Reward: 315.0
Test Episode 4 Reward: 171.0
Test Episode 5 Reward: 284.0
Test Episode 6 Reward: 284.0
Test Episode 7 Reward: 275.0
Test Episode 8 Reward: 284.0
Test Episode 9 Reward: 203.0
Test Episode 10 Reward: 284.0
Test Episode 11 Reward: 284.0
Test Episode 12 Reward: 284.0
Test Episode 13 Reward: 146.0
Test Episode 14 Reward: 284.0
Test Episode 15 Reward: 284.0
Test Episode 16 Reward: 127.0
Test Episode 17 Reward: 137.0
Test Episode 18 Reward: 116.0
Test Episode 19 Reward: 284.0
Test Episode 20 Reward: 159.0
Epoch 540 Average Test Reward: 231.35


100%|██████████| 2000/2000 [00:33<00:00, 60.61it/s]


Epoch 541 Mean Reward: 283.0


100%|██████████| 2000/2000 [00:32<00:00, 61.17it/s]


Epoch 542 Mean Reward: 242.0


100%|██████████| 2000/2000 [00:32<00:00, 60.77it/s]


Epoch 543 Mean Reward: 341.0


100%|██████████| 2000/2000 [00:32<00:00, 61.40it/s]


Epoch 544 Mean Reward: 270.0


100%|██████████| 2000/2000 [00:33<00:00, 59.96it/s]


Epoch 545 Mean Reward: 525.0


100%|██████████| 2000/2000 [00:32<00:00, 61.94it/s]


Epoch 546 Mean Reward: 185.0


100%|██████████| 2000/2000 [00:32<00:00, 61.52it/s]


Epoch 547 Mean Reward: 272.0


100%|██████████| 2000/2000 [00:32<00:00, 61.29it/s]


Epoch 548 Mean Reward: 270.0


100%|██████████| 2000/2000 [00:32<00:00, 61.04it/s]


Epoch 549 Mean Reward: 289.0


100%|██████████| 2000/2000 [00:32<00:00, 62.16it/s]


Epoch 550 Mean Reward: 116.0
Epoch 550 Model saved to ./checkpoints/take_cover.ckpt
Epoch 550 test:
Test Episode 1 Reward: 322.0
Test Episode 2 Reward: 158.0
Test Episode 3 Reward: 322.0
Test Episode 4 Reward: 190.0
Test Episode 5 Reward: 322.0
Test Episode 6 Reward: 218.0
Test Episode 7 Reward: 201.0
Test Episode 8 Reward: 322.0
Test Episode 9 Reward: 223.0
Test Episode 10 Reward: 322.0
Test Episode 11 Reward: 143.0
Test Episode 12 Reward: 252.0
Test Episode 13 Reward: 115.0
Test Episode 14 Reward: 165.0
Test Episode 15 Reward: 428.0
Test Episode 16 Reward: 211.0
Test Episode 17 Reward: 322.0
Test Episode 18 Reward: 322.0
Test Episode 19 Reward: 213.0
Test Episode 20 Reward: 325.0
Epoch 550 Average Test Reward: 254.8


100%|██████████| 2000/2000 [00:30<00:00, 65.98it/s]


Epoch 551 Mean Reward: 155.0


100%|██████████| 2000/2000 [00:30<00:00, 66.33it/s]


Epoch 552 Mean Reward: 271.0


100%|██████████| 2000/2000 [00:30<00:00, 66.60it/s]


Epoch 553 Mean Reward: 775.0


100%|██████████| 2000/2000 [00:27<00:00, 71.91it/s]


Epoch 554 Mean Reward: 361.0


100%|██████████| 2000/2000 [00:27<00:00, 72.75it/s]


Epoch 555 Mean Reward: 223.0


100%|██████████| 2000/2000 [00:28<00:00, 70.44it/s]


Epoch 556 Mean Reward: 562.0


100%|██████████| 2000/2000 [00:29<00:00, 68.73it/s]


Epoch 557 Mean Reward: 544.0


100%|██████████| 2000/2000 [00:27<00:00, 72.12it/s]


Epoch 558 Mean Reward: 325.0


100%|██████████| 2000/2000 [00:27<00:00, 71.99it/s]


Epoch 559 Mean Reward: 314.0


100%|██████████| 2000/2000 [00:27<00:00, 71.94it/s]


Epoch 560 Mean Reward: 346.0
Epoch 560 Model saved to ./checkpoints/take_cover.ckpt
Epoch 560 test:
Test Episode 1 Reward: 244.0
Test Episode 2 Reward: 254.0
Test Episode 3 Reward: 138.0
Test Episode 4 Reward: 143.0
Test Episode 5 Reward: 302.0
Test Episode 6 Reward: 244.0
Test Episode 7 Reward: 244.0
Test Episode 8 Reward: 244.0
Test Episode 9 Reward: 183.0
Test Episode 10 Reward: 244.0
Test Episode 11 Reward: 244.0
Test Episode 12 Reward: 244.0
Test Episode 13 Reward: 244.0
Test Episode 14 Reward: 244.0
Test Episode 15 Reward: 146.0
Test Episode 16 Reward: 244.0
Test Episode 17 Reward: 167.0
Test Episode 18 Reward: 244.0
Test Episode 19 Reward: 244.0
Test Episode 20 Reward: 160.0
Epoch 560 Average Test Reward: 221.05


100%|██████████| 2000/2000 [00:26<00:00, 75.06it/s]


Epoch 561 Mean Reward: 296.0


100%|██████████| 2000/2000 [00:26<00:00, 74.34it/s]


Epoch 562 Mean Reward: 374.0


100%|██████████| 2000/2000 [00:26<00:00, 74.83it/s]


Epoch 563 Mean Reward: 301.0


100%|██████████| 2000/2000 [00:26<00:00, 74.76it/s]


Epoch 564 Mean Reward: 308.0


100%|██████████| 2000/2000 [00:26<00:00, 75.35it/s]


Epoch 565 Mean Reward: 240.0


100%|██████████| 2000/2000 [00:27<00:00, 73.31it/s]


Epoch 566 Mean Reward: 550.0


100%|██████████| 2000/2000 [00:27<00:00, 73.33it/s]


Epoch 567 Mean Reward: 587.0


100%|██████████| 2000/2000 [00:27<00:00, 73.82it/s]


Epoch 568 Mean Reward: 426.0


100%|██████████| 2000/2000 [00:27<00:00, 74.00it/s]


Epoch 569 Mean Reward: 420.0


100%|██████████| 2000/2000 [00:27<00:00, 73.70it/s]


Epoch 570 Mean Reward: 479.0
Epoch 570 Model saved to ./checkpoints/take_cover.ckpt
Epoch 570 test:
Test Episode 1 Reward: 215.0
Test Episode 2 Reward: 215.0
Test Episode 3 Reward: 215.0
Test Episode 4 Reward: 347.0
Test Episode 5 Reward: 122.0
Test Episode 6 Reward: 132.0
Test Episode 7 Reward: 215.0
Test Episode 8 Reward: 124.0
Test Episode 9 Reward: 135.0
Test Episode 10 Reward: 98.0
Test Episode 11 Reward: 250.0
Test Episode 12 Reward: 215.0
Test Episode 13 Reward: 215.0
Test Episode 14 Reward: 123.0
Test Episode 15 Reward: 93.0
Test Episode 16 Reward: 215.0
Test Episode 17 Reward: 211.0
Test Episode 18 Reward: 113.0
Test Episode 19 Reward: 281.0
Test Episode 20 Reward: 457.0
Epoch 570 Average Test Reward: 199.55


100%|██████████| 2000/2000 [00:26<00:00, 75.11it/s]


Epoch 571 Mean Reward: 248.0


100%|██████████| 2000/2000 [00:26<00:00, 74.41it/s]


Epoch 572 Mean Reward: 345.0


100%|██████████| 2000/2000 [00:27<00:00, 73.20it/s]


Epoch 573 Mean Reward: 534.0


100%|██████████| 2000/2000 [00:26<00:00, 75.53it/s]


Epoch 574 Mean Reward: 154.0


100%|██████████| 2000/2000 [00:26<00:00, 75.22it/s]


Epoch 575 Mean Reward: 186.0


100%|██████████| 2000/2000 [00:26<00:00, 74.97it/s]


Epoch 576 Mean Reward: 230.0


100%|██████████| 2000/2000 [00:28<00:00, 71.28it/s]


Epoch 577 Mean Reward: 879.0


100%|██████████| 2000/2000 [00:26<00:00, 74.43it/s]


Epoch 578 Mean Reward: 331.0


100%|██████████| 2000/2000 [00:26<00:00, 75.53it/s]


Epoch 579 Mean Reward: 191.0


100%|██████████| 2000/2000 [00:26<00:00, 75.81it/s]


Epoch 580 Mean Reward: 153.0
Epoch 580 Model saved to ./checkpoints/take_cover.ckpt
Epoch 580 test:
Test Episode 1 Reward: 328.0
Test Episode 2 Reward: 301.0
Test Episode 3 Reward: 208.0
Test Episode 4 Reward: 197.0
Test Episode 5 Reward: 328.0
Test Episode 6 Reward: 186.0
Test Episode 7 Reward: 328.0
Test Episode 8 Reward: 328.0
Test Episode 9 Reward: 202.0
Test Episode 10 Reward: 328.0
Test Episode 11 Reward: 164.0
Test Episode 12 Reward: 328.0
Test Episode 13 Reward: 267.0
Test Episode 14 Reward: 328.0
Test Episode 15 Reward: 328.0
Test Episode 16 Reward: 245.0
Test Episode 17 Reward: 144.0
Test Episode 18 Reward: 230.0
Test Episode 19 Reward: 150.0
Test Episode 20 Reward: 328.0
Epoch 580 Average Test Reward: 262.3


100%|██████████| 2000/2000 [00:34<00:00, 58.65it/s]


Epoch 581 Mean Reward: 243.0


100%|██████████| 2000/2000 [00:34<00:00, 57.96it/s]


Epoch 582 Mean Reward: 380.0


100%|██████████| 2000/2000 [00:33<00:00, 59.28it/s]


Epoch 583 Mean Reward: 181.0


100%|██████████| 2000/2000 [00:34<00:00, 58.22it/s]


Epoch 584 Mean Reward: 299.0


100%|██████████| 2000/2000 [00:34<00:00, 58.51it/s]


Epoch 585 Mean Reward: 305.0


100%|██████████| 2000/2000 [00:33<00:00, 59.52it/s]


Epoch 586 Mean Reward: 94.0


100%|██████████| 2000/2000 [00:33<00:00, 59.75it/s]


Epoch 587 Mean Reward: 437.0


100%|██████████| 2000/2000 [00:33<00:00, 59.47it/s]


Epoch 588 Mean Reward: 517.0


100%|██████████| 2000/2000 [00:33<00:00, 60.49it/s]


Epoch 589 Mean Reward: 379.0


100%|██████████| 2000/2000 [00:33<00:00, 58.95it/s]


Epoch 590 Mean Reward: 704.0
Epoch 590 Model saved to ./checkpoints/take_cover.ckpt
Epoch 590 test:
Test Episode 1 Reward: 183.0
Test Episode 2 Reward: 380.0
Test Episode 3 Reward: 164.0
Test Episode 4 Reward: 104.0
Test Episode 5 Reward: 475.0
Test Episode 6 Reward: 121.0
Test Episode 7 Reward: 380.0
Test Episode 8 Reward: 351.0
Test Episode 9 Reward: 380.0
Test Episode 10 Reward: 380.0
Test Episode 11 Reward: 380.0
Test Episode 12 Reward: 380.0
Test Episode 13 Reward: 380.0
Test Episode 14 Reward: 380.0
Test Episode 15 Reward: 380.0
Test Episode 16 Reward: 380.0
Test Episode 17 Reward: 244.0
Test Episode 18 Reward: 120.0
Test Episode 19 Reward: 109.0
Test Episode 20 Reward: 380.0
Epoch 590 Average Test Reward: 302.55


100%|██████████| 2000/2000 [00:32<00:00, 61.28it/s]


Epoch 591 Mean Reward: 161.0


100%|██████████| 2000/2000 [00:32<00:00, 60.90it/s]


Epoch 592 Mean Reward: 286.0


100%|██████████| 2000/2000 [00:32<00:00, 61.01it/s]


Epoch 593 Mean Reward: 291.0


100%|██████████| 2000/2000 [00:33<00:00, 60.22it/s]


Epoch 594 Mean Reward: 438.0


100%|██████████| 2000/2000 [00:33<00:00, 60.45it/s]


Epoch 595 Mean Reward: 402.0


100%|██████████| 2000/2000 [00:33<00:00, 59.68it/s]


Epoch 596 Mean Reward: 548.0


100%|██████████| 2000/2000 [00:32<00:00, 60.83it/s]


Epoch 597 Mean Reward: 344.0


100%|██████████| 2000/2000 [00:32<00:00, 60.89it/s]


Epoch 598 Mean Reward: 287.0


100%|██████████| 2000/2000 [00:34<00:00, 57.85it/s]


Epoch 599 Mean Reward: 920.0


100%|██████████| 2000/2000 [00:34<00:00, 58.55it/s]


Epoch 600 Mean Reward: 709.0
Epoch 600 Model saved to ./checkpoints/take_cover.ckpt
Epoch 600 test:
Test Episode 1 Reward: 212.0
Test Episode 2 Reward: 164.0
Test Episode 3 Reward: 162.0
Test Episode 4 Reward: 212.0
Test Episode 5 Reward: 246.0
Test Episode 6 Reward: 129.0
Test Episode 7 Reward: 313.0
Test Episode 8 Reward: 212.0
Test Episode 9 Reward: 212.0
Test Episode 10 Reward: 101.0
Test Episode 11 Reward: 212.0
Test Episode 12 Reward: 212.0
Test Episode 13 Reward: 276.0
Test Episode 14 Reward: 212.0
Test Episode 15 Reward: 213.0
Test Episode 16 Reward: 212.0
Test Episode 17 Reward: 324.0
Test Episode 18 Reward: 145.0
Test Episode 19 Reward: 167.0
Test Episode 20 Reward: 158.0
Epoch 600 Average Test Reward: 204.7


100%|██████████| 2000/2000 [00:34<00:00, 58.02it/s]


Epoch 601 Mean Reward: 658.0


100%|██████████| 2000/2000 [00:34<00:00, 58.32it/s]


Epoch 602 Mean Reward: 633.0


100%|██████████| 2000/2000 [00:32<00:00, 60.84it/s]


Epoch 603 Mean Reward: 266.0


100%|██████████| 2000/2000 [00:32<00:00, 61.13it/s]


Epoch 604 Mean Reward: 293.0


100%|██████████| 2000/2000 [00:32<00:00, 60.65it/s]


Epoch 605 Mean Reward: 408.0


100%|██████████| 2000/2000 [00:32<00:00, 61.55it/s]


Epoch 606 Mean Reward: 186.0


100%|██████████| 2000/2000 [00:32<00:00, 62.26it/s]


Epoch 607 Mean Reward: 112.0


100%|██████████| 2000/2000 [00:32<00:00, 62.38it/s]


Epoch 608 Mean Reward: 104.0


100%|██████████| 2000/2000 [00:32<00:00, 60.73it/s]


Epoch 609 Mean Reward: 337.0


100%|██████████| 2000/2000 [00:32<00:00, 61.22it/s]


Epoch 610 Mean Reward: 269.0
Epoch 610 Model saved to ./checkpoints/take_cover.ckpt
Epoch 610 test:
Test Episode 1 Reward: 242.0
Test Episode 2 Reward: 204.0
Test Episode 3 Reward: 359.0
Test Episode 4 Reward: 311.0
Test Episode 5 Reward: 242.0
Test Episode 6 Reward: 189.0
Test Episode 7 Reward: 199.0
Test Episode 8 Reward: 242.0
Test Episode 9 Reward: 92.0
Test Episode 10 Reward: 242.0
Test Episode 11 Reward: 242.0
Test Episode 12 Reward: 260.0
Test Episode 13 Reward: 285.0
Test Episode 14 Reward: 243.0
Test Episode 15 Reward: 226.0
Test Episode 16 Reward: 148.0
Test Episode 17 Reward: 242.0
Test Episode 18 Reward: 242.0
Test Episode 19 Reward: 275.0
Test Episode 20 Reward: 324.0
Epoch 610 Average Test Reward: 240.45


100%|██████████| 2000/2000 [00:32<00:00, 61.11it/s]


Epoch 611 Mean Reward: 333.0


100%|██████████| 2000/2000 [00:33<00:00, 60.12it/s]


Epoch 612 Mean Reward: 487.0


100%|██████████| 2000/2000 [00:32<00:00, 61.63it/s]


Epoch 613 Mean Reward: 219.0


100%|██████████| 2000/2000 [00:33<00:00, 59.14it/s]


Epoch 614 Mean Reward: 616.0


100%|██████████| 2000/2000 [00:33<00:00, 60.03it/s]


Epoch 615 Mean Reward: 510.0


100%|██████████| 2000/2000 [00:32<00:00, 62.04it/s]


Epoch 616 Mean Reward: 134.0


100%|██████████| 2000/2000 [00:32<00:00, 61.23it/s]


Epoch 617 Mean Reward: 274.0


100%|██████████| 2000/2000 [00:33<00:00, 60.14it/s]


Epoch 618 Mean Reward: 446.0


100%|██████████| 2000/2000 [00:33<00:00, 59.72it/s]


Epoch 619 Mean Reward: 583.0


100%|██████████| 2000/2000 [00:34<00:00, 58.40it/s]


Epoch 620 Mean Reward: 824.0
Epoch 620 Model saved to ./checkpoints/take_cover.ckpt
Epoch 620 test:
Test Episode 1 Reward: 402.0
Test Episode 2 Reward: 285.0
Test Episode 3 Reward: 285.0
Test Episode 4 Reward: 285.0
Test Episode 5 Reward: 285.0
Test Episode 6 Reward: 114.0
Test Episode 7 Reward: 285.0
Test Episode 8 Reward: 327.0
Test Episode 9 Reward: 115.0
Test Episode 10 Reward: 178.0
Test Episode 11 Reward: 162.0
Test Episode 12 Reward: 285.0
Test Episode 13 Reward: 285.0
Test Episode 14 Reward: 204.0
Test Episode 15 Reward: 285.0
Test Episode 16 Reward: 240.0
Test Episode 17 Reward: 285.0
Test Episode 18 Reward: 129.0
Test Episode 19 Reward: 191.0
Test Episode 20 Reward: 285.0
Epoch 620 Average Test Reward: 245.6


100%|██████████| 2000/2000 [00:33<00:00, 59.74it/s]


Epoch 621 Mean Reward: 550.0


100%|██████████| 2000/2000 [00:32<00:00, 61.11it/s]


Epoch 622 Mean Reward: 283.0


100%|██████████| 2000/2000 [00:33<00:00, 59.06it/s]


Epoch 623 Mean Reward: 615.0


100%|██████████| 2000/2000 [00:34<00:00, 58.39it/s]


Epoch 624 Mean Reward: 799.0


100%|██████████| 2000/2000 [00:33<00:00, 60.39it/s]


Epoch 625 Mean Reward: 487.0


100%|██████████| 2000/2000 [00:32<00:00, 61.09it/s]


Epoch 626 Mean Reward: 288.0


100%|██████████| 2000/2000 [00:32<00:00, 60.92it/s]


Epoch 627 Mean Reward: 267.0


100%|██████████| 2000/2000 [00:32<00:00, 60.95it/s]


Epoch 628 Mean Reward: 342.0


100%|██████████| 2000/2000 [00:33<00:00, 60.55it/s]


Epoch 629 Mean Reward: 408.0


100%|██████████| 2000/2000 [00:33<00:00, 60.53it/s]


Epoch 630 Mean Reward: 437.0
Epoch 630 Model saved to ./checkpoints/take_cover.ckpt
Epoch 630 test:
Test Episode 1 Reward: 213.0
Test Episode 2 Reward: 301.0
Test Episode 3 Reward: 337.0
Test Episode 4 Reward: 213.0
Test Episode 5 Reward: 260.0
Test Episode 6 Reward: 213.0
Test Episode 7 Reward: 213.0
Test Episode 8 Reward: 213.0
Test Episode 9 Reward: 259.0
Test Episode 10 Reward: 213.0
Test Episode 11 Reward: 213.0
Test Episode 12 Reward: 316.0
Test Episode 13 Reward: 213.0
Test Episode 14 Reward: 374.0
Test Episode 15 Reward: 254.0
Test Episode 16 Reward: 213.0
Test Episode 17 Reward: 131.0
Test Episode 18 Reward: 213.0
Test Episode 19 Reward: 116.0
Test Episode 20 Reward: 108.0
Epoch 630 Average Test Reward: 229.3


100%|██████████| 2000/2000 [00:32<00:00, 61.94it/s]


Epoch 631 Mean Reward: 118.0


100%|██████████| 2000/2000 [00:32<00:00, 61.56it/s]


Epoch 632 Mean Reward: 227.0


100%|██████████| 2000/2000 [00:34<00:00, 58.77it/s]


Epoch 633 Mean Reward: 708.0


100%|██████████| 2000/2000 [00:34<00:00, 58.81it/s]


Epoch 634 Mean Reward: 702.0


100%|██████████| 2000/2000 [00:33<00:00, 60.55it/s]


Epoch 635 Mean Reward: 370.0


100%|██████████| 2000/2000 [00:33<00:00, 60.53it/s]


Epoch 636 Mean Reward: 408.0


100%|██████████| 2000/2000 [00:32<00:00, 60.80it/s]


Epoch 637 Mean Reward: 335.0


100%|██████████| 2000/2000 [00:33<00:00, 59.46it/s]


Epoch 638 Mean Reward: 554.0


100%|██████████| 2000/2000 [00:33<00:00, 59.22it/s]


Epoch 639 Mean Reward: 354.0


100%|██████████| 2000/2000 [00:33<00:00, 58.93it/s]


Epoch 640 Mean Reward: 541.0
Epoch 640 Model saved to ./checkpoints/take_cover.ckpt
Epoch 640 test:
Test Episode 1 Reward: 273.0
Test Episode 2 Reward: 273.0
Test Episode 3 Reward: 273.0
Test Episode 4 Reward: 142.0
Test Episode 5 Reward: 247.0
Test Episode 6 Reward: 256.0
Test Episode 7 Reward: 543.0
Test Episode 8 Reward: 288.0
Test Episode 9 Reward: 96.0
Test Episode 10 Reward: 185.0
Test Episode 11 Reward: 273.0
Test Episode 12 Reward: 273.0
Test Episode 13 Reward: 95.0
Test Episode 14 Reward: 297.0
Test Episode 15 Reward: 120.0
Test Episode 16 Reward: 209.0
Test Episode 17 Reward: 273.0
Test Episode 18 Reward: 273.0
Test Episode 19 Reward: 107.0
Test Episode 20 Reward: 273.0
Epoch 640 Average Test Reward: 238.45


100%|██████████| 2000/2000 [00:33<00:00, 60.32it/s]


Epoch 641 Mean Reward: 471.0


100%|██████████| 2000/2000 [00:32<00:00, 61.39it/s]


Epoch 642 Mean Reward: 288.0


100%|██████████| 2000/2000 [00:32<00:00, 60.74it/s]


Epoch 643 Mean Reward: 281.0


100%|██████████| 2000/2000 [00:31<00:00, 63.23it/s]


Epoch 644 Mean Reward: 292.0


100%|██████████| 2000/2000 [00:29<00:00, 67.59it/s]


Epoch 645 Mean Reward: 565.0


100%|██████████| 2000/2000 [00:28<00:00, 70.86it/s]


Epoch 646 Mean Reward: 290.0


100%|██████████| 2000/2000 [00:29<00:00, 66.94it/s]


Epoch 647 Mean Reward: 884.0


100%|██████████| 2000/2000 [00:27<00:00, 74.04it/s]


Epoch 648 Mean Reward: 205.0


100%|██████████| 2000/2000 [00:27<00:00, 73.11it/s]


Epoch 649 Mean Reward: 287.0


100%|██████████| 2000/2000 [00:28<00:00, 69.78it/s]


Epoch 650 Mean Reward: 753.0
Epoch 650 Model saved to ./checkpoints/take_cover.ckpt
Epoch 650 test:
Test Episode 1 Reward: 110.0
Test Episode 2 Reward: 293.0
Test Episode 3 Reward: 133.0
Test Episode 4 Reward: 293.0
Test Episode 5 Reward: 293.0
Test Episode 6 Reward: 172.0
Test Episode 7 Reward: 293.0
Test Episode 8 Reward: 293.0
Test Episode 9 Reward: 207.0
Test Episode 10 Reward: 153.0
Test Episode 11 Reward: 293.0
Test Episode 12 Reward: 143.0
Test Episode 13 Reward: 293.0
Test Episode 14 Reward: 171.0
Test Episode 15 Reward: 293.0
Test Episode 16 Reward: 293.0
Test Episode 17 Reward: 293.0
Test Episode 18 Reward: 293.0
Test Episode 19 Reward: 164.0
Test Episode 20 Reward: 207.0
Epoch 650 Average Test Reward: 234.15


100%|██████████| 2000/2000 [00:29<00:00, 68.96it/s]


Epoch 651 Mean Reward: 402.0


100%|██████████| 2000/2000 [00:27<00:00, 72.14it/s]


Epoch 652 Mean Reward: 217.0


100%|██████████| 2000/2000 [00:27<00:00, 72.49it/s]


Epoch 653 Mean Reward: 172.0


100%|██████████| 2000/2000 [00:29<00:00, 68.58it/s]


Epoch 654 Mean Reward: 709.0


100%|██████████| 2000/2000 [00:27<00:00, 71.71it/s]


Epoch 655 Mean Reward: 271.0


100%|██████████| 2000/2000 [00:28<00:00, 71.29it/s]


Epoch 656 Mean Reward: 271.0


100%|██████████| 2000/2000 [00:29<00:00, 68.54it/s]


Epoch 657 Mean Reward: 651.0


100%|██████████| 2000/2000 [00:32<00:00, 61.50it/s]


Epoch 658 Mean Reward: 578.0


100%|██████████| 2000/2000 [00:33<00:00, 60.09it/s]


Epoch 659 Mean Reward: 112.0


100%|██████████| 2000/2000 [00:33<00:00, 59.16it/s]


Epoch 660 Mean Reward: 316.0
Epoch 660 Model saved to ./checkpoints/take_cover.ckpt
Epoch 660 test:
Test Episode 1 Reward: 380.0
Test Episode 2 Reward: 110.0
Test Episode 3 Reward: 387.0
Test Episode 4 Reward: 380.0
Test Episode 5 Reward: 105.0
Test Episode 6 Reward: 380.0
Test Episode 7 Reward: 380.0
Test Episode 8 Reward: 380.0
Test Episode 9 Reward: 380.0
Test Episode 10 Reward: 242.0
Test Episode 11 Reward: 380.0
Test Episode 12 Reward: 212.0
Test Episode 13 Reward: 337.0
Test Episode 14 Reward: 380.0
Test Episode 15 Reward: 453.0
Test Episode 16 Reward: 254.0
Test Episode 17 Reward: 119.0
Test Episode 18 Reward: 380.0
Test Episode 19 Reward: 380.0
Test Episode 20 Reward: 228.0
Epoch 660 Average Test Reward: 312.35


100%|██████████| 2000/2000 [00:34<00:00, 58.18it/s]


Epoch 661 Mean Reward: 401.0


100%|██████████| 2000/2000 [00:33<00:00, 59.91it/s]


Epoch 662 Mean Reward: 180.0


100%|██████████| 2000/2000 [00:33<00:00, 60.05it/s]


Epoch 663 Mean Reward: 217.0


100%|██████████| 2000/2000 [00:33<00:00, 59.63it/s]


Epoch 664 Mean Reward: 240.0


100%|██████████| 2000/2000 [00:33<00:00, 59.80it/s]


Epoch 665 Mean Reward: 143.0


100%|██████████| 2000/2000 [00:34<00:00, 58.31it/s]


Epoch 666 Mean Reward: 384.0


100%|██████████| 2000/2000 [00:34<00:00, 58.06it/s]


Epoch 667 Mean Reward: 497.0


100%|██████████| 2000/2000 [00:33<00:00, 59.78it/s]


Epoch 668 Mean Reward: 178.0


100%|██████████| 2000/2000 [00:33<00:00, 58.88it/s]


Epoch 669 Mean Reward: 422.0


100%|██████████| 2000/2000 [00:33<00:00, 60.39it/s]


Epoch 670 Mean Reward: 169.0
Epoch 670 Model saved to ./checkpoints/take_cover.ckpt
Epoch 670 test:
Test Episode 1 Reward: 278.0
Test Episode 2 Reward: 278.0
Test Episode 3 Reward: 278.0
Test Episode 4 Reward: 168.0
Test Episode 5 Reward: 91.0
Test Episode 6 Reward: 197.0
Test Episode 7 Reward: 278.0
Test Episode 8 Reward: 278.0
Test Episode 9 Reward: 278.0
Test Episode 10 Reward: 278.0
Test Episode 11 Reward: 278.0
Test Episode 12 Reward: 324.0
Test Episode 13 Reward: 205.0
Test Episode 14 Reward: 242.0
Test Episode 15 Reward: 278.0
Test Episode 16 Reward: 163.0
Test Episode 17 Reward: 278.0
Test Episode 18 Reward: 152.0
Test Episode 19 Reward: 292.0
Test Episode 20 Reward: 207.0
Epoch 670 Average Test Reward: 241.05


100%|██████████| 2000/2000 [00:33<00:00, 58.86it/s]


Epoch 671 Mean Reward: 418.0


100%|██████████| 2000/2000 [00:35<00:00, 57.14it/s]


Epoch 672 Mean Reward: 511.0


100%|██████████| 2000/2000 [00:36<00:00, 55.31it/s]


Epoch 673 Mean Reward: 860.0


100%|██████████| 2000/2000 [00:34<00:00, 58.77it/s]


Epoch 674 Mean Reward: 300.0


100%|██████████| 2000/2000 [00:34<00:00, 57.27it/s]


Epoch 675 Mean Reward: 515.0


100%|██████████| 2000/2000 [00:34<00:00, 58.35it/s]


Epoch 676 Mean Reward: 347.0


100%|██████████| 2000/2000 [00:34<00:00, 57.83it/s]


Epoch 677 Mean Reward: 456.0


100%|██████████| 2000/2000 [00:33<00:00, 59.22it/s]


Epoch 678 Mean Reward: 251.0


100%|██████████| 2000/2000 [00:33<00:00, 58.89it/s]


Epoch 679 Mean Reward: 604.0


100%|██████████| 2000/2000 [00:33<00:00, 60.10it/s]


Epoch 680 Mean Reward: 548.0
Epoch 680 Model saved to ./checkpoints/take_cover.ckpt
Epoch 680 test:
Test Episode 1 Reward: 281.0
Test Episode 2 Reward: 281.0
Test Episode 3 Reward: 281.0
Test Episode 4 Reward: 281.0
Test Episode 5 Reward: 230.0
Test Episode 6 Reward: 281.0
Test Episode 7 Reward: 136.0
Test Episode 8 Reward: 331.0
Test Episode 9 Reward: 281.0
Test Episode 10 Reward: 330.0
Test Episode 11 Reward: 281.0
Test Episode 12 Reward: 422.0
Test Episode 13 Reward: 281.0
Test Episode 14 Reward: 198.0
Test Episode 15 Reward: 143.0
Test Episode 16 Reward: 281.0
Test Episode 17 Reward: 195.0
Test Episode 18 Reward: 239.0
Test Episode 19 Reward: 281.0
Test Episode 20 Reward: 325.0
Epoch 680 Average Test Reward: 267.95


100%|██████████| 2000/2000 [00:33<00:00, 59.57it/s]


Epoch 681 Mean Reward: 453.0


100%|██████████| 2000/2000 [00:33<00:00, 59.56it/s]


Epoch 682 Mean Reward: 535.0


100%|██████████| 2000/2000 [00:32<00:00, 61.17it/s]


Epoch 683 Mean Reward: 287.0


100%|██████████| 2000/2000 [00:33<00:00, 60.20it/s]


Epoch 684 Mean Reward: 491.0


100%|██████████| 2000/2000 [00:32<00:00, 61.10it/s]


Epoch 685 Mean Reward: 318.0


100%|██████████| 2000/2000 [00:33<00:00, 60.13it/s]


Epoch 686 Mean Reward: 563.0


100%|██████████| 2000/2000 [00:32<00:00, 61.34it/s]


Epoch 687 Mean Reward: 208.0


100%|██████████| 2000/2000 [00:32<00:00, 61.87it/s]


Epoch 688 Mean Reward: 182.0


100%|██████████| 2000/2000 [00:32<00:00, 61.35it/s]


Epoch 689 Mean Reward: 263.0


100%|██████████| 2000/2000 [00:32<00:00, 61.98it/s]


Epoch 690 Mean Reward: 115.0
Epoch 690 Model saved to ./checkpoints/take_cover.ckpt
Epoch 690 test:
Test Episode 1 Reward: 248.0
Test Episode 2 Reward: 112.0
Test Episode 3 Reward: 419.0
Test Episode 4 Reward: 181.0
Test Episode 5 Reward: 248.0
Test Episode 6 Reward: 222.0
Test Episode 7 Reward: 248.0
Test Episode 8 Reward: 248.0
Test Episode 9 Reward: 248.0
Test Episode 10 Reward: 324.0
Test Episode 11 Reward: 144.0
Test Episode 12 Reward: 194.0
Test Episode 13 Reward: 152.0
Test Episode 14 Reward: 247.0
Test Episode 15 Reward: 314.0
Test Episode 16 Reward: 248.0
Test Episode 17 Reward: 303.0
Test Episode 18 Reward: 248.0
Test Episode 19 Reward: 326.0
Test Episode 20 Reward: 248.0
Epoch 690 Average Test Reward: 246.1


100%|██████████| 2000/2000 [00:33<00:00, 60.19it/s]


Epoch 691 Mean Reward: 401.0


100%|██████████| 2000/2000 [00:33<00:00, 58.95it/s]


Epoch 692 Mean Reward: 691.0


100%|██████████| 2000/2000 [00:33<00:00, 59.29it/s]


Epoch 693 Mean Reward: 418.0


100%|██████████| 2000/2000 [00:34<00:00, 58.81it/s]


Epoch 694 Mean Reward: 500.0


100%|██████████| 2000/2000 [00:33<00:00, 59.88it/s]


Epoch 695 Mean Reward: 557.0


100%|██████████| 2000/2000 [00:32<00:00, 61.23it/s]


Epoch 696 Mean Reward: 343.0


100%|██████████| 2000/2000 [00:32<00:00, 61.38it/s]


Epoch 697 Mean Reward: 294.0


100%|██████████| 2000/2000 [00:32<00:00, 61.72it/s]


Epoch 698 Mean Reward: 240.0


100%|██████████| 2000/2000 [00:32<00:00, 61.05it/s]


Epoch 699 Mean Reward: 295.0


100%|██████████| 2000/2000 [00:32<00:00, 61.12it/s]


Epoch 700 Mean Reward: 300.0
Epoch 700 Model saved to ./checkpoints/take_cover.ckpt
Epoch 700 test:
Test Episode 1 Reward: 289.0
Test Episode 2 Reward: 289.0
Test Episode 3 Reward: 289.0
Test Episode 4 Reward: 289.0
Test Episode 5 Reward: 385.0
Test Episode 6 Reward: 689.0
Test Episode 7 Reward: 289.0
Test Episode 8 Reward: 267.0
Test Episode 9 Reward: 478.0
Test Episode 10 Reward: 289.0
Test Episode 11 Reward: 289.0
Test Episode 12 Reward: 289.0
Test Episode 13 Reward: 185.0
Test Episode 14 Reward: 114.0
Test Episode 15 Reward: 289.0
Test Episode 16 Reward: 289.0
Test Episode 17 Reward: 289.0
Test Episode 18 Reward: 289.0
Test Episode 19 Reward: 288.0
Test Episode 20 Reward: 289.0
Epoch 700 Average Test Reward: 308.15


100%|██████████| 2000/2000 [00:28<00:00, 70.98it/s]


Epoch 701 Mean Reward: 479.0


100%|██████████| 2000/2000 [00:28<00:00, 70.83it/s]


Epoch 702 Mean Reward: 358.0


100%|██████████| 2000/2000 [00:27<00:00, 72.19it/s]


Epoch 703 Mean Reward: 375.0


100%|██████████| 2000/2000 [00:26<00:00, 74.31it/s]


Epoch 704 Mean Reward: 260.0


100%|██████████| 2000/2000 [00:26<00:00, 74.74it/s]


Epoch 705 Mean Reward: 317.0


100%|██████████| 2000/2000 [00:26<00:00, 74.72it/s]


Epoch 706 Mean Reward: 281.0


100%|██████████| 2000/2000 [00:26<00:00, 74.46it/s]


Epoch 707 Mean Reward: 295.0


100%|██████████| 2000/2000 [00:26<00:00, 75.74it/s]


Epoch 708 Mean Reward: 141.0


100%|██████████| 2000/2000 [00:28<00:00, 70.40it/s]


Epoch 709 Mean Reward: 1003.0


100%|██████████| 2000/2000 [00:26<00:00, 75.77it/s]


Epoch 710 Mean Reward: 163.0
Epoch 710 Model saved to ./checkpoints/take_cover.ckpt
Epoch 710 test:
Test Episode 1 Reward: 283.0
Test Episode 2 Reward: 283.0
Test Episode 3 Reward: 283.0
Test Episode 4 Reward: 158.0
Test Episode 5 Reward: 283.0
Test Episode 6 Reward: 118.0
Test Episode 7 Reward: 283.0
Test Episode 8 Reward: 253.0
Test Episode 9 Reward: 194.0
Test Episode 10 Reward: 283.0
Test Episode 11 Reward: 92.0
Test Episode 12 Reward: 283.0
Test Episode 13 Reward: 283.0
Test Episode 14 Reward: 283.0
Test Episode 15 Reward: 283.0
Test Episode 16 Reward: 283.0
Test Episode 17 Reward: 283.0
Test Episode 18 Reward: 180.0
Test Episode 19 Reward: 283.0
Test Episode 20 Reward: 155.0
Epoch 710 Average Test Reward: 241.45


100%|██████████| 2000/2000 [00:27<00:00, 73.15it/s]


Epoch 711 Mean Reward: 217.0


100%|██████████| 2000/2000 [00:27<00:00, 73.03it/s]


Epoch 712 Mean Reward: 221.0


100%|██████████| 2000/2000 [00:27<00:00, 72.28it/s]


Epoch 713 Mean Reward: 317.0


100%|██████████| 2000/2000 [00:28<00:00, 70.91it/s]


Epoch 714 Mean Reward: 542.0


100%|██████████| 2000/2000 [00:28<00:00, 70.39it/s]


Epoch 715 Mean Reward: 615.0


100%|██████████| 2000/2000 [00:27<00:00, 71.96it/s]


Epoch 716 Mean Reward: 397.0


100%|██████████| 2000/2000 [00:27<00:00, 72.44it/s]


Epoch 717 Mean Reward: 289.0


100%|██████████| 2000/2000 [00:27<00:00, 73.41it/s]


Epoch 718 Mean Reward: 157.0


100%|██████████| 2000/2000 [00:27<00:00, 71.46it/s]


Epoch 719 Mean Reward: 651.0


100%|██████████| 2000/2000 [00:27<00:00, 72.65it/s]


Epoch 720 Mean Reward: 603.0
Epoch 720 Model saved to ./checkpoints/take_cover.ckpt
Epoch 720 test:
Test Episode 1 Reward: 216.0
Test Episode 2 Reward: 216.0
Test Episode 3 Reward: 181.0
Test Episode 4 Reward: 362.0
Test Episode 5 Reward: 273.0
Test Episode 6 Reward: 216.0
Test Episode 7 Reward: 257.0
Test Episode 8 Reward: 216.0
Test Episode 9 Reward: 216.0
Test Episode 10 Reward: 216.0
Test Episode 11 Reward: 188.0
Test Episode 12 Reward: 216.0
Test Episode 13 Reward: 134.0
Test Episode 14 Reward: 216.0
Test Episode 15 Reward: 216.0
Test Episode 16 Reward: 226.0
Test Episode 17 Reward: 177.0
Test Episode 18 Reward: 216.0
Test Episode 19 Reward: 216.0
Test Episode 20 Reward: 216.0
Epoch 720 Average Test Reward: 219.5


100%|██████████| 2000/2000 [00:27<00:00, 72.06it/s]


Epoch 721 Mean Reward: 700.0


100%|██████████| 2000/2000 [00:27<00:00, 72.55it/s]


Epoch 722 Mean Reward: 691.0


100%|██████████| 2000/2000 [00:27<00:00, 73.61it/s]


Epoch 723 Mean Reward: 488.0


100%|██████████| 2000/2000 [00:26<00:00, 74.82it/s]


Epoch 724 Mean Reward: 287.0


100%|██████████| 2000/2000 [00:27<00:00, 73.14it/s]


Epoch 725 Mean Reward: 542.0


100%|██████████| 2000/2000 [00:26<00:00, 74.20it/s]


Epoch 726 Mean Reward: 400.0


100%|██████████| 2000/2000 [00:27<00:00, 72.55it/s]


Epoch 727 Mean Reward: 670.0


100%|██████████| 2000/2000 [00:26<00:00, 74.50it/s]


Epoch 728 Mean Reward: 354.0


100%|██████████| 2000/2000 [00:27<00:00, 72.24it/s]


Epoch 729 Mean Reward: 702.0


100%|██████████| 2000/2000 [00:28<00:00, 71.06it/s]


Epoch 730 Mean Reward: 856.0
Epoch 730 Model saved to ./checkpoints/take_cover.ckpt
Epoch 730 test:
Test Episode 1 Reward: 204.0
Test Episode 2 Reward: 255.0
Test Episode 3 Reward: 309.0
Test Episode 4 Reward: 255.0
Test Episode 5 Reward: 255.0
Test Episode 6 Reward: 255.0
Test Episode 7 Reward: 522.0
Test Episode 8 Reward: 120.0
Test Episode 9 Reward: 255.0
Test Episode 10 Reward: 255.0
Test Episode 11 Reward: 255.0
Test Episode 12 Reward: 255.0
Test Episode 13 Reward: 186.0
Test Episode 14 Reward: 255.0
Test Episode 15 Reward: 300.0
Test Episode 16 Reward: 116.0
Test Episode 17 Reward: 255.0
Test Episode 18 Reward: 255.0
Test Episode 19 Reward: 201.0
Test Episode 20 Reward: 224.0
Epoch 730 Average Test Reward: 249.35


100%|██████████| 2000/2000 [00:26<00:00, 75.70it/s]


Epoch 731 Mean Reward: 147.0


100%|██████████| 2000/2000 [00:26<00:00, 74.39it/s]


Epoch 732 Mean Reward: 251.0


100%|██████████| 2000/2000 [00:27<00:00, 74.02it/s]


Epoch 733 Mean Reward: 218.0


100%|██████████| 2000/2000 [00:28<00:00, 70.08it/s]


Epoch 734 Mean Reward: 584.0


100%|██████████| 2000/2000 [00:27<00:00, 73.24it/s]


Epoch 735 Mean Reward: 346.0


100%|██████████| 2000/2000 [00:26<00:00, 74.36it/s]


Epoch 736 Mean Reward: 347.0


100%|██████████| 2000/2000 [00:26<00:00, 75.41it/s]


Epoch 737 Mean Reward: 139.0


100%|██████████| 2000/2000 [00:28<00:00, 70.80it/s]


Epoch 738 Mean Reward: 885.0


100%|██████████| 2000/2000 [00:27<00:00, 73.29it/s]


Epoch 739 Mean Reward: 224.0


100%|██████████| 2000/2000 [00:27<00:00, 73.70it/s]


Epoch 740 Mean Reward: 347.0
Epoch 740 Model saved to ./checkpoints/take_cover.ckpt
Epoch 740 test:
Test Episode 1 Reward: 260.0
Test Episode 2 Reward: 262.0
Test Episode 3 Reward: 438.0
Test Episode 4 Reward: 175.0
Test Episode 5 Reward: 260.0
Test Episode 6 Reward: 260.0
Test Episode 7 Reward: 260.0
Test Episode 8 Reward: 260.0
Test Episode 9 Reward: 260.0
Test Episode 10 Reward: 260.0
Test Episode 11 Reward: 289.0
Test Episode 12 Reward: 100.0
Test Episode 13 Reward: 260.0
Test Episode 14 Reward: 260.0
Test Episode 15 Reward: 260.0
Test Episode 16 Reward: 260.0
Test Episode 17 Reward: 260.0
Test Episode 18 Reward: 260.0
Test Episode 19 Reward: 260.0
Test Episode 20 Reward: 260.0
Epoch 740 Average Test Reward: 258.2


100%|██████████| 2000/2000 [00:26<00:00, 75.83it/s]


Epoch 741 Mean Reward: 137.0


100%|██████████| 2000/2000 [00:26<00:00, 74.28it/s]


Epoch 742 Mean Reward: 360.0


100%|██████████| 2000/2000 [00:26<00:00, 74.24it/s]


Epoch 743 Mean Reward: 379.0


100%|██████████| 2000/2000 [00:26<00:00, 74.57it/s]


Epoch 744 Mean Reward: 332.0


100%|██████████| 2000/2000 [00:26<00:00, 74.69it/s]


Epoch 745 Mean Reward: 328.0


100%|██████████| 2000/2000 [00:27<00:00, 71.79it/s]


Epoch 746 Mean Reward: 794.0


100%|██████████| 2000/2000 [00:27<00:00, 73.69it/s]


Epoch 747 Mean Reward: 456.0


100%|██████████| 2000/2000 [00:27<00:00, 71.60it/s]


Epoch 748 Mean Reward: 804.0


100%|██████████| 2000/2000 [00:26<00:00, 75.24it/s]


Epoch 749 Mean Reward: 199.0


100%|██████████| 2000/2000 [00:26<00:00, 74.25it/s]


Epoch 750 Mean Reward: 403.0
Epoch 750 Model saved to ./checkpoints/take_cover.ckpt
Epoch 750 test:
Test Episode 1 Reward: 214.0
Test Episode 2 Reward: 214.0
Test Episode 3 Reward: 214.0
Test Episode 4 Reward: 214.0
Test Episode 5 Reward: 214.0
Test Episode 6 Reward: 484.0
Test Episode 7 Reward: 269.0
Test Episode 8 Reward: 214.0
Test Episode 9 Reward: 214.0
Test Episode 10 Reward: 94.0
Test Episode 11 Reward: 214.0
Test Episode 12 Reward: 214.0
Test Episode 13 Reward: 284.0
Test Episode 14 Reward: 161.0
Test Episode 15 Reward: 214.0
Test Episode 16 Reward: 214.0
Test Episode 17 Reward: 212.0
Test Episode 18 Reward: 377.0
Test Episode 19 Reward: 214.0
Test Episode 20 Reward: 214.0
Epoch 750 Average Test Reward: 233.15


100%|██████████| 2000/2000 [00:27<00:00, 73.30it/s]


Epoch 751 Mean Reward: 502.0


100%|██████████| 2000/2000 [00:26<00:00, 74.73it/s]


Epoch 752 Mean Reward: 305.0


100%|██████████| 2000/2000 [00:26<00:00, 74.42it/s]


Epoch 753 Mean Reward: 317.0


100%|██████████| 2000/2000 [00:27<00:00, 72.95it/s]


Epoch 754 Mean Reward: 548.0


100%|██████████| 2000/2000 [00:27<00:00, 72.90it/s]


Epoch 755 Mean Reward: 528.0


100%|██████████| 2000/2000 [00:26<00:00, 74.21it/s]


Epoch 756 Mean Reward: 342.0


100%|██████████| 2000/2000 [00:27<00:00, 74.06it/s]


Epoch 757 Mean Reward: 319.0


100%|██████████| 2000/2000 [00:26<00:00, 74.86it/s]


Epoch 758 Mean Reward: 242.0


100%|██████████| 2000/2000 [00:27<00:00, 73.88it/s]


Epoch 759 Mean Reward: 317.0


100%|██████████| 2000/2000 [00:27<00:00, 72.66it/s]


Epoch 760 Mean Reward: 307.0
Epoch 760 Model saved to ./checkpoints/take_cover.ckpt
Epoch 760 test:
Test Episode 1 Reward: 278.0
Test Episode 2 Reward: 309.0
Test Episode 3 Reward: 278.0
Test Episode 4 Reward: 268.0
Test Episode 5 Reward: 278.0
Test Episode 6 Reward: 278.0
Test Episode 7 Reward: 278.0
Test Episode 8 Reward: 278.0
Test Episode 9 Reward: 107.0
Test Episode 10 Reward: 278.0
Test Episode 11 Reward: 278.0
Test Episode 12 Reward: 163.0
Test Episode 13 Reward: 278.0
Test Episode 14 Reward: 278.0
Test Episode 15 Reward: 278.0
Test Episode 16 Reward: 182.0
Test Episode 17 Reward: 250.0
Test Episode 18 Reward: 226.0
Test Episode 19 Reward: 122.0
Test Episode 20 Reward: 278.0
Epoch 760 Average Test Reward: 248.15


100%|██████████| 2000/2000 [00:26<00:00, 74.11it/s]


Epoch 761 Mean Reward: 400.0


100%|██████████| 2000/2000 [00:27<00:00, 73.99it/s]


Epoch 762 Mean Reward: 401.0


100%|██████████| 2000/2000 [00:26<00:00, 75.42it/s]


Epoch 763 Mean Reward: 141.0


100%|██████████| 2000/2000 [00:26<00:00, 74.39it/s]


Epoch 764 Mean Reward: 347.0


100%|██████████| 2000/2000 [00:26<00:00, 75.22it/s]


Epoch 765 Mean Reward: 181.0


100%|██████████| 2000/2000 [00:26<00:00, 74.65it/s]


Epoch 766 Mean Reward: 296.0


100%|██████████| 2000/2000 [00:27<00:00, 73.46it/s]


Epoch 767 Mean Reward: 500.0


100%|██████████| 2000/2000 [00:26<00:00, 75.57it/s]


Epoch 768 Mean Reward: 138.0


100%|██████████| 2000/2000 [00:27<00:00, 72.52it/s]


Epoch 769 Mean Reward: 581.0


100%|██████████| 2000/2000 [00:26<00:00, 74.73it/s]


Epoch 770 Mean Reward: 269.0
Epoch 770 Model saved to ./checkpoints/take_cover.ckpt
Epoch 770 test:
Test Episode 1 Reward: 216.0
Test Episode 2 Reward: 337.0
Test Episode 3 Reward: 169.0
Test Episode 4 Reward: 168.0
Test Episode 5 Reward: 337.0
Test Episode 6 Reward: 140.0
Test Episode 7 Reward: 337.0
Test Episode 8 Reward: 179.0
Test Episode 9 Reward: 209.0
Test Episode 10 Reward: 337.0
Test Episode 11 Reward: 337.0
Test Episode 12 Reward: 337.0
Test Episode 13 Reward: 337.0
Test Episode 14 Reward: 381.0
Test Episode 15 Reward: 337.0
Test Episode 16 Reward: 337.0
Test Episode 17 Reward: 204.0
Test Episode 18 Reward: 337.0
Test Episode 19 Reward: 121.0
Test Episode 20 Reward: 197.0
Epoch 770 Average Test Reward: 267.7


100%|██████████| 2000/2000 [00:27<00:00, 72.90it/s]


Epoch 771 Mean Reward: 606.0


100%|██████████| 2000/2000 [00:27<00:00, 72.50it/s]


Epoch 772 Mean Reward: 637.0


100%|██████████| 2000/2000 [00:26<00:00, 75.34it/s]


Epoch 773 Mean Reward: 208.0


100%|██████████| 2000/2000 [00:27<00:00, 73.56it/s]


Epoch 774 Mean Reward: 472.0


100%|██████████| 2000/2000 [00:26<00:00, 75.18it/s]


Epoch 775 Mean Reward: 218.0


100%|██████████| 2000/2000 [00:27<00:00, 73.38it/s]


Epoch 776 Mean Reward: 469.0


100%|██████████| 2000/2000 [00:27<00:00, 73.01it/s]


Epoch 777 Mean Reward: 583.0


100%|██████████| 2000/2000 [00:26<00:00, 74.17it/s]


Epoch 778 Mean Reward: 376.0


100%|██████████| 2000/2000 [00:26<00:00, 75.47it/s]


Epoch 779 Mean Reward: 130.0


100%|██████████| 2000/2000 [00:27<00:00, 72.79it/s]


Epoch 780 Mean Reward: 271.0
Epoch 780 Model saved to ./checkpoints/take_cover.ckpt
Epoch 780 test:
Test Episode 1 Reward: 184.0
Test Episode 2 Reward: 455.0
Test Episode 3 Reward: 455.0
Test Episode 4 Reward: 455.0
Test Episode 5 Reward: 257.0
Test Episode 6 Reward: 455.0
Test Episode 7 Reward: 455.0
Test Episode 8 Reward: 366.0
Test Episode 9 Reward: 455.0
Test Episode 10 Reward: 203.0
Test Episode 11 Reward: 455.0
Test Episode 12 Reward: 332.0
Test Episode 13 Reward: 455.0
Test Episode 14 Reward: 455.0
Test Episode 15 Reward: 455.0
Test Episode 16 Reward: 455.0
Test Episode 17 Reward: 298.0
Test Episode 18 Reward: 137.0
Test Episode 19 Reward: 119.0
Test Episode 20 Reward: 455.0
Epoch 780 Average Test Reward: 367.8


100%|██████████| 2000/2000 [00:26<00:00, 74.55it/s]


Epoch 781 Mean Reward: 271.0


100%|██████████| 2000/2000 [00:26<00:00, 75.40it/s]


Epoch 782 Mean Reward: 183.0


100%|██████████| 2000/2000 [00:27<00:00, 73.20it/s]


Epoch 783 Mean Reward: 546.0


100%|██████████| 2000/2000 [00:26<00:00, 75.03it/s]


Epoch 784 Mean Reward: 249.0


100%|██████████| 2000/2000 [00:26<00:00, 74.33it/s]


Epoch 785 Mean Reward: 337.0


100%|██████████| 2000/2000 [00:26<00:00, 74.52it/s]


Epoch 786 Mean Reward: 305.0


100%|██████████| 2000/2000 [00:26<00:00, 75.12it/s]


Epoch 787 Mean Reward: 230.0


100%|██████████| 2000/2000 [00:27<00:00, 73.74it/s]


Epoch 788 Mean Reward: 399.0


100%|██████████| 2000/2000 [00:26<00:00, 74.56it/s]


Epoch 789 Mean Reward: 298.0


100%|██████████| 2000/2000 [00:26<00:00, 75.69it/s]


Epoch 790 Mean Reward: 136.0
Epoch 790 Model saved to ./checkpoints/take_cover.ckpt
Epoch 790 test:
Test Episode 1 Reward: 204.0
Test Episode 2 Reward: 103.0
Test Episode 3 Reward: 146.0
Test Episode 4 Reward: 277.0
Test Episode 5 Reward: 265.0
Test Episode 6 Reward: 277.0
Test Episode 7 Reward: 277.0
Test Episode 8 Reward: 106.0
Test Episode 9 Reward: 277.0
Test Episode 10 Reward: 157.0
Test Episode 11 Reward: 277.0
Test Episode 12 Reward: 277.0
Test Episode 13 Reward: 277.0
Test Episode 14 Reward: 130.0
Test Episode 15 Reward: 277.0
Test Episode 16 Reward: 277.0
Test Episode 17 Reward: 277.0
Test Episode 18 Reward: 277.0
Test Episode 19 Reward: 277.0
Test Episode 20 Reward: 288.0
Epoch 790 Average Test Reward: 236.15


100%|██████████| 2000/2000 [00:26<00:00, 74.78it/s]


Epoch 791 Mean Reward: 299.0


100%|██████████| 2000/2000 [00:27<00:00, 73.30it/s]


Epoch 792 Mean Reward: 516.0


100%|██████████| 2000/2000 [00:28<00:00, 71.32it/s]


Epoch 793 Mean Reward: 862.0


100%|██████████| 2000/2000 [00:26<00:00, 74.22it/s]


Epoch 794 Mean Reward: 347.0


100%|██████████| 2000/2000 [00:27<00:00, 73.12it/s]


Epoch 795 Mean Reward: 540.0


100%|██████████| 2000/2000 [00:26<00:00, 74.75it/s]


Epoch 796 Mean Reward: 267.0


100%|██████████| 2000/2000 [00:26<00:00, 74.10it/s]


Epoch 797 Mean Reward: 247.0


100%|██████████| 2000/2000 [00:27<00:00, 72.84it/s]


Epoch 798 Mean Reward: 374.0


100%|██████████| 2000/2000 [00:27<00:00, 73.69it/s]


Epoch 799 Mean Reward: 435.0


100%|██████████| 2000/2000 [00:26<00:00, 74.31it/s]


Epoch 800 Mean Reward: 337.0
Epoch 800 Model saved to ./checkpoints/take_cover.ckpt
Epoch 800 test:
Test Episode 1 Reward: 213.0
Test Episode 2 Reward: 213.0
Test Episode 3 Reward: 213.0
Test Episode 4 Reward: 178.0
Test Episode 5 Reward: 213.0
Test Episode 6 Reward: 131.0
Test Episode 7 Reward: 213.0
Test Episode 8 Reward: 213.0
Test Episode 9 Reward: 448.0
Test Episode 10 Reward: 116.0
Test Episode 11 Reward: 213.0
Test Episode 12 Reward: 213.0
Test Episode 13 Reward: 113.0
Test Episode 14 Reward: 213.0
Test Episode 15 Reward: 213.0
Test Episode 16 Reward: 198.0
Test Episode 17 Reward: 213.0
Test Episode 18 Reward: 213.0
Test Episode 19 Reward: 169.0
Test Episode 20 Reward: 307.0
Epoch 800 Average Test Reward: 210.8


100%|██████████| 2000/2000 [00:27<00:00, 72.99it/s]


Epoch 801 Mean Reward: 554.0


100%|██████████| 2000/2000 [00:28<00:00, 71.01it/s]


Epoch 802 Mean Reward: 920.0


100%|██████████| 2000/2000 [00:28<00:00, 70.74it/s]


Epoch 803 Mean Reward: 914.0


100%|██████████| 2000/2000 [00:26<00:00, 75.24it/s]


Epoch 804 Mean Reward: 176.0


100%|██████████| 2000/2000 [00:27<00:00, 73.10it/s]


Epoch 805 Mean Reward: 548.0


100%|██████████| 2000/2000 [00:27<00:00, 73.41it/s]


Epoch 806 Mean Reward: 498.0


100%|██████████| 2000/2000 [00:26<00:00, 74.79it/s]


Epoch 807 Mean Reward: 305.0


100%|██████████| 2000/2000 [00:26<00:00, 74.08it/s]


Epoch 808 Mean Reward: 376.0


100%|██████████| 2000/2000 [00:26<00:00, 74.84it/s]


Epoch 809 Mean Reward: 283.0


100%|██████████| 2000/2000 [00:26<00:00, 74.98it/s]


Epoch 810 Mean Reward: 241.0
Epoch 810 Model saved to ./checkpoints/take_cover.ckpt
Epoch 810 test:
Test Episode 1 Reward: 247.0
Test Episode 2 Reward: 114.0
Test Episode 3 Reward: 247.0
Test Episode 4 Reward: 247.0
Test Episode 5 Reward: 247.0
Test Episode 6 Reward: 247.0
Test Episode 7 Reward: 326.0
Test Episode 8 Reward: 247.0
Test Episode 9 Reward: 247.0
Test Episode 10 Reward: 273.0
Test Episode 11 Reward: 112.0
Test Episode 12 Reward: 211.0
Test Episode 13 Reward: 249.0
Test Episode 14 Reward: 247.0
Test Episode 15 Reward: 247.0
Test Episode 16 Reward: 247.0
Test Episode 17 Reward: 164.0
Test Episode 18 Reward: 140.0
Test Episode 19 Reward: 247.0
Test Episode 20 Reward: 247.0
Epoch 810 Average Test Reward: 227.65


100%|██████████| 2000/2000 [00:26<00:00, 74.91it/s]


Epoch 811 Mean Reward: 269.0


100%|██████████| 2000/2000 [00:27<00:00, 73.51it/s]


Epoch 812 Mean Reward: 496.0


100%|██████████| 2000/2000 [00:27<00:00, 73.51it/s]


Epoch 813 Mean Reward: 469.0


100%|██████████| 2000/2000 [00:26<00:00, 74.32it/s]


Epoch 814 Mean Reward: 386.0


100%|██████████| 2000/2000 [00:26<00:00, 74.72it/s]


Epoch 815 Mean Reward: 271.0


100%|██████████| 2000/2000 [00:26<00:00, 74.51it/s]


Epoch 816 Mean Reward: 316.0


100%|██████████| 2000/2000 [00:26<00:00, 75.05it/s]


Epoch 817 Mean Reward: 220.0


100%|██████████| 2000/2000 [00:26<00:00, 74.18it/s]


Epoch 818 Mean Reward: 378.0


100%|██████████| 2000/2000 [00:27<00:00, 72.52it/s]


Epoch 819 Mean Reward: 495.0


100%|██████████| 2000/2000 [00:27<00:00, 73.06it/s]


Epoch 820 Mean Reward: 268.0
Epoch 820 Model saved to ./checkpoints/take_cover.ckpt
Epoch 820 test:
Test Episode 1 Reward: 333.0
Test Episode 2 Reward: 333.0
Test Episode 3 Reward: 333.0
Test Episode 4 Reward: 114.0
Test Episode 5 Reward: 255.0
Test Episode 6 Reward: 176.0
Test Episode 7 Reward: 333.0
Test Episode 8 Reward: 333.0
Test Episode 9 Reward: 387.0
Test Episode 10 Reward: 296.0
Test Episode 11 Reward: 130.0
Test Episode 12 Reward: 108.0
Test Episode 13 Reward: 222.0
Test Episode 14 Reward: 333.0
Test Episode 15 Reward: 333.0
Test Episode 16 Reward: 373.0
Test Episode 17 Reward: 333.0
Test Episode 18 Reward: 333.0
Test Episode 19 Reward: 155.0
Test Episode 20 Reward: 333.0
Epoch 820 Average Test Reward: 277.3


100%|██████████| 2000/2000 [00:27<00:00, 73.38it/s]


Epoch 821 Mean Reward: 513.0


100%|██████████| 2000/2000 [00:27<00:00, 73.04it/s]


Epoch 822 Mean Reward: 565.0


100%|██████████| 2000/2000 [00:27<00:00, 72.83it/s]


Epoch 823 Mean Reward: 610.0


100%|██████████| 2000/2000 [00:26<00:00, 74.37it/s]


Epoch 824 Mean Reward: 329.0


100%|██████████| 2000/2000 [00:27<00:00, 73.03it/s]


Epoch 825 Mean Reward: 500.0


100%|██████████| 2000/2000 [00:27<00:00, 74.00it/s]


Epoch 826 Mean Reward: 378.0


100%|██████████| 2000/2000 [00:26<00:00, 74.80it/s]


Epoch 827 Mean Reward: 288.0


100%|██████████| 2000/2000 [00:27<00:00, 73.79it/s]


Epoch 828 Mean Reward: 434.0


100%|██████████| 2000/2000 [00:26<00:00, 74.29it/s]


Epoch 829 Mean Reward: 329.0


100%|██████████| 2000/2000 [00:27<00:00, 71.98it/s]


Epoch 830 Mean Reward: 771.0
Epoch 830 Model saved to ./checkpoints/take_cover.ckpt
Epoch 830 test:
Test Episode 1 Reward: 270.0
Test Episode 2 Reward: 227.0
Test Episode 3 Reward: 285.0
Test Episode 4 Reward: 285.0
Test Episode 5 Reward: 270.0
Test Episode 6 Reward: 97.0
Test Episode 7 Reward: 216.0
Test Episode 8 Reward: 285.0
Test Episode 9 Reward: 119.0
Test Episode 10 Reward: 187.0
Test Episode 11 Reward: 285.0
Test Episode 12 Reward: 196.0
Test Episode 13 Reward: 249.0
Test Episode 14 Reward: 172.0
Test Episode 15 Reward: 266.0
Test Episode 16 Reward: 285.0
Test Episode 17 Reward: 285.0
Test Episode 18 Reward: 285.0
Test Episode 19 Reward: 103.0
Test Episode 20 Reward: 285.0
Epoch 830 Average Test Reward: 232.6


100%|██████████| 2000/2000 [00:26<00:00, 74.89it/s]


Epoch 831 Mean Reward: 274.0


100%|██████████| 2000/2000 [00:26<00:00, 75.52it/s]


Epoch 832 Mean Reward: 150.0


100%|██████████| 2000/2000 [00:26<00:00, 74.82it/s]


Epoch 833 Mean Reward: 250.0


100%|██████████| 2000/2000 [00:26<00:00, 75.32it/s]


Epoch 834 Mean Reward: 179.0


100%|██████████| 2000/2000 [00:27<00:00, 73.17it/s]


Epoch 835 Mean Reward: 457.0


100%|██████████| 2000/2000 [00:27<00:00, 73.12it/s]


Epoch 836 Mean Reward: 505.0


100%|██████████| 2000/2000 [00:26<00:00, 75.22it/s]


Epoch 837 Mean Reward: 213.0


100%|██████████| 2000/2000 [00:27<00:00, 73.14it/s]


Epoch 838 Mean Reward: 514.0


100%|██████████| 2000/2000 [00:27<00:00, 73.96it/s]


Epoch 839 Mean Reward: 305.0


100%|██████████| 2000/2000 [00:27<00:00, 72.58it/s]


Epoch 840 Mean Reward: 406.0
Epoch 840 Model saved to ./checkpoints/take_cover.ckpt
Epoch 840 test:
Test Episode 1 Reward: 212.0
Test Episode 2 Reward: 251.0
Test Episode 3 Reward: 251.0
Test Episode 4 Reward: 180.0
Test Episode 5 Reward: 167.0
Test Episode 6 Reward: 107.0
Test Episode 7 Reward: 247.0
Test Episode 8 Reward: 251.0
Test Episode 9 Reward: 124.0
Test Episode 10 Reward: 211.0
Test Episode 11 Reward: 251.0
Test Episode 12 Reward: 251.0
Test Episode 13 Reward: 247.0
Test Episode 14 Reward: 251.0
Test Episode 15 Reward: 266.0
Test Episode 16 Reward: 251.0
Test Episode 17 Reward: 251.0
Test Episode 18 Reward: 105.0
Test Episode 19 Reward: 99.0
Test Episode 20 Reward: 251.0
Epoch 840 Average Test Reward: 211.2


100%|██████████| 2000/2000 [00:27<00:00, 73.03it/s]


Epoch 841 Mean Reward: 563.0


100%|██████████| 2000/2000 [00:26<00:00, 74.21it/s]


Epoch 842 Mean Reward: 386.0


100%|██████████| 2000/2000 [00:26<00:00, 74.83it/s]


Epoch 843 Mean Reward: 284.0


100%|██████████| 2000/2000 [00:26<00:00, 74.99it/s]


Epoch 844 Mean Reward: 236.0


100%|██████████| 2000/2000 [00:27<00:00, 72.63it/s]


Epoch 845 Mean Reward: 515.0


100%|██████████| 2000/2000 [00:26<00:00, 74.50it/s]


Epoch 846 Mean Reward: 276.0


100%|██████████| 2000/2000 [00:27<00:00, 73.72it/s]


Epoch 847 Mean Reward: 466.0


100%|██████████| 2000/2000 [00:28<00:00, 71.07it/s]


Epoch 848 Mean Reward: 890.0


100%|██████████| 2000/2000 [00:26<00:00, 74.20it/s]


Epoch 849 Mean Reward: 295.0


100%|██████████| 2000/2000 [00:26<00:00, 74.60it/s]


Epoch 850 Mean Reward: 297.0
Epoch 850 Model saved to ./checkpoints/take_cover.ckpt
Epoch 850 test:
Test Episode 1 Reward: 282.0
Test Episode 2 Reward: 282.0
Test Episode 3 Reward: 282.0
Test Episode 4 Reward: 255.0
Test Episode 5 Reward: 282.0
Test Episode 6 Reward: 282.0
Test Episode 7 Reward: 282.0
Test Episode 8 Reward: 282.0
Test Episode 9 Reward: 282.0
Test Episode 10 Reward: 106.0
Test Episode 11 Reward: 134.0
Test Episode 12 Reward: 282.0
Test Episode 13 Reward: 125.0
Test Episode 14 Reward: 282.0
Test Episode 15 Reward: 282.0
Test Episode 16 Reward: 259.0
Test Episode 17 Reward: 282.0
Test Episode 18 Reward: 282.0
Test Episode 19 Reward: 282.0
Test Episode 20 Reward: 282.0
Epoch 850 Average Test Reward: 255.45


100%|██████████| 2000/2000 [00:26<00:00, 74.40it/s]


Epoch 851 Mean Reward: 343.0


100%|██████████| 2000/2000 [00:26<00:00, 74.61it/s]


Epoch 852 Mean Reward: 300.0


100%|██████████| 2000/2000 [00:26<00:00, 74.47it/s]


Epoch 853 Mean Reward: 337.0


100%|██████████| 2000/2000 [00:27<00:00, 71.44it/s]


Epoch 854 Mean Reward: 809.0


100%|██████████| 2000/2000 [00:28<00:00, 69.60it/s]


Epoch 855 Mean Reward: 1184.0


100%|██████████| 2000/2000 [00:26<00:00, 74.94it/s]


Epoch 856 Mean Reward: 229.0


100%|██████████| 2000/2000 [00:26<00:00, 75.19it/s]


Epoch 857 Mean Reward: 213.0


100%|██████████| 2000/2000 [00:27<00:00, 72.56it/s]


Epoch 858 Mean Reward: 615.0


100%|██████████| 2000/2000 [00:26<00:00, 75.06it/s]


Epoch 859 Mean Reward: 243.0


100%|██████████| 2000/2000 [00:26<00:00, 74.92it/s]


Epoch 860 Mean Reward: 262.0
Epoch 860 Model saved to ./checkpoints/take_cover.ckpt
Epoch 860 test:
Test Episode 1 Reward: 280.0
Test Episode 2 Reward: 133.0
Test Episode 3 Reward: 374.0
Test Episode 4 Reward: 169.0
Test Episode 5 Reward: 495.0
Test Episode 6 Reward: 127.0
Test Episode 7 Reward: 368.0
Test Episode 8 Reward: 279.0
Test Episode 9 Reward: 280.0
Test Episode 10 Reward: 280.0
Test Episode 11 Reward: 280.0
Test Episode 12 Reward: 130.0
Test Episode 13 Reward: 280.0
Test Episode 14 Reward: 280.0
Test Episode 15 Reward: 280.0
Test Episode 16 Reward: 97.0
Test Episode 17 Reward: 280.0
Test Episode 18 Reward: 280.0
Test Episode 19 Reward: 280.0
Test Episode 20 Reward: 443.0
Epoch 860 Average Test Reward: 270.75


100%|██████████| 2000/2000 [00:27<00:00, 73.97it/s]


Epoch 861 Mean Reward: 423.0


100%|██████████| 2000/2000 [00:26<00:00, 74.37it/s]


Epoch 862 Mean Reward: 372.0


100%|██████████| 2000/2000 [00:27<00:00, 72.60it/s]


Epoch 863 Mean Reward: 553.0


100%|██████████| 2000/2000 [00:26<00:00, 74.14it/s]


Epoch 864 Mean Reward: 378.0


100%|██████████| 2000/2000 [00:26<00:00, 74.69it/s]


Epoch 865 Mean Reward: 286.0


100%|██████████| 2000/2000 [00:26<00:00, 74.41it/s]


Epoch 866 Mean Reward: 331.0


100%|██████████| 2000/2000 [00:26<00:00, 74.07it/s]


Epoch 867 Mean Reward: 387.0


100%|██████████| 2000/2000 [00:26<00:00, 74.15it/s]


Epoch 868 Mean Reward: 332.0


100%|██████████| 2000/2000 [00:26<00:00, 74.86it/s]


Epoch 869 Mean Reward: 287.0


100%|██████████| 2000/2000 [00:27<00:00, 74.05it/s]


Epoch 870 Mean Reward: 337.0
Epoch 870 Model saved to ./checkpoints/take_cover.ckpt
Epoch 870 test:
Test Episode 1 Reward: 213.0
Test Episode 2 Reward: 191.0
Test Episode 3 Reward: 296.0
Test Episode 4 Reward: 227.0
Test Episode 5 Reward: 213.0
Test Episode 6 Reward: 213.0
Test Episode 7 Reward: 200.0
Test Episode 8 Reward: 310.0
Test Episode 9 Reward: 190.0
Test Episode 10 Reward: 236.0
Test Episode 11 Reward: 213.0
Test Episode 12 Reward: 213.0
Test Episode 13 Reward: 213.0
Test Episode 14 Reward: 255.0
Test Episode 15 Reward: 143.0
Test Episode 16 Reward: 209.0
Test Episode 17 Reward: 213.0
Test Episode 18 Reward: 213.0
Test Episode 19 Reward: 243.0
Test Episode 20 Reward: 213.0
Epoch 870 Average Test Reward: 220.85


100%|██████████| 2000/2000 [00:26<00:00, 74.09it/s]


Epoch 871 Mean Reward: 318.0


100%|██████████| 2000/2000 [00:26<00:00, 75.01it/s]


Epoch 872 Mean Reward: 217.0


100%|██████████| 2000/2000 [00:27<00:00, 73.00it/s]


Epoch 873 Mean Reward: 554.0


100%|██████████| 2000/2000 [00:26<00:00, 75.09it/s]


Epoch 874 Mean Reward: 222.0


100%|██████████| 2000/2000 [00:27<00:00, 72.58it/s]


Epoch 875 Mean Reward: 622.0


100%|██████████| 2000/2000 [00:27<00:00, 72.83it/s]


Epoch 876 Mean Reward: 545.0


100%|██████████| 2000/2000 [00:26<00:00, 74.08it/s]


Epoch 877 Mean Reward: 382.0


100%|██████████| 2000/2000 [00:26<00:00, 74.50it/s]


Epoch 878 Mean Reward: 321.0


100%|██████████| 2000/2000 [00:27<00:00, 73.02it/s]


Epoch 879 Mean Reward: 543.0


100%|██████████| 2000/2000 [00:26<00:00, 75.74it/s]


Epoch 880 Mean Reward: 126.0
Epoch 880 Model saved to ./checkpoints/take_cover.ckpt
Epoch 880 test:
Test Episode 1 Reward: 256.0
Test Episode 2 Reward: 155.0
Test Episode 3 Reward: 706.0
Test Episode 4 Reward: 706.0
Test Episode 5 Reward: 177.0
Test Episode 6 Reward: 139.0
Test Episode 7 Reward: 131.0
Test Episode 8 Reward: 107.0
Test Episode 9 Reward: 107.0
Test Episode 10 Reward: 706.0
Test Episode 11 Reward: 706.0
Test Episode 12 Reward: 706.0
Test Episode 13 Reward: 706.0
Test Episode 14 Reward: 185.0
Test Episode 15 Reward: 706.0
Test Episode 16 Reward: 706.0
Test Episode 17 Reward: 401.0
Test Episode 18 Reward: 706.0
Test Episode 19 Reward: 706.0
Test Episode 20 Reward: 134.0
Epoch 880 Average Test Reward: 442.6


100%|██████████| 2000/2000 [00:26<00:00, 74.55it/s]


Epoch 881 Mean Reward: 313.0


100%|██████████| 2000/2000 [00:28<00:00, 71.23it/s]


Epoch 882 Mean Reward: 879.0


100%|██████████| 2000/2000 [00:27<00:00, 73.70it/s]


Epoch 883 Mean Reward: 430.0


100%|██████████| 2000/2000 [00:26<00:00, 74.28it/s]


Epoch 884 Mean Reward: 290.0


100%|██████████| 2000/2000 [00:26<00:00, 74.45it/s]


Epoch 885 Mean Reward: 269.0


100%|██████████| 2000/2000 [00:26<00:00, 75.68it/s]


Epoch 886 Mean Reward: 121.0


100%|██████████| 2000/2000 [00:26<00:00, 74.52it/s]


Epoch 887 Mean Reward: 251.0


100%|██████████| 2000/2000 [00:26<00:00, 74.10it/s]


Epoch 888 Mean Reward: 326.0


100%|██████████| 2000/2000 [00:27<00:00, 74.05it/s]


Epoch 889 Mean Reward: 397.0


100%|██████████| 2000/2000 [00:27<00:00, 73.36it/s]


Epoch 890 Mean Reward: 514.0
Epoch 890 Model saved to ./checkpoints/take_cover.ckpt
Epoch 890 test:
Test Episode 1 Reward: 128.0
Test Episode 2 Reward: 329.0
Test Episode 3 Reward: 329.0
Test Episode 4 Reward: 184.0
Test Episode 5 Reward: 329.0
Test Episode 6 Reward: 329.0
Test Episode 7 Reward: 206.0
Test Episode 8 Reward: 329.0
Test Episode 9 Reward: 344.0
Test Episode 10 Reward: 329.0
Test Episode 11 Reward: 214.0
Test Episode 12 Reward: 94.0
Test Episode 13 Reward: 329.0
Test Episode 14 Reward: 329.0
Test Episode 15 Reward: 272.0
Test Episode 16 Reward: 329.0
Test Episode 17 Reward: 193.0
Test Episode 18 Reward: 182.0
Test Episode 19 Reward: 228.0
Test Episode 20 Reward: 329.0
Epoch 890 Average Test Reward: 266.75


100%|██████████| 2000/2000 [00:26<00:00, 75.45it/s]


Epoch 891 Mean Reward: 187.0


100%|██████████| 2000/2000 [00:26<00:00, 74.44it/s]


Epoch 892 Mean Reward: 367.0


100%|██████████| 2000/2000 [00:27<00:00, 72.37it/s]


Epoch 893 Mean Reward: 695.0


100%|██████████| 2000/2000 [00:27<00:00, 71.45it/s]


Epoch 894 Mean Reward: 807.0


100%|██████████| 2000/2000 [00:26<00:00, 75.18it/s]


Epoch 895 Mean Reward: 179.0


100%|██████████| 2000/2000 [00:27<00:00, 72.60it/s]


Epoch 896 Mean Reward: 516.0


100%|██████████| 2000/2000 [00:27<00:00, 73.41it/s]


Epoch 897 Mean Reward: 331.0


100%|██████████| 2000/2000 [00:26<00:00, 75.50it/s]


Epoch 898 Mean Reward: 153.0


100%|██████████| 2000/2000 [00:27<00:00, 72.59it/s]


Epoch 899 Mean Reward: 641.0


100%|██████████| 2000/2000 [00:26<00:00, 74.55it/s]


Epoch 900 Mean Reward: 330.0
Epoch 900 Model saved to ./checkpoints/take_cover.ckpt
Epoch 900 test:
Test Episode 1 Reward: 405.0
Test Episode 2 Reward: 405.0
Test Episode 3 Reward: 405.0
Test Episode 4 Reward: 405.0
Test Episode 5 Reward: 405.0
Test Episode 6 Reward: 135.0
Test Episode 7 Reward: 228.0
Test Episode 8 Reward: 405.0
Test Episode 9 Reward: 123.0
Test Episode 10 Reward: 405.0
Test Episode 11 Reward: 405.0
Test Episode 12 Reward: 185.0
Test Episode 13 Reward: 128.0
Test Episode 14 Reward: 240.0
Test Episode 15 Reward: 137.0
Test Episode 16 Reward: 207.0
Test Episode 17 Reward: 405.0
Test Episode 18 Reward: 405.0
Test Episode 19 Reward: 405.0
Test Episode 20 Reward: 405.0
Epoch 900 Average Test Reward: 312.15


100%|██████████| 2000/2000 [00:27<00:00, 71.68it/s]


Epoch 901 Mean Reward: 812.0


100%|██████████| 2000/2000 [00:26<00:00, 75.10it/s]


Epoch 902 Mean Reward: 231.0


100%|██████████| 2000/2000 [00:27<00:00, 73.38it/s]


Epoch 903 Mean Reward: 509.0


100%|██████████| 2000/2000 [00:26<00:00, 74.45it/s]


Epoch 904 Mean Reward: 319.0


100%|██████████| 2000/2000 [00:26<00:00, 74.50it/s]


Epoch 905 Mean Reward: 278.0


100%|██████████| 2000/2000 [00:26<00:00, 74.19it/s]


Epoch 906 Mean Reward: 373.0


100%|██████████| 2000/2000 [00:26<00:00, 74.68it/s]


Epoch 907 Mean Reward: 288.0


100%|██████████| 2000/2000 [00:26<00:00, 75.13it/s]


Epoch 908 Mean Reward: 231.0


100%|██████████| 2000/2000 [00:26<00:00, 74.80it/s]


Epoch 909 Mean Reward: 265.0


100%|██████████| 2000/2000 [00:26<00:00, 74.14it/s]


Epoch 910 Mean Reward: 403.0
Epoch 910 Model saved to ./checkpoints/take_cover.ckpt
Epoch 910 test:
Test Episode 1 Reward: 333.0
Test Episode 2 Reward: 231.0
Test Episode 3 Reward: 161.0
Test Episode 4 Reward: 333.0
Test Episode 5 Reward: 186.0
Test Episode 6 Reward: 333.0
Test Episode 7 Reward: 333.0
Test Episode 8 Reward: 281.0
Test Episode 9 Reward: 214.0
Test Episode 10 Reward: 126.0
Test Episode 11 Reward: 333.0
Test Episode 12 Reward: 333.0
Test Episode 13 Reward: 208.0
Test Episode 14 Reward: 333.0
Test Episode 15 Reward: 185.0
Test Episode 16 Reward: 333.0
Test Episode 17 Reward: 237.0
Test Episode 18 Reward: 333.0
Test Episode 19 Reward: 251.0
Test Episode 20 Reward: 150.0
Epoch 910 Average Test Reward: 261.35


100%|██████████| 2000/2000 [00:26<00:00, 74.88it/s]


Epoch 911 Mean Reward: 269.0


100%|██████████| 2000/2000 [00:27<00:00, 72.15it/s]


Epoch 912 Mean Reward: 744.0


100%|██████████| 2000/2000 [00:27<00:00, 73.23it/s]


Epoch 913 Mean Reward: 541.0


100%|██████████| 2000/2000 [00:27<00:00, 73.96it/s]


Epoch 914 Mean Reward: 386.0


100%|██████████| 2000/2000 [00:27<00:00, 72.26it/s]


Epoch 915 Mean Reward: 332.0


100%|██████████| 2000/2000 [00:27<00:00, 72.36it/s]


Epoch 916 Mean Reward: 552.0


100%|██████████| 2000/2000 [00:26<00:00, 74.31it/s]


Epoch 917 Mean Reward: 334.0


100%|██████████| 2000/2000 [00:26<00:00, 75.16it/s]


Epoch 918 Mean Reward: 218.0


100%|██████████| 2000/2000 [00:26<00:00, 75.40it/s]


Epoch 919 Mean Reward: 159.0


100%|██████████| 2000/2000 [00:27<00:00, 72.99it/s]


Epoch 920 Mean Reward: 576.0
Epoch 920 Model saved to ./checkpoints/take_cover.ckpt
Epoch 920 test:
Test Episode 1 Reward: 106.0
Test Episode 2 Reward: 187.0
Test Episode 3 Reward: 143.0
Test Episode 4 Reward: 249.0
Test Episode 5 Reward: 149.0
Test Episode 6 Reward: 101.0
Test Episode 7 Reward: 185.0
Test Episode 8 Reward: 249.0
Test Episode 9 Reward: 241.0
Test Episode 10 Reward: 249.0
Test Episode 11 Reward: 123.0
Test Episode 12 Reward: 249.0
Test Episode 13 Reward: 211.0
Test Episode 14 Reward: 249.0
Test Episode 15 Reward: 323.0
Test Episode 16 Reward: 344.0
Test Episode 17 Reward: 223.0
Test Episode 18 Reward: 149.0
Test Episode 19 Reward: 122.0
Test Episode 20 Reward: 249.0
Epoch 920 Average Test Reward: 205.05


100%|██████████| 2000/2000 [00:27<00:00, 73.08it/s]


Epoch 921 Mean Reward: 519.0


100%|██████████| 2000/2000 [00:26<00:00, 74.50it/s]


Epoch 922 Mean Reward: 318.0


100%|██████████| 2000/2000 [00:26<00:00, 74.76it/s]


Epoch 923 Mean Reward: 288.0


100%|██████████| 2000/2000 [00:26<00:00, 74.88it/s]


Epoch 924 Mean Reward: 254.0


100%|██████████| 2000/2000 [00:26<00:00, 74.37it/s]


Epoch 925 Mean Reward: 305.0


100%|██████████| 2000/2000 [00:27<00:00, 73.02it/s]


Epoch 926 Mean Reward: 447.0


100%|██████████| 2000/2000 [00:26<00:00, 75.41it/s]


Epoch 927 Mean Reward: 145.0


100%|██████████| 2000/2000 [00:26<00:00, 74.53it/s]


Epoch 928 Mean Reward: 335.0


100%|██████████| 2000/2000 [00:27<00:00, 73.77it/s]


Epoch 929 Mean Reward: 343.0


100%|██████████| 2000/2000 [00:26<00:00, 74.54it/s]


Epoch 930 Mean Reward: 285.0
Epoch 930 Model saved to ./checkpoints/take_cover.ckpt
Epoch 930 test:
Test Episode 1 Reward: 355.0
Test Episode 2 Reward: 495.0
Test Episode 3 Reward: 495.0
Test Episode 4 Reward: 257.0
Test Episode 5 Reward: 160.0
Test Episode 6 Reward: 258.0
Test Episode 7 Reward: 271.0
Test Episode 8 Reward: 495.0
Test Episode 9 Reward: 495.0
Test Episode 10 Reward: 166.0
Test Episode 11 Reward: 495.0
Test Episode 12 Reward: 495.0
Test Episode 13 Reward: 495.0
Test Episode 14 Reward: 106.0
Test Episode 15 Reward: 495.0
Test Episode 16 Reward: 495.0
Test Episode 17 Reward: 495.0
Test Episode 18 Reward: 307.0
Test Episode 19 Reward: 495.0
Test Episode 20 Reward: 495.0
Epoch 930 Average Test Reward: 391.0


100%|██████████| 2000/2000 [00:26<00:00, 74.57it/s]


Epoch 931 Mean Reward: 307.0


100%|██████████| 2000/2000 [00:27<00:00, 73.32it/s]


Epoch 932 Mean Reward: 513.0


100%|██████████| 2000/2000 [00:27<00:00, 72.67it/s]


Epoch 933 Mean Reward: 264.0


100%|██████████| 2000/2000 [00:27<00:00, 72.18it/s]


Epoch 934 Mean Reward: 560.0


100%|██████████| 2000/2000 [00:27<00:00, 72.82it/s]


Epoch 935 Mean Reward: 610.0


100%|██████████| 2000/2000 [00:26<00:00, 74.49it/s]


Epoch 936 Mean Reward: 284.0


100%|██████████| 2000/2000 [00:26<00:00, 74.20it/s]


Epoch 937 Mean Reward: 355.0


100%|██████████| 2000/2000 [00:27<00:00, 72.94it/s]


Epoch 938 Mean Reward: 553.0


100%|██████████| 2000/2000 [00:26<00:00, 74.77it/s]


Epoch 939 Mean Reward: 274.0


100%|██████████| 2000/2000 [00:26<00:00, 74.47it/s]


Epoch 940 Mean Reward: 281.0
Epoch 940 Model saved to ./checkpoints/take_cover.ckpt
Epoch 940 test:
Test Episode 1 Reward: 455.0
Test Episode 2 Reward: 261.0
Test Episode 3 Reward: 204.0
Test Episode 4 Reward: 227.0
Test Episode 5 Reward: 281.0
Test Episode 6 Reward: 455.0
Test Episode 7 Reward: 129.0
Test Episode 8 Reward: 455.0
Test Episode 9 Reward: 168.0
Test Episode 10 Reward: 455.0
Test Episode 11 Reward: 145.0
Test Episode 12 Reward: 455.0
Test Episode 13 Reward: 294.0
Test Episode 14 Reward: 455.0
Test Episode 15 Reward: 128.0
Test Episode 16 Reward: 455.0
Test Episode 17 Reward: 455.0
Test Episode 18 Reward: 455.0
Test Episode 19 Reward: 104.0
Test Episode 20 Reward: 359.0
Epoch 940 Average Test Reward: 319.75


100%|██████████| 2000/2000 [00:26<00:00, 74.61it/s]


Epoch 941 Mean Reward: 316.0


100%|██████████| 2000/2000 [00:27<00:00, 72.63it/s]


Epoch 942 Mean Reward: 652.0


100%|██████████| 2000/2000 [00:26<00:00, 74.61it/s]


Epoch 943 Mean Reward: 281.0


100%|██████████| 2000/2000 [00:27<00:00, 72.59it/s]


Epoch 944 Mean Reward: 526.0


100%|██████████| 2000/2000 [00:27<00:00, 73.07it/s]


Epoch 945 Mean Reward: 520.0


100%|██████████| 2000/2000 [00:26<00:00, 74.54it/s]


Epoch 946 Mean Reward: 317.0


100%|██████████| 2000/2000 [00:26<00:00, 74.77it/s]


Epoch 947 Mean Reward: 277.0


100%|██████████| 2000/2000 [00:26<00:00, 74.55it/s]


Epoch 948 Mean Reward: 268.0


100%|██████████| 2000/2000 [00:26<00:00, 75.50it/s]


Epoch 949 Mean Reward: 125.0


100%|██████████| 2000/2000 [00:28<00:00, 70.99it/s]


Epoch 950 Mean Reward: 906.0
Epoch 950 Model saved to ./checkpoints/take_cover.ckpt
Epoch 950 test:
Test Episode 1 Reward: 520.0
Test Episode 2 Reward: 520.0
Test Episode 3 Reward: 520.0
Test Episode 4 Reward: 520.0
Test Episode 5 Reward: 520.0
Test Episode 6 Reward: 520.0
Test Episode 7 Reward: 211.0
Test Episode 8 Reward: 208.0
Test Episode 9 Reward: 231.0
Test Episode 10 Reward: 520.0
Test Episode 11 Reward: 520.0
Test Episode 12 Reward: 172.0
Test Episode 13 Reward: 520.0
Test Episode 14 Reward: 184.0
Test Episode 15 Reward: 123.0
Test Episode 16 Reward: 520.0
Test Episode 17 Reward: 101.0
Test Episode 18 Reward: 131.0
Test Episode 19 Reward: 292.0
Test Episode 20 Reward: 520.0
Epoch 950 Average Test Reward: 368.65


100%|██████████| 2000/2000 [00:26<00:00, 74.83it/s]


Epoch 951 Mean Reward: 273.0


100%|██████████| 2000/2000 [00:26<00:00, 74.54it/s]


Epoch 952 Mean Reward: 316.0


100%|██████████| 2000/2000 [00:27<00:00, 72.49it/s]


Epoch 953 Mean Reward: 623.0


100%|██████████| 2000/2000 [00:27<00:00, 71.63it/s]


Epoch 954 Mean Reward: 838.0


100%|██████████| 2000/2000 [00:27<00:00, 72.26it/s]


Epoch 955 Mean Reward: 636.0


100%|██████████| 2000/2000 [00:26<00:00, 75.56it/s]


Epoch 956 Mean Reward: 153.0


100%|██████████| 2000/2000 [00:26<00:00, 74.56it/s]


Epoch 957 Mean Reward: 319.0


100%|██████████| 2000/2000 [00:26<00:00, 74.88it/s]


Epoch 958 Mean Reward: 248.0


100%|██████████| 2000/2000 [00:26<00:00, 75.13it/s]


Epoch 959 Mean Reward: 216.0


100%|██████████| 2000/2000 [00:26<00:00, 74.60it/s]


Epoch 960 Mean Reward: 297.0
Epoch 960 Model saved to ./checkpoints/take_cover.ckpt
Epoch 960 test:
Test Episode 1 Reward: 245.0
Test Episode 2 Reward: 245.0
Test Episode 3 Reward: 245.0
Test Episode 4 Reward: 145.0
Test Episode 5 Reward: 245.0
Test Episode 6 Reward: 245.0
Test Episode 7 Reward: 280.0
Test Episode 8 Reward: 245.0
Test Episode 9 Reward: 245.0
Test Episode 10 Reward: 245.0
Test Episode 11 Reward: 139.0
Test Episode 12 Reward: 216.0
Test Episode 13 Reward: 229.0
Test Episode 14 Reward: 245.0
Test Episode 15 Reward: 133.0
Test Episode 16 Reward: 245.0
Test Episode 17 Reward: 477.0
Test Episode 18 Reward: 245.0
Test Episode 19 Reward: 245.0
Test Episode 20 Reward: 245.0
Epoch 960 Average Test Reward: 240.2


100%|██████████| 2000/2000 [00:28<00:00, 70.53it/s]


Epoch 961 Mean Reward: 379.0


100%|██████████| 2000/2000 [00:28<00:00, 70.63it/s]


Epoch 962 Mean Reward: 263.0


100%|██████████| 2000/2000 [00:28<00:00, 70.50it/s]


Epoch 963 Mean Reward: 291.0


100%|██████████| 2000/2000 [00:28<00:00, 71.09it/s]


Epoch 964 Mean Reward: 399.0


100%|██████████| 2000/2000 [00:27<00:00, 71.98it/s]


Epoch 965 Mean Reward: 213.0


100%|██████████| 2000/2000 [00:28<00:00, 70.51it/s]


Epoch 966 Mean Reward: 271.0


100%|██████████| 2000/2000 [00:28<00:00, 70.24it/s]


Epoch 967 Mean Reward: 273.0


100%|██████████| 2000/2000 [00:29<00:00, 67.73it/s]


Epoch 968 Mean Reward: 804.0


100%|██████████| 2000/2000 [00:27<00:00, 71.51it/s]


Epoch 969 Mean Reward: 323.0


100%|██████████| 2000/2000 [00:27<00:00, 73.39it/s]


Epoch 970 Mean Reward: 199.0
Epoch 970 Model saved to ./checkpoints/take_cover.ckpt
Epoch 970 test:
Test Episode 1 Reward: 328.0
Test Episode 2 Reward: 306.0
Test Episode 3 Reward: 328.0
Test Episode 4 Reward: 182.0
Test Episode 5 Reward: 225.0
Test Episode 6 Reward: 136.0
Test Episode 7 Reward: 355.0
Test Episode 8 Reward: 328.0
Test Episode 9 Reward: 117.0
Test Episode 10 Reward: 328.0
Test Episode 11 Reward: 328.0
Test Episode 12 Reward: 328.0
Test Episode 13 Reward: 114.0
Test Episode 14 Reward: 106.0
Test Episode 15 Reward: 136.0
Test Episode 16 Reward: 328.0
Test Episode 17 Reward: 107.0
Test Episode 18 Reward: 328.0
Test Episode 19 Reward: 328.0
Test Episode 20 Reward: 328.0
Epoch 970 Average Test Reward: 253.2


100%|██████████| 2000/2000 [00:27<00:00, 72.11it/s]


Epoch 971 Mean Reward: 294.0


100%|██████████| 2000/2000 [00:27<00:00, 71.60it/s]


Epoch 972 Mean Reward: 184.0


100%|██████████| 2000/2000 [00:27<00:00, 72.32it/s]


Epoch 973 Mean Reward: 149.0


100%|██████████| 2000/2000 [00:27<00:00, 72.23it/s]


Epoch 974 Mean Reward: 415.0


100%|██████████| 2000/2000 [00:27<00:00, 72.83it/s]


Epoch 975 Mean Reward: 351.0


100%|██████████| 2000/2000 [00:27<00:00, 71.54it/s]


Epoch 976 Mean Reward: 140.0


100%|██████████| 2000/2000 [00:28<00:00, 71.12it/s]


Epoch 977 Mean Reward: 248.0


100%|██████████| 2000/2000 [00:27<00:00, 71.46it/s]


Epoch 978 Mean Reward: 245.0


100%|██████████| 2000/2000 [00:27<00:00, 71.70it/s]


Epoch 979 Mean Reward: 144.0


100%|██████████| 2000/2000 [00:28<00:00, 69.87it/s]


Epoch 980 Mean Reward: 427.0
Epoch 980 Model saved to ./checkpoints/take_cover.ckpt
Epoch 980 test:
Test Episode 1 Reward: 116.0
Test Episode 2 Reward: 98.0
Test Episode 3 Reward: 107.0
Test Episode 4 Reward: 231.0
Test Episode 5 Reward: 126.0
Test Episode 6 Reward: 98.0
Test Episode 7 Reward: 153.0
Test Episode 8 Reward: 376.0
Test Episode 9 Reward: 376.0
Test Episode 10 Reward: 347.0
Test Episode 11 Reward: 376.0
Test Episode 12 Reward: 376.0
Test Episode 13 Reward: 181.0
Test Episode 14 Reward: 183.0
Test Episode 15 Reward: 376.0
Test Episode 16 Reward: 233.0
Test Episode 17 Reward: 122.0
Test Episode 18 Reward: 376.0
Test Episode 19 Reward: 376.0
Test Episode 20 Reward: 457.0
Epoch 980 Average Test Reward: 254.2


100%|██████████| 2000/2000 [00:27<00:00, 72.84it/s]


Epoch 981 Mean Reward: 206.0


100%|██████████| 2000/2000 [00:28<00:00, 71.20it/s]


Epoch 982 Mean Reward: 286.0


100%|██████████| 2000/2000 [00:27<00:00, 72.41it/s]


Epoch 983 Mean Reward: 194.0


100%|██████████| 2000/2000 [00:28<00:00, 69.49it/s]


Epoch 984 Mean Reward: 703.0


100%|██████████| 2000/2000 [00:27<00:00, 71.63it/s]


Epoch 985 Mean Reward: 284.0


100%|██████████| 2000/2000 [00:27<00:00, 73.86it/s]


Epoch 986 Mean Reward: 236.0


100%|██████████| 2000/2000 [00:27<00:00, 72.84it/s]


Epoch 987 Mean Reward: 187.0


100%|██████████| 2000/2000 [00:28<00:00, 71.05it/s]


Epoch 988 Mean Reward: 438.0


100%|██████████| 2000/2000 [00:27<00:00, 72.78it/s]


Epoch 989 Mean Reward: 102.0


100%|██████████| 2000/2000 [00:28<00:00, 69.89it/s]


Epoch 990 Mean Reward: 515.0
Epoch 990 Model saved to ./checkpoints/take_cover.ckpt
Epoch 990 test:
Test Episode 1 Reward: 250.0
Test Episode 2 Reward: 250.0
Test Episode 3 Reward: 175.0
Test Episode 4 Reward: 169.0
Test Episode 5 Reward: 250.0
Test Episode 6 Reward: 250.0
Test Episode 7 Reward: 282.0
Test Episode 8 Reward: 250.0
Test Episode 9 Reward: 126.0
Test Episode 10 Reward: 213.0
Test Episode 11 Reward: 250.0
Test Episode 12 Reward: 129.0
Test Episode 13 Reward: 158.0
Test Episode 14 Reward: 463.0
Test Episode 15 Reward: 250.0
Test Episode 16 Reward: 250.0
Test Episode 17 Reward: 250.0
Test Episode 18 Reward: 154.0
Test Episode 19 Reward: 250.0
Test Episode 20 Reward: 250.0
Epoch 990 Average Test Reward: 230.95


100%|██████████| 2000/2000 [00:27<00:00, 72.92it/s]


Epoch 991 Mean Reward: 120.0


100%|██████████| 2000/2000 [00:28<00:00, 69.71it/s]


Epoch 992 Mean Reward: 544.0


100%|██████████| 2000/2000 [00:28<00:00, 71.00it/s]


Epoch 993 Mean Reward: 542.0


100%|██████████| 2000/2000 [00:29<00:00, 68.27it/s]


Epoch 994 Mean Reward: 722.0


100%|██████████| 2000/2000 [00:27<00:00, 72.56it/s]


Epoch 995 Mean Reward: 188.0


100%|██████████| 2000/2000 [00:27<00:00, 71.74it/s]


Epoch 996 Mean Reward: 287.0


100%|██████████| 2000/2000 [00:28<00:00, 70.31it/s]


Epoch 997 Mean Reward: 346.0


100%|██████████| 2000/2000 [00:28<00:00, 69.85it/s]


Epoch 998 Mean Reward: 430.0


100%|██████████| 2000/2000 [00:28<00:00, 71.05it/s]


Epoch 999 Mean Reward: 242.0


100%|██████████| 2000/2000 [00:28<00:00, 69.98it/s]


Epoch 1000 Mean Reward: 430.0
Epoch 1000 Model saved to ./checkpoints/take_cover.ckpt
Epoch 1000 test:
Test Episode 1 Reward: 250.0
Test Episode 2 Reward: 175.0
Test Episode 3 Reward: 213.0
Test Episode 4 Reward: 213.0
Test Episode 5 Reward: 213.0
Test Episode 6 Reward: 193.0
Test Episode 7 Reward: 267.0
Test Episode 8 Reward: 353.0
Test Episode 9 Reward: 214.0
Test Episode 10 Reward: 213.0
Test Episode 11 Reward: 213.0
Test Episode 12 Reward: 157.0
Test Episode 13 Reward: 213.0
Test Episode 14 Reward: 119.0
Test Episode 15 Reward: 213.0
Test Episode 16 Reward: 213.0
Test Episode 17 Reward: 213.0
Test Episode 18 Reward: 213.0
Test Episode 19 Reward: 213.0
Test Episode 20 Reward: 213.0
Epoch 1000 Average Test Reward: 214.2
[(442.6, 880), (424.0, 440), (391.0, 930), (368.65, 950), (367.8, 780), (336.05, 10), (331.5, 300), (325.7, 350), (322.75, 410), (319.75, 940), (312.35, 660), (312.15, 900), (308.15, 700), (302.55, 590), (298.6, 50), (291.3, 240), (288.75, 70), (286.75, 100), (278.75,

In [8]:
"""
Delete the "checkpoint" file in model directory and manually specify a checkpoint file path
if a screen resolution that's different from the one that the agent was trained on is to be used.
"""

game.set_window_visible(True)

#Get a list of checkpoints saved during training

ckpts = tf.train.get_checkpoint_state('checkpoints').all_model_checkpoint_paths

#Test the trained model at a specific checkpoint by only choosing actions with a greedy strategy

ckpt_rank = list()

test_reward = test_agent(DQN, num_episodes=20,
                                 training=False,
                                 load_model=True,
                                 depth=False,
                                 model_dir=ckpts[-13])
    
#print('Epoch {} Average Test Reward: {}'.format(checkpoint, test_reward))
ckpt_rank.append((test_reward, int(checkpoint.split('-')[-1])))
    
print(sorted(ckpt_rank, reverse=True))


Loading model from checkpoints\take_cover.ckpt-880
INFO:tensorflow:Restoring parameters from checkpoints\take_cover.ckpt-880
Test Episode 1 Reward: 706.0
Test Episode 2 Reward: 706.0
Test Episode 3 Reward: 299.0
Test Episode 4 Reward: 706.0
Test Episode 5 Reward: 706.0
Test Episode 6 Reward: 232.0
Test Episode 7 Reward: 288.0
Test Episode 8 Reward: 706.0
Test Episode 9 Reward: 706.0
Test Episode 10 Reward: 433.0
Test Episode 11 Reward: 706.0
Test Episode 12 Reward: 341.0
Test Episode 13 Reward: 158.0
Test Episode 14 Reward: 706.0
Test Episode 15 Reward: 706.0
Test Episode 16 Reward: 124.0
Test Episode 17 Reward: 241.0
Test Episode 18 Reward: 706.0
Test Episode 19 Reward: 706.0
Test Episode 20 Reward: 159.0
[(502.05, 1000)]
