In [1]:
import itertools as it
import os
from time import time, sleep

import torch
import vizdoom as vzd
import skimage
import numpy as np
from tqdm import trange

from DoubleDQNAgent import DoubleDQNAgent


def preprocess(img, resolution=(30, 45)):
    """Downsample a screen buffer and add a leading axis.

    Args:
        img: Image array, e.g. the ``screen_buffer`` of a game state.
        resolution: Target output shape handed to
            ``skimage.transform.resize``. Defaults to ``(30, 45)``, the size
            used by the rest of this script.

    Returns:
        The resized image as a ``float32`` array with one extra axis
        prepended (``(1, 30, 45)`` for a 2-D input and the default
        resolution).
    """
    # Import the submodule explicitly: a bare ``import skimage`` does not
    # load ``skimage.transform`` on scikit-image releases that lack lazy
    # submodule loading, which would make the attribute lookup fail.
    from skimage.transform import resize

    img = resize(img, resolution)
    img = img.astype(np.float32)
    return np.expand_dims(img, axis=0)


def create_game_environment(config_file_path):
    """Create and initialise a ViZDoom game from a scenario config file.

    The scenario file is loaded first; the window visibility, mode, screen
    format and screen resolution are then set explicitly before ``init()``
    is called.

    Args:
        config_file_path: Path to the scenario ``.cfg`` file.

    Returns:
        The initialised ``vzd.DoomGame`` instance.
    """
    print("Creating game environment ...")

    doom = vzd.DoomGame()
    doom.load_config(config_file_path)

    # Settings applied after the config file has been loaded.
    doom.set_screen_resolution(vzd.ScreenResolution.RES_640X480)
    doom.set_screen_format(vzd.ScreenFormat.GRAY8)
    doom.set_mode(vzd.Mode.PLAYER)
    doom.set_window_visible(False)

    doom.init()
    print("Game environment initialized ...")
    return doom


def test_agent(game, agent, actions, frame_repeat, test_episodes_per_epoch=10):
    """Play evaluation episodes and return the total reward of each.

    Args:
        game: Initialised game instance to play on.
        agent: Object whose ``get_action(state)`` returns an index into
            ``actions``.
        actions: List of button combinations.
        frame_repeat: Number of tics each chosen action is held for.
        test_episodes_per_epoch: How many episodes to play.

    Returns:
        A NumPy array with one total reward per episode.
    """
    print("\nTesting...")

    def play_episode():
        # Run a single episode to completion and report its total reward.
        game.new_episode()
        while not game.is_episode_finished():
            frame = preprocess(game.get_state().screen_buffer)
            chosen = agent.get_action(frame)
            game.make_action(actions[chosen], frame_repeat)
        return game.get_total_reward()

    episode_rewards = [
        play_episode() for _ in trange(test_episodes_per_epoch, leave=False)
    ]
    return np.array(episode_rewards)


def train_agent(game, agent, actions, num_epochs, frame_repeat, steps_per_epoch, save_model, model_path):
    """Train the agent for ``num_epochs`` epochs of ``steps_per_epoch`` steps.

    Each step picks an action with ``agent.get_action``, holds it for
    ``frame_repeat`` tics, stores the transition with
    ``agent.append_memory`` and, once more than ``agent.batch_size`` steps
    have been taken in total, calls ``agent.train()``. After every epoch the
    agent is evaluated with ``test_agent`` and, if requested, ``agent.q_net``
    is saved.

    Args:
        game: Initialised game instance. It is closed before returning.
        agent: Agent exposing ``get_action``, ``append_memory``, ``train``,
            ``batch_size`` and ``q_net``.
        actions: List of button combinations indexed by the agent's output.
        num_epochs: Number of training epochs.
        frame_repeat: Number of tics each action is held for.
        steps_per_epoch: Number of agent steps per epoch.
        save_model: Whether to save ``agent.q_net`` after every epoch.
        model_path: Destination passed to ``torch.save``.

    Returns:
        A ``(agent, game)`` tuple; ``game`` has been closed.
    """
    start_time = time()

    # Counts steps across ALL epochs. Resetting it per epoch would needlessly
    # skip learning at the start of every epoch even though the agent's
    # memory already holds transitions from earlier epochs.
    global_step = 0

    for epoch in range(num_epochs):
        game.new_episode()
        train_scores = []

        print("\nEpoch #" + str(epoch + 1))

        for _ in trange(steps_per_epoch):
            state = preprocess(game.get_state().screen_buffer)
            action = agent.get_action(state)
            reward = game.make_action(actions[action], frame_repeat)
            done = game.is_episode_finished()

            if not done:
                next_state = preprocess(game.get_state().screen_buffer)
            else:
                # No state is fetched once the episode is over; use an
                # all-zero placeholder with the same shape/dtype as ``state``.
                next_state = np.zeros_like(state)

            agent.append_memory(state, action, reward, next_state, done)

            if global_step > agent.batch_size:
                agent.train()

            if done:
                train_scores.append(game.get_total_reward())
                game.new_episode()

            global_step += 1

        if train_scores:
            train_scores = np.array(train_scores)
            print(
                "Results (Train): mean: {:.1f} +/- {:.1f},".format(
                    train_scores.mean(), train_scores.std()
                ),
                "min: %.1f," % train_scores.min(),
                "max: %.1f," % train_scores.max(),
            )
        else:
            # min()/max() of an empty array raise ValueError, so report the
            # situation instead of crashing the whole training run.
            print("Results (Train): no episode finished during this epoch")

        test_scores = test_agent(game, agent, actions, frame_repeat)
        print(
            "Results (Test): mean: {:.1f} +/- {:.1f},".format(
                test_scores.mean(), test_scores.std()
            ),
            "min: %.1f" % test_scores.min(),
            "max: %.1f" % test_scores.max(),
        )

        if save_model:
            # torch.save does not create missing parent directories.
            if isinstance(model_path, (str, os.PathLike)):
                checkpoint_dir = os.path.dirname(model_path)
                if checkpoint_dir:
                    os.makedirs(checkpoint_dir, exist_ok=True)
            print("Saving the network weights to:", model_path)
            # NOTE: this saves the whole ``q_net`` object, not a state_dict.
            torch.save(agent.q_net, model_path)
        print("Total elapsed time: %.2f minutes" %
              ((time() - start_time) / 60.0))

    game.close()
    return agent, game


if __name__ == "__main__":
    # Build the environment and enumerate every on/off combination of the
    # available buttons as the discrete action set.
    config_file_path = os.path.join(vzd.scenarios_path, "rocket_basic.cfg")
    game = create_game_environment(config_file_path)
    n = game.get_available_buttons_size()
    actions = [list(a) for a in it.product([0, 1], repeat=n)]

    # Set the hyperparameters
    batch_size = 128
    lr = 0.006491568748327548
    discount_factor = 0.8876233928106229
    memory_size = 10000
    frame_repeat = 12
    steps_per_epoch = 2000
    epsilon_decay = 0.995655176912701

    # Use GPU if available
    if torch.cuda.is_available():
        device = torch.device("cuda")
        torch.backends.cudnn.benchmark = True
    else:
        device = torch.device("cpu")
    print(f"Using device={device} ...")

    # Initialize our agent with the set parameters
    agent = DoubleDQNAgent(
        action_size=len(actions),
        lr=lr,
        batch_size=batch_size,
        memory_size=memory_size,
        discount_factor=discount_factor,
        device=device,
        load_model=False,
        optimizer="Adam",
        epsilon_decay=epsilon_decay,
    )

    # Run the training for the set number of epochs
    skip_learning = False
    if not skip_learning:
        agent, game = train_agent(
            game,
            agent,
            actions,
            num_epochs=50,
            frame_repeat=frame_repeat,
            steps_per_epoch=steps_per_epoch,
            save_model=True,
            model_path="checkpoints/Doom_DoubleDQN.pth",
        )

        print("======================================")
        print("Training finished. It's time to watch!")

    # Reinitialize the game with window visible
    game.close()
    game.set_window_visible(True)
    game.set_mode(vzd.Mode.ASYNC_PLAYER)
    game.init()

    for _ in range(10):
        game.new_episode()
        while not game.is_episode_finished():
            state = preprocess(game.get_state().screen_buffer)
            best_action_index = agent.get_action(state)

            # Instead of make_action(a, frame_repeat) in order to make the animation smooth
            game.set_action(actions[best_action_index])
            # Hold the action for the same number of tics used in training
            # rather than a separately hard-coded constant.
            for _tic in range(frame_repeat):
                game.advance_action()

        # Sleep between episodes
        sleep(1.0)
        score = game.get_total_reward()
        print("Total score: ", score)

    # Release the visible game instance once the demo episodes are done.
    game.close()


Creating game environment ...
Game environment initialized ...
Using device=cuda ...
Initializing New Model

Epoch #1


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [01:55<00:00, 17.28it/s]


Results (Train): mean: -45.6 +/- 149.1, min: -365.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -59.5 +/- 156.0, min: -365.0 max: 66.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 1.97 minutes

Epoch #2


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [01:53<00:00, 17.58it/s]


Results (Train): mean: 10.8 +/- 95.6, min: -370.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: 33.1 +/- 32.0, min: -21.0 max: 66.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 3.88 minutes

Epoch #3


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [01:50<00:00, 18.15it/s]


Results (Train): mean: 17.2 +/- 85.4, min: -355.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: 18.3 +/- 108.4, min: -300.0 max: 66.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 5.74 minutes

Epoch #4


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [02:19<00:00, 14.36it/s]


Results (Train): mean: 12.2 +/- 87.3, min: -355.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -44.5 +/- 151.0, min: -325.0 max: 66.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 8.15 minutes

Epoch #5


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [02:11<00:00, 15.23it/s]


Results (Train): mean: 11.0 +/- 86.4, min: -350.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: 28.3 +/- 17.7, min: 13.0 max: 66.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 10.37 minutes

Epoch #6


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [02:12<00:00, 15.06it/s]


Results (Train): mean: -22.4 +/- 104.3, min: -350.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: 43.8 +/- 41.8, min: -79.0 max: 66.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 12.61 minutes

Epoch #7


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [02:26<00:00, 13.61it/s]


Results (Train): mean: -11.9 +/- 95.1, min: -345.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -34.8 +/- 120.1, min: -355.0 max: 66.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 15.11 minutes

Epoch #8


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [02:35<00:00, 12.82it/s]


Results (Train): mean: -8.7 +/- 79.2, min: -355.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: 30.4 +/- 74.3, min: -179.0 max: 66.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 17.77 minutes

Epoch #9


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [02:13<00:00, 14.95it/s]


Results (Train): mean: 2.6 +/- 77.0, min: -340.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -14.5 +/- 102.5, min: -310.0 max: 66.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 20.04 minutes

Epoch #10


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [02:02<00:00, 16.36it/s]


Results (Train): mean: -34.1 +/- 128.0, min: -335.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -1.2 +/- 112.0, min: -310.0 max: 66.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 22.12 minutes

Epoch #11


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [02:02<00:00, 16.36it/s]


Results (Train): mean: -32.9 +/- 125.0, min: -350.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -57.5 +/- 91.9, min: -300.0 max: 66.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 24.22 minutes

Epoch #12


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [02:20<00:00, 14.20it/s]


Results (Train): mean: -33.6 +/- 118.9, min: -350.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -33.9 +/- 111.8, min: -345.0 max: 54.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 26.62 minutes

Epoch #13


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [02:22<00:00, 14.00it/s]


Results (Train): mean: -23.5 +/- 113.0, min: -355.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -37.8 +/- 102.0, min: -320.0 max: 18.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 29.05 minutes

Epoch #14


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [02:30<00:00, 13.31it/s]


Results (Train): mean: -26.5 +/- 115.6, min: -355.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -101.8 +/- 133.0, min: -335.0 max: 30.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 31.63 minutes

Epoch #15


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [02:12<00:00, 15.05it/s]


Results (Train): mean: -21.4 +/- 111.1, min: -370.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: 3.1 +/- 49.6, min: -54.0 max: 66.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 33.89 minutes

Epoch #16


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [02:05<00:00, 15.90it/s]


Results (Train): mean: -32.9 +/- 117.7, min: -355.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -49.0 +/- 146.5, min: -335.0 max: 66.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 36.04 minutes

Epoch #17


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [02:03<00:00, 16.26it/s]


Results (Train): mean: -13.9 +/- 111.6, min: -360.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: 20.5 +/- 30.8, min: -47.0 max: 54.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 38.14 minutes

Epoch #18


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [02:10<00:00, 15.37it/s]


Results (Train): mean: -38.1 +/- 133.1, min: -365.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -41.8 +/- 139.2, min: -325.0 max: 66.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 40.35 minutes

Epoch #19


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [01:44<00:00, 19.20it/s]


Results (Train): mean: -18.2 +/- 115.2, min: -365.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -21.7 +/- 108.2, min: -315.0 max: 66.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 42.12 minutes

Epoch #20


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [01:42<00:00, 19.46it/s]


Results (Train): mean: -40.8 +/- 141.6, min: -365.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -46.9 +/- 146.0, min: -340.0 max: 54.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 43.86 minutes

Epoch #21


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [01:40<00:00, 19.90it/s]


Results (Train): mean: -51.3 +/- 143.3, min: -370.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -6.4 +/- 103.8, min: -310.0 max: 66.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 45.57 minutes

Epoch #22


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [01:41<00:00, 19.75it/s]


Results (Train): mean: -36.8 +/- 126.1, min: -365.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: 10.7 +/- 47.1, min: -112.0 max: 66.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 47.28 minutes

Epoch #23


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [01:46<00:00, 18.82it/s]


Results (Train): mean: -6.8 +/- 91.9, min: -355.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: 20.1 +/- 43.0, min: -98.0 max: 66.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 49.07 minutes

Epoch #24


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [01:42<00:00, 19.50it/s]


Results (Train): mean: 4.1 +/- 73.7, min: -315.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -65.7 +/- 136.1, min: -320.0 max: 66.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 50.81 minutes

Epoch #25


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [01:37<00:00, 20.51it/s]


Results (Train): mean: -33.6 +/- 111.3, min: -365.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -39.7 +/- 101.5, min: -310.0 max: 30.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 52.47 minutes

Epoch #26


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [01:37<00:00, 20.42it/s]


Results (Train): mean: -100.3 +/- 150.8, min: -355.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -12.1 +/- 116.8, min: -310.0 max: 66.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 54.12 minutes

Epoch #27


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [01:38<00:00, 20.31it/s]


Results (Train): mean: -110.1 +/- 167.9, min: -360.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -229.7 +/- 120.5, min: -345.0 max: 5.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 55.82 minutes

Epoch #28


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [01:38<00:00, 20.34it/s]


Results (Train): mean: -48.0 +/- 127.9, min: -355.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -22.9 +/- 123.3, min: -350.0 max: 66.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 57.49 minutes

Epoch #29


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [01:42<00:00, 19.45it/s]


Results (Train): mean: -20.6 +/- 103.7, min: -370.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -45.3 +/- 103.1, min: -305.0 max: 66.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 59.25 minutes

Epoch #30


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [01:39<00:00, 20.17it/s]


Results (Train): mean: -14.5 +/- 95.0, min: -345.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -29.0 +/- 109.5, min: -345.0 max: 42.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 60.92 minutes

Epoch #31


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [01:36<00:00, 20.83it/s]


Results (Train): mean: -53.4 +/- 133.7, min: -360.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -123.4 +/- 175.6, min: -365.0 max: 66.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 62.56 minutes

Epoch #32


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [01:36<00:00, 20.74it/s]


Results (Train): mean: -65.0 +/- 137.7, min: -365.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -25.2 +/- 107.6, min: -310.0 max: 66.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 64.20 minutes

Epoch #33


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [01:43<00:00, 19.30it/s]


Results (Train): mean: -111.0 +/- 155.3, min: -365.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -32.2 +/- 68.2, min: -210.0 max: 54.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 65.96 minutes

Epoch #34


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [01:35<00:00, 20.94it/s]


Results (Train): mean: -92.2 +/- 145.2, min: -360.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -39.1 +/- 118.0, min: -355.0 max: 30.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 67.58 minutes

Epoch #35


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [01:40<00:00, 19.84it/s]


Results (Train): mean: -102.5 +/- 164.7, min: -365.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -103.4 +/- 155.0, min: -345.0 max: 29.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 69.29 minutes

Epoch #36


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [01:45<00:00, 18.91it/s]


Results (Train): mean: -84.1 +/- 157.4, min: -355.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -100.4 +/- 151.4, min: -345.0 max: 30.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 71.11 minutes

Epoch #37


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [01:54<00:00, 17.47it/s]


Results (Train): mean: -43.6 +/- 137.3, min: -360.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -38.4 +/- 152.5, min: -345.0 max: 66.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 73.06 minutes

Epoch #38


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [02:09<00:00, 15.49it/s]


Results (Train): mean: -6.0 +/- 101.0, min: -345.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: 25.1 +/- 29.5, min: -28.0 max: 66.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 75.25 minutes

Epoch #39


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [02:10<00:00, 15.38it/s]


Results (Train): mean: -14.7 +/- 117.5, min: -365.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -59.9 +/- 142.4, min: -320.0 max: 66.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 77.48 minutes

Epoch #40


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [02:14<00:00, 14.85it/s]


Results (Train): mean: -14.6 +/- 123.0, min: -350.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -68.9 +/- 138.4, min: -320.0 max: 66.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 79.80 minutes

Epoch #41


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [02:10<00:00, 15.35it/s]


Results (Train): mean: -59.6 +/- 150.0, min: -350.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -9.2 +/- 109.8, min: -305.0 max: 66.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 82.03 minutes

Epoch #42


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [02:09<00:00, 15.50it/s]


Results (Train): mean: -32.0 +/- 134.3, min: -360.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -129.2 +/- 158.3, min: -345.0 max: 41.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 84.26 minutes

Epoch #43


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [02:07<00:00, 15.66it/s]


Results (Train): mean: -72.1 +/- 153.1, min: -365.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -5.1 +/- 104.5, min: -300.0 max: 66.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 86.43 minutes

Epoch #44


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [02:03<00:00, 16.13it/s]


Results (Train): mean: -50.4 +/- 139.8, min: -355.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -12.4 +/- 153.8, min: -320.0 max: 66.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 88.53 minutes

Epoch #45


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [02:05<00:00, 15.98it/s]


Results (Train): mean: -39.7 +/- 138.5, min: -350.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -146.8 +/- 146.3, min: -320.0 max: 42.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 90.71 minutes

Epoch #46


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [02:12<00:00, 15.10it/s]


Results (Train): mean: -66.4 +/- 159.6, min: -345.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -9.0 +/- 117.6, min: -310.0 max: 66.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 93.00 minutes

Epoch #47


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [02:13<00:00, 14.99it/s]


Results (Train): mean: -140.8 +/- 170.3, min: -360.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -27.1 +/- 112.2, min: -315.0 max: 42.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 95.28 minutes

Epoch #48


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [03:19<00:00, 10.03it/s]


Results (Train): mean: -63.8 +/- 156.8, min: -355.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -148.3 +/- 161.2, min: -315.0 max: 66.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 98.69 minutes

Epoch #49


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [02:23<00:00, 13.91it/s]


Results (Train): mean: -64.4 +/- 164.4, min: -355.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: 20.7 +/- 111.0, min: -310.0 max: 66.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 101.12 minutes

Epoch #50


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [01:57<00:00, 16.99it/s]


Results (Train): mean: -159.2 +/- 175.0, min: -345.0, max: 66.0,

Testing...


                                                                                                                       

Results (Test): mean: -116.1 +/- 152.4, min: -340.0 max: 66.0
Saving the network weights to: checkpoints/Doom_DoubleDQN.pth
Total elapsed time: 103.12 minutes
Training finished. It's time to watch!
Total score:  77.0
Total score:  66.0
Total score:  -268.0
Total score:  81.0
Total score:  59.0
Total score:  -262.0
Total score:  69.0
Total score:  39.0
Total score:  -266.0
Total score:  63.0
